//! Advanced Vector Extensions 2 (AVX2)
//!
//! AVX2 expands most AVX commands to 256-bit wide vector registers and
//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
//!
//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
//! overview of the instructions available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate

use crate::core_arch::{simd::*, x86::*};
use crate::intrinsics::simd::*;

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi32)
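///
/// # Examples
///
/// A brief usage sketch, added for illustration rather than taken from
/// Intel's documentation; it assumes the caller has already verified AVX2
/// support.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_epi32(-1, 2, -3, 4, -5, 6, -7, 8);
///     let r = _mm256_abs_epi32(a);
///     // `r` now holds [1, 2, 3, 4, 5, 6, 7, 8].
/// }
/// ```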
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    let a: i32x8 = a.as_i32x8();
    let zero: i32x8 = i32x8::splat(0);
    let r: i32x8 = simd_select::<m32x8, _>(simd_lt(a, zero), simd_neg(a), a);
    transmute(r)
}

/// Computes the absolute values of packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    let a: i16x16 = a.as_i16x16();
    let zero: i16x16 = i16x16::splat(0);
    let r: i16x16 = simd_select::<m16x16, _>(simd_lt(a, zero), simd_neg(a), a);
    transmute(r)
}

/// Computes the absolute values of packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    let a: i8x32 = a.as_i8x32();
    let zero: i8x32 = i8x32::splat(0);
    let r: i8x32 = simd_select::<m8x32, _>(simd_lt(a, zero), simd_neg(a), a);
    transmute(r)
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i64x4(), b.as_i64x4()))
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i32x8(), b.as_i32x8()))
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i16x16(), b.as_i16x16()))
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i8x32(), b.as_i8x32()))
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32()))
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16()))
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu8)
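///
/// # Examples
///
/// An illustrative sketch (not from the original docs) showing how unsigned
/// saturation differs from wrapping addition; AVX2 support is assumed.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm256_set1_epi8(-56); // 200 as an unsigned byte
///     let b = _mm256_set1_epi8(100);
///     let r = _mm256_adds_epu8(a, b);
///     // Every byte of `r` is 255 (0xFF): 200 + 100 saturates instead of wrapping.
/// }
/// ```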
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32()))
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_adds_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16()))
}

/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
/// result, shifts the result right by `IMM8` bytes, and returns the low 16 bytes.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi8)
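///
/// # Examples
///
/// A hypothetical example of the per-lane byte alignment, added for
/// illustration; AVX2 availability is assumed.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm256_set1_epi8(1);
///     let b = _mm256_set1_epi8(2);
///     // Within each 128-bit lane, drop the low 3 bytes of `b` and shift in
///     // the low 3 bytes of `a`: each lane is thirteen 2s followed by three 1s.
///     let r = _mm256_alignr_epi8::<3>(a, b);
/// }
/// ```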
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    // If palignr is shifting the pair of vectors by the size of two lanes
    // or more, emit zero.
    if IMM8 >= 32 {
        return _mm256_set1_epi8(0);
    }
    // If palignr is shifting the pair of input vectors by at least one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b) = if IMM8 >= 16 {
        (_mm256_set1_epi8(0), a)
    } else {
        (a, b)
    };

    let a = a.as_i8x32();
    let b = b.as_i8x32();

    let r: i8x32 = match IMM8 % 16 {
        0 => simd_shuffle!(
            b,
            a,
            [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                23, 24, 25, 26, 27, 28, 29, 30, 31,
            ],
        ),
        1 => simd_shuffle!(
            b,
            a,
            [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23,
                24, 25, 26, 27, 28, 29, 30, 31, 48,
            ],
        ),
        2 => simd_shuffle!(
            b,
            a,
            [
                2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 48, 49,
            ],
        ),
        3 => simd_shuffle!(
            b,
            a,
            [
                3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23, 24,
                25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
            ],
        ),
        4 => simd_shuffle!(
            b,
            a,
            [
                4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24, 25,
                26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
            ],
        ),
        5 => simd_shuffle!(
            b,
            a,
            [
                5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25, 26,
                27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
            ],
        ),
        6 => simd_shuffle!(
            b,
            a,
            [
                6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
            ],
        ),
        7 => simd_shuffle!(
            b,
            a,
            [
                7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26, 27,
                28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
            ],
        ),
        8 => simd_shuffle!(
            b,
            a,
            [
                8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27, 28,
                29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
            ],
        ),
        9 => simd_shuffle!(
            b,
            a,
            [
                9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28, 29,
                30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
            ],
        ),
        10 => simd_shuffle!(
            b,
            a,
            [
                10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29, 30,
                31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
            ],
        ),
        11 => simd_shuffle!(
            b,
            a,
            [
                11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30, 31,
                48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
            ],
        ),
        12 => simd_shuffle!(
            b,
            a,
            [
                12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31, 48,
                49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
            ],
        ),
        13 => simd_shuffle!(
            b,
            a,
            [
                13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48, 49,
                50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
            ],
        ),
        14 => simd_shuffle!(
            b,
            a,
            [
                14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49, 50,
                51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
            ],
        ),
        15 => simd_shuffle!(
            b,
            a,
            [
                15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50, 51,
                52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
            ],
        ),
        _ => b,
    };
    transmute(r)
}

/// Computes the bitwise AND of 256 bits (representing integer data)
/// in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_and(a.as_i64x4(), b.as_i64x4()))
}

/// Computes the bitwise NOT of 256 bits (representing integer data)
/// in `a` and then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_si256)
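///
/// # Examples
///
/// A small illustrative sketch (editorial addition) of the NOT-then-AND
/// behaviour; AVX2 support is assumed.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm256_set1_epi32(0b1100);
///     let b = _mm256_set1_epi32(0b1010);
///     let r = _mm256_andnot_si256(a, b);
///     // Each 32-bit element of `r` is (!0b1100) & 0b1010 == 0b0010.
/// }
/// ```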
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    let all_ones: __m256i = _mm256_set1_epi8(-1);
    transmute(simd_and(
        simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
        b.as_i64x4(),
    ))
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16());
    let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16());
    let r: u32x16 = simd_shr(simd_add(simd_add(a, b), u32x16::splat(1)), u32x16::splat(1));
    transmute(simd_cast::<_, u16x16>(r))
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_avg_epu8)
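///
/// # Examples
///
/// An editorial sketch showing the rounding behaviour implemented below
/// (`(a + b + 1) >> 1`); it assumes AVX2 is available.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm256_set1_epi8(3);
///     let b = _mm256_set1_epi8(4);
///     let r = _mm256_avg_epu8(a, b);
///     // Each byte of `r` is (3 + 4 + 1) >> 1 == 4: the average rounds up.
/// }
/// ```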
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    let a: u16x32 = simd_cast::<_, u16x32>(a.as_u8x32());
    let b: u16x32 = simd_cast::<_, u16x32>(b.as_u8x32());
    let r: u16x32 = simd_shr(simd_add(simd_add(a, b), u16x32::splat(1)), u16x32::splat(1));
    transmute(simd_cast::<_, u8x32>(r))
}

/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM4`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM4, 4);
    let a: i32x4 = a.as_i32x4();
    let b: i32x4 = b.as_i32x4();
    let r: i32x4 = simd_shuffle!(
        a,
        b,
        [
            [0, 4, 0, 4][IMM4 as usize & 0b11],
            [1, 1, 5, 5][IMM4 as usize & 0b11],
            [2, 6, 2, 6][(IMM4 as usize >> 2) & 0b11],
            [3, 3, 7, 7][(IMM4 as usize >> 2) & 0b11],
        ],
    );
    transmute(r)
}

/// Blends packed 32-bit integers from `a` and `b` using control mask `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi32)
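///
/// # Examples
///
/// An illustrative sketch (not part of Intel's documentation) of how the
/// immediate mask selects elements; AVX2 support is assumed.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm256_set1_epi32(0);
///     let b = _mm256_set1_epi32(1);
///     // Bit i of the mask selects element i from `b`; 0b0000_1111 takes the
///     // low four elements from `b` and the high four from `a`.
///     let r = _mm256_blend_epi32::<0b0000_1111>(a, b);
///     // `r` holds [1, 1, 1, 1, 0, 0, 0, 0].
/// }
/// ```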
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a: i32x8 = a.as_i32x8();
    let b: i32x8 = b.as_i32x8();
    let r: i32x8 = simd_shuffle!(
        a,
        b,
        [
            [0, 8, 0, 8][IMM8 as usize & 0b11],
            [1, 1, 9, 9][IMM8 as usize & 0b11],
            [2, 10, 2, 10][(IMM8 as usize >> 2) & 0b11],
            [3, 3, 11, 11][(IMM8 as usize >> 2) & 0b11],
            [4, 12, 4, 12][(IMM8 as usize >> 4) & 0b11],
            [5, 5, 13, 13][(IMM8 as usize >> 4) & 0b11],
            [6, 14, 6, 14][(IMM8 as usize >> 6) & 0b11],
            [7, 7, 15, 15][(IMM8 as usize >> 6) & 0b11],
        ],
    );
    transmute(r)
}

/// Blends packed 16-bit integers from `a` and `b` using control mask `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    let a = a.as_i16x16();
    let b = b.as_i16x16();

    let r: i16x16 = simd_shuffle!(
        a,
        b,
        [
            [0, 16, 0, 16][IMM8 as usize & 0b11],
            [1, 1, 17, 17][IMM8 as usize & 0b11],
            [2, 18, 2, 18][(IMM8 as usize >> 2) & 0b11],
            [3, 3, 19, 19][(IMM8 as usize >> 2) & 0b11],
            [4, 20, 4, 20][(IMM8 as usize >> 4) & 0b11],
            [5, 5, 21, 21][(IMM8 as usize >> 4) & 0b11],
            [6, 22, 6, 22][(IMM8 as usize >> 6) & 0b11],
            [7, 7, 23, 23][(IMM8 as usize >> 6) & 0b11],
            [8, 24, 8, 24][IMM8 as usize & 0b11],
            [9, 9, 25, 25][IMM8 as usize & 0b11],
            [10, 26, 10, 26][(IMM8 as usize >> 2) & 0b11],
            [11, 11, 27, 27][(IMM8 as usize >> 2) & 0b11],
            [12, 28, 12, 28][(IMM8 as usize >> 4) & 0b11],
            [13, 13, 29, 29][(IMM8 as usize >> 4) & 0b11],
            [14, 30, 14, 30][(IMM8 as usize >> 6) & 0b11],
            [15, 15, 31, 31][(IMM8 as usize >> 6) & 0b11],
        ],
    );
    transmute(r)
}

/// Blends packed 8-bit integers from `a` and `b` using `mask`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_epi8)
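///
/// # Examples
///
/// A usage sketch added for illustration; only the most significant bit of
/// each mask byte matters. AVX2 support is assumed.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm256_set1_epi8(1);
///     let b = _mm256_set1_epi8(2);
///     // Bytes whose mask byte has its top bit set are taken from `b`;
///     // here the mask is all ones, so `r` equals `b`.
///     let mask = _mm256_set1_epi8(-1);
///     let r = _mm256_blendv_epi8(a, b, mask);
/// }
/// ```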
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::splat(0));
    transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastb_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i8x16 = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]);
    transmute::<i8x16, _>(ret)
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastb_epi8)
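///
/// # Examples
///
/// A short illustrative example (editorial addition); AVX2 support is
/// assumed.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm_setr_epi8(7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
///     let r = _mm256_broadcastb_epi8(a);
///     // Every one of the 32 bytes in `r` is 7.
/// }
/// ```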
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i8x32 = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]);
    transmute::<i8x32, _>(ret)
}

// N.B., `simd_shuffle!` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i32x4 = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]);
    transmute::<i32x4, _>(ret)
}

// N.B., `simd_shuffle!` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i32x8 = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]);
    transmute::<i32x8, _>(ret)
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
// Emits `vmovddup` instead of `vpbroadcastq`
// See https://github.com/rust-lang/stdarch/issues/791
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    let ret: i64x2 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
    transmute::<i64x2, _>(ret)
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    let ret: i64x4 = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
    transmute::<i64x4, _>(ret)
}

/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2])
}

/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4])
}

// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or
// `vbroadcastf128`.
/// Broadcasts 128 bits of integer data from `a` to all 128-bit lanes in
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastsi128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i64x4 = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]);
    transmute::<i64x4, _>(ret)
}

/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4])
}

/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8])
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i16x8 = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]);
    transmute::<i16x8, _>(ret)
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    let zero: __m128i = _mm_setzero_si128();
    let ret: i16x16 = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]);
    transmute::<i16x16, _>(ret)
}

/// Compares packed 64-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4()))
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32()))
}

/// Compares packed 64-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4()))
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32)
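///
/// # Examples
///
/// An editorial sketch showing that the comparison yields an all-ones or
/// all-zeros mask per element; AVX2 support is assumed.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_epi32(5, 1, 5, 1, 5, 1, 5, 1);
///     let b = _mm256_set1_epi32(3);
///     let gt = _mm256_cmpgt_epi32(a, b);
///     // Elements where a > b are all ones (-1); the rest are zero, so `gt`
///     // can be used directly as a blend or AND mask.
/// }
/// ```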
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32()))
}

/// Sign-extend 16-bit integers to 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    transmute::<i32x8, _>(simd_cast(a.as_i16x8()))
}

/// Sign-extend 16-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    let a: i16x8 = a.as_i16x8();
    let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v64))
}

/// Sign-extend 32-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    transmute::<i64x4, _>(simd_cast(a.as_i32x4()))
}

/// Sign-extend 8-bit integers to 16-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    transmute::<i16x16, _>(simd_cast(a.as_i8x16()))
}

/// Sign-extend 8-bit integers to 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    let a: i8x16 = a.as_i8x16();
    let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i32x8, _>(simd_cast(v64))
}

/// Sign-extend 8-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    let a: i8x16 = a.as_i8x16();
    let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v32))
}

/// Zero-extend packed unsigned 16-bit integers in `a` to packed 32-bit
/// integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    transmute::<i32x8, _>(simd_cast(a.as_u16x8()))
}

/// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit
/// integers. The upper four elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    let a: u16x8 = a.as_u16x8();
    let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v64))
}

/// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    transmute::<i64x4, _>(simd_cast(a.as_u32x4()))
}

/// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi16)
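///
/// # Examples
///
/// An illustrative sketch (editorial addition) contrasting zero-extension
/// with the sign-extending `_mm256_cvtepi8_epi16`; AVX2 support is assumed.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm_set1_epi8(-1); // 0xFF in every byte
///     let _zext = _mm256_cvtepu8_epi16(a); // every 16-bit element is 255
///     let _sext = _mm256_cvtepi8_epi16(a); // every 16-bit element is -1
/// }
/// ```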
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    transmute::<i16x16, _>(simd_cast(a.as_u8x16()))
}

/// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit
/// integers. The upper eight elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    let a: u8x16 = a.as_u8x16();
    let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i32x8, _>(simd_cast(v64))
}

/// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
/// integers. The upper twelve elements of `a` are unused.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    let a: u8x16 = a.as_u8x16();
    let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v32))
}

/// Extracts 128 bits (of integer data) from `a` selected with `IMM1`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti128_si256)
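///
/// # Examples
///
/// A small usage sketch added for illustration; AVX2 support is assumed.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_epi64x(0, 1, 2, 3);
///     let hi = _mm256_extracti128_si256::<1>(a);
///     // `hi` is the upper 128-bit half of `a`, i.e. the 64-bit values [2, 3].
/// }
/// ```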
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    let a: i64x4 = a.as_i64x4();
    let b: i64x4 = _mm256_undefined_si256().as_i64x4();
    let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]);
    transmute(dst)
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi16)
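///
/// # Examples
///
/// An editorial sketch of the result ordering: within each 128-bit lane,
/// sums from `a` come first, then sums from `b`. AVX2 support is assumed.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let a = _mm256_set1_epi16(1);
///     let b = _mm256_set1_epi16(3);
///     let r = _mm256_hadd_epi16(a, b);
///     // Each 128-bit lane of `r` holds four sums from `a` (1 + 1 = 2)
///     // followed by four sums from `b` (3 + 3 = 6).
/// }
/// ```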
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddd(a.as_i32x8(), b.as_i32x8()))
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddsw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally subtracts adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubd(a.as_i32x8(), b.as_i32x8()))
}

/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsubs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubsw(a.as_i16x16(), b.as_i16x16()))
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi32)
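///
/// # Examples
///
/// An illustrative sketch (editorial addition) of gathering from a slice with
/// a byte scale of 4; it assumes AVX2 support and in-bounds offsets.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
///     let offsets = _mm_setr_epi32(0, 2, 4, 6);
///     // SCALE is in bytes; 4 matches the size of an `i32` element.
///     let r = _mm_i32gather_epi32::<4>(data.as_ptr(), offsets);
///     // `r` holds [10, 12, 14, 16].
/// }
/// ```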
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    let neg_one: i32x4 = _mm_set1_epi32(-1).as_i32x4();
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    let r: i32x4 = pgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi32)
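///
/// # Examples
///
/// An editorial sketch of the masked form: lanes whose mask element has its
/// top bit set are gathered, the rest keep the value from `src`. AVX2 support
/// and in-bounds offsets are assumed.
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// unsafe fn example() {
///     use core::arch::x86_64::*;
///     let data: [i32; 4] = [10, 11, 12, 13];
///     let src = _mm_set1_epi32(-1);
///     let offsets = _mm_setr_epi32(0, 1, 2, 3);
///     let mask = _mm_setr_epi32(-1, 0, -1, 0);
///     let r = _mm_mask_i32gather_epi32::<4>(src, data.as_ptr(), offsets, mask);
///     // `r` holds [10, -1, 12, -1].
/// }
/// ```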
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src: i32x4 = src.as_i32x4();
    let mask: i32x4 = mask.as_i32x4();
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    let r: i32x4 = pgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32<const SCALE: i32>(
    slice: *const i32,
    offsets: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    let neg_one: i32x8 = _mm256_set1_epi32(-1).as_i32x8();
    let offsets: i32x8 = offsets.as_i32x8();
    let slice: *const i8 = slice as *const i8;
    let r: i32x8 = vpgatherdd(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src: i32x8 = src.as_i32x8();
    let mask: i32x8 = mask.as_i32x8();
    let offsets: i32x8 = offsets.as_i32x8();
    let slice: *const i8 = slice as *const i8;
    let r: i32x8 = vpgatherdd(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let zero: __m128 = _mm_setzero_ps();
    let neg_one: __m128 = _mm_set1_ps(-1.0);
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    pgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    pgatherdps(src, slice, offsets, mask, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let zero: __m256 = _mm256_setzero_ps();
    let neg_one: __m256 = _mm256_set1_ps(-1.0);
    let offsets: i32x8 = offsets.as_i32x8();
    let slice: *const i8 = slice as *const i8;
    vpgatherdps(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    slice: *const f32,
    offsets: __m256i,
    mask: __m256,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let offsets: i32x8 = offsets.as_i32x8();
    let slice: *const i8 = slice as *const i8;
    vpgatherdps(src, slice, offsets, mask, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    let neg_one: i64x2 = _mm_set1_epi64x(-1).as_i64x2();
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    let r: i64x2 = pgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    let src: i64x2 = src.as_i64x2();
    let mask: i64x2 = mask.as_i64x2();
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    let r: i64x2 = pgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64<const SCALE: i32>(
    slice: *const i64,
    offsets: __m128i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    let neg_one: i64x4 = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    let r: i64x4 = vpgatherdq(zero, slice, offsets, neg_one, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src: i64x4 = src.as_i64x4();
    let mask: i64x4 = mask.as_i64x4();
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    let r: i64x4 = vpgatherdq(src, slice, offsets, mask, SCALE as i8);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let zero: __m128d = _mm_setzero_pd();
    let neg_one: __m128d = _mm_set1_pd(-1.0);
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    pgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    pgatherdpd(src, slice, offsets, mask, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd<const SCALE: i32>(
    slice: *const f64,
    offsets: __m128i,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let zero: __m256d = _mm256_setzero_pd();
    let neg_one: __m256d = _mm256_set1_pd(-1.0);
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    vpgatherdpd(zero, slice, offsets, neg_one, SCALE as i8)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    let offsets: i32x4 = offsets.as_i32x4();
    let slice: *const i8 = slice as *const i8;
    vpgatherdpd(src, slice, offsets, mask, SCALE as i8)
}
1346
1347/// Returns values from `slice` at offsets determined by `offsets * scale`,
1348/// where
1349/// `scale` should be 1, 2, 4 or 8.
1350///
1351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi32)
1352#[inline]
1353#[target_feature(enable = "avx2")]
1354#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1355#[rustc_legacy_const_generics(2)]
1356#[stable(feature = "simd_x86", since = "1.27.0")]
1357pub unsafe fn _mm_i64gather_epi32<const SCALE: i32>(
1358 slice: *const i32,
1359 offsets: __m128i,
1360) -> __m128i {
1361 static_assert_imm8_scale!(SCALE);
1362 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
1363 let neg_one: i32x4 = _mm_set1_epi64x(-1).as_i32x4();
1364 let offsets: i64x2 = offsets.as_i64x2();
1365 let slice: *const i8 = slice as *const i8;
1366 let r: i32x4 = pgatherqd(src:zero, slice, offsets, mask:neg_one, SCALE as i8);
1367 transmute(src:r)
1368}
1369
1370/// Returns values from `slice` at offsets determined by `offsets * scale`,
1371/// where
1372/// `scale` should be 1, 2, 4 or 8. If mask is set, load the value from `src` in
1373/// that position instead.
1374///
1375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi32)
1376#[inline]
1377#[target_feature(enable = "avx2")]
1378#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1379#[rustc_legacy_const_generics(4)]
1380#[stable(feature = "simd_x86", since = "1.27.0")]
1381pub unsafe fn _mm_mask_i64gather_epi32<const SCALE: i32>(
1382 src: __m128i,
1383 slice: *const i32,
1384 offsets: __m128i,
1385 mask: __m128i,
1386) -> __m128i {
1387 static_assert_imm8_scale!(SCALE);
1388 let src: i32x4 = src.as_i32x4();
1389 let mask: i32x4 = mask.as_i32x4();
1390 let offsets: i64x2 = offsets.as_i64x2();
1391 let slice: *const i8 = slice as *const i8;
1392 let r: i32x4 = pgatherqd(src, slice, offsets, mask, SCALE as i8);
1393    transmute(r)
1394}
1395
1396/// Returns values from `slice` at offsets determined by `offsets * scale`,
1397/// where
1398/// `scale` should be 1, 2, 4 or 8.
1399///
1400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi32)
1401#[inline]
1402#[target_feature(enable = "avx2")]
1403#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1404#[rustc_legacy_const_generics(2)]
1405#[stable(feature = "simd_x86", since = "1.27.0")]
1406pub unsafe fn _mm256_i64gather_epi32<const SCALE: i32>(
1407 slice: *const i32,
1408 offsets: __m256i,
1409) -> __m128i {
1410 static_assert_imm8_scale!(SCALE);
1411 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
1412 let neg_one: i32x4 = _mm_set1_epi64x(-1).as_i32x4();
1413 let offsets: i64x4 = offsets.as_i64x4();
1414 let slice: *const i8 = slice as *const i8;
1415    let r: i32x4 = vpgatherqd(zero, slice, offsets, neg_one, SCALE as i8);
1416    transmute(r)
1417}
1418
1419/// Returns values from `slice` at offsets determined by `offsets * scale`,
1420/// where
1421/// `scale` should be 1, 2, 4 or 8. Elements are gathered only where the highest
1422/// bit of the corresponding element in `mask` is set; otherwise `src` is used.
1423///
1424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi32)
1425#[inline]
1426#[target_feature(enable = "avx2")]
1427#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
1428#[rustc_legacy_const_generics(4)]
1429#[stable(feature = "simd_x86", since = "1.27.0")]
1430pub unsafe fn _mm256_mask_i64gather_epi32<const SCALE: i32>(
1431 src: __m128i,
1432 slice: *const i32,
1433 offsets: __m256i,
1434 mask: __m128i,
1435) -> __m128i {
1436 static_assert_imm8_scale!(SCALE);
1437 let src: i32x4 = src.as_i32x4();
1438 let mask: i32x4 = mask.as_i32x4();
1439 let offsets: i64x4 = offsets.as_i64x4();
1440 let slice: *const i8 = slice as *const i8;
1441 let r: i32x4 = vpgatherqd(src, slice, offsets, mask, SCALE as i8);
1442    transmute(r)
1443}
1444
1445/// Returns values from `slice` at offsets determined by `offsets * scale`,
1446/// where
1447/// `scale` should be 1, 2, 4 or 8.
1448///
1449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_ps)
1450#[inline]
1451#[target_feature(enable = "avx2")]
1452#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1453#[rustc_legacy_const_generics(2)]
1454#[stable(feature = "simd_x86", since = "1.27.0")]
1455pub unsafe fn _mm_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m128i) -> __m128 {
1456 static_assert_imm8_scale!(SCALE);
1457 let zero: __m128 = _mm_setzero_ps();
1458 let neg_one: __m128 = _mm_set1_ps(-1.0);
1459 let offsets: i64x2 = offsets.as_i64x2();
1460 let slice: *const i8 = slice as *const i8;
1461    pgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
1462}
1463
1464/// Returns values from `slice` at offsets determined by `offsets * scale`,
1465/// where
1466/// `scale` should be 1, 2, 4 or 8. Elements are gathered only where the highest
1467/// bit of the corresponding element in `mask` is set; otherwise `src` is used.
1468///
1469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_ps)
1470#[inline]
1471#[target_feature(enable = "avx2")]
1472#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1473#[rustc_legacy_const_generics(4)]
1474#[stable(feature = "simd_x86", since = "1.27.0")]
1475pub unsafe fn _mm_mask_i64gather_ps<const SCALE: i32>(
1476 src: __m128,
1477 slice: *const f32,
1478 offsets: __m128i,
1479 mask: __m128,
1480) -> __m128 {
1481 static_assert_imm8_scale!(SCALE);
1482 let offsets: i64x2 = offsets.as_i64x2();
1483 let slice: *const i8 = slice as *const i8;
1484 pgatherqps(src, slice, offsets, mask, SCALE as i8)
1485}
1486
1487/// Returns values from `slice` at offsets determined by `offsets * scale`,
1488/// where
1489/// `scale` should be 1, 2, 4 or 8.
1490///
1491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_ps)
1492#[inline]
1493#[target_feature(enable = "avx2")]
1494#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1495#[rustc_legacy_const_generics(2)]
1496#[stable(feature = "simd_x86", since = "1.27.0")]
1497pub unsafe fn _mm256_i64gather_ps<const SCALE: i32>(slice: *const f32, offsets: __m256i) -> __m128 {
1498 static_assert_imm8_scale!(SCALE);
1499 let zero: __m128 = _mm_setzero_ps();
1500 let neg_one: __m128 = _mm_set1_ps(-1.0);
1501 let offsets: i64x4 = offsets.as_i64x4();
1502 let slice: *const i8 = slice as *const i8;
1503    vpgatherqps(zero, slice, offsets, neg_one, SCALE as i8)
1504}
1505
1506/// Returns values from `slice` at offsets determined by `offsets * scale`,
1507/// where
1508/// `scale` should be 1, 2, 4 or 8. Elements are gathered only where the highest
1509/// bit of the corresponding element in `mask` is set; otherwise `src` is used.
1510///
1511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_ps)
1512#[inline]
1513#[target_feature(enable = "avx2")]
1514#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
1515#[rustc_legacy_const_generics(4)]
1516#[stable(feature = "simd_x86", since = "1.27.0")]
1517pub unsafe fn _mm256_mask_i64gather_ps<const SCALE: i32>(
1518 src: __m128,
1519 slice: *const f32,
1520 offsets: __m256i,
1521 mask: __m128,
1522) -> __m128 {
1523 static_assert_imm8_scale!(SCALE);
1524 let offsets: i64x4 = offsets.as_i64x4();
1525 let slice: *const i8 = slice as *const i8;
1526 vpgatherqps(src, slice, offsets, mask, SCALE as i8)
1527}
1528
1529/// Returns values from `slice` at offsets determined by `offsets * scale`,
1530/// where
1531/// `scale` should be 1, 2, 4 or 8.
1532///
1533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_epi64)
1534#[inline]
1535#[target_feature(enable = "avx2")]
1536#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1537#[rustc_legacy_const_generics(2)]
1538#[stable(feature = "simd_x86", since = "1.27.0")]
1539pub unsafe fn _mm_i64gather_epi64<const SCALE: i32>(
1540 slice: *const i64,
1541 offsets: __m128i,
1542) -> __m128i {
1543 static_assert_imm8_scale!(SCALE);
1544 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
1545 let neg_one: i64x2 = _mm_set1_epi64x(-1).as_i64x2();
1546 let slice: *const i8 = slice as *const i8;
1547 let offsets: i64x2 = offsets.as_i64x2();
1548    let r: i64x2 = pgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
1549    transmute(r)
1550}
1551
1552/// Returns values from `slice` at offsets determined by `offsets * scale`,
1553/// where
1554/// `scale` should be 1, 2, 4 or 8. Elements are gathered only where the highest
1555/// bit of the corresponding element in `mask` is set; otherwise `src` is used.
1556///
1557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_epi64)
1558#[inline]
1559#[target_feature(enable = "avx2")]
1560#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1561#[rustc_legacy_const_generics(4)]
1562#[stable(feature = "simd_x86", since = "1.27.0")]
1563pub unsafe fn _mm_mask_i64gather_epi64<const SCALE: i32>(
1564 src: __m128i,
1565 slice: *const i64,
1566 offsets: __m128i,
1567 mask: __m128i,
1568) -> __m128i {
1569 static_assert_imm8_scale!(SCALE);
1570 let src: i64x2 = src.as_i64x2();
1571 let mask: i64x2 = mask.as_i64x2();
1572 let offsets: i64x2 = offsets.as_i64x2();
1573 let slice: *const i8 = slice as *const i8;
1574 let r: i64x2 = pgatherqq(src, slice, offsets, mask, SCALE as i8);
1575    transmute(r)
1576}
1577
1578/// Returns values from `slice` at offsets determined by `offsets * scale`,
1579/// where
1580/// `scale` should be 1, 2, 4 or 8.
1581///
1582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_epi64)
1583#[inline]
1584#[target_feature(enable = "avx2")]
1585#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1586#[rustc_legacy_const_generics(2)]
1587#[stable(feature = "simd_x86", since = "1.27.0")]
1588pub unsafe fn _mm256_i64gather_epi64<const SCALE: i32>(
1589 slice: *const i64,
1590 offsets: __m256i,
1591) -> __m256i {
1592 static_assert_imm8_scale!(SCALE);
1593 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
1594 let neg_one: i64x4 = _mm256_set1_epi64x(-1).as_i64x4();
1595 let slice: *const i8 = slice as *const i8;
1596 let offsets: i64x4 = offsets.as_i64x4();
1597    let r: i64x4 = vpgatherqq(zero, slice, offsets, neg_one, SCALE as i8);
1598    transmute(r)
1599}
1600
1601/// Returns values from `slice` at offsets determined by `offsets * scale`,
1602/// where
1603/// `scale` should be 1, 2, 4 or 8. Elements are gathered only where the highest
1604/// bit of the corresponding element in `mask` is set; otherwise `src` is used.
1605///
1606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_epi64)
1607#[inline]
1608#[target_feature(enable = "avx2")]
1609#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
1610#[rustc_legacy_const_generics(4)]
1611#[stable(feature = "simd_x86", since = "1.27.0")]
1612pub unsafe fn _mm256_mask_i64gather_epi64<const SCALE: i32>(
1613 src: __m256i,
1614 slice: *const i64,
1615 offsets: __m256i,
1616 mask: __m256i,
1617) -> __m256i {
1618 static_assert_imm8_scale!(SCALE);
1619 let src: i64x4 = src.as_i64x4();
1620 let mask: i64x4 = mask.as_i64x4();
1621 let offsets: i64x4 = offsets.as_i64x4();
1622 let slice: *const i8 = slice as *const i8;
1623 let r: i64x4 = vpgatherqq(src, slice, offsets, mask, SCALE as i8);
1624    transmute(r)
1625}
1626
1627/// Returns values from `slice` at offsets determined by `offsets * scale`,
1628/// where
1629/// `scale` should be 1, 2, 4 or 8.
1630///
1631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_i64gather_pd)
1632#[inline]
1633#[target_feature(enable = "avx2")]
1634#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1635#[rustc_legacy_const_generics(2)]
1636#[stable(feature = "simd_x86", since = "1.27.0")]
1637pub unsafe fn _mm_i64gather_pd<const SCALE: i32>(slice: *const f64, offsets: __m128i) -> __m128d {
1638 static_assert_imm8_scale!(SCALE);
1639 let zero: __m128d = _mm_setzero_pd();
1640 let neg_one: __m128d = _mm_set1_pd(-1.0);
1641 let slice: *const i8 = slice as *const i8;
1642 let offsets: i64x2 = offsets.as_i64x2();
1643    pgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
1644}
1645
1646/// Returns values from `slice` at offsets determined by `offsets * scale`,
1647/// where
1648/// `scale` should be 1, 2, 4 or 8. Elements are gathered only where the highest
1649/// bit of the corresponding element in `mask` is set; otherwise `src` is used.
1650///
1651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_i64gather_pd)
1652#[inline]
1653#[target_feature(enable = "avx2")]
1654#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1655#[rustc_legacy_const_generics(4)]
1656#[stable(feature = "simd_x86", since = "1.27.0")]
1657pub unsafe fn _mm_mask_i64gather_pd<const SCALE: i32>(
1658 src: __m128d,
1659 slice: *const f64,
1660 offsets: __m128i,
1661 mask: __m128d,
1662) -> __m128d {
1663 static_assert_imm8_scale!(SCALE);
1664 let slice: *const i8 = slice as *const i8;
1665 let offsets: i64x2 = offsets.as_i64x2();
1666 pgatherqpd(src, slice, offsets, mask, SCALE as i8)
1667}
1668
1669/// Returns values from `slice` at offsets determined by `offsets * scale`,
1670/// where
1671/// `scale` should be 1, 2, 4 or 8.
1672///
1673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i64gather_pd)
1674#[inline]
1675#[target_feature(enable = "avx2")]
1676#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1677#[rustc_legacy_const_generics(2)]
1678#[stable(feature = "simd_x86", since = "1.27.0")]
1679pub unsafe fn _mm256_i64gather_pd<const SCALE: i32>(
1680 slice: *const f64,
1681 offsets: __m256i,
1682) -> __m256d {
1683 static_assert_imm8_scale!(SCALE);
1684 let zero: __m256d = _mm256_setzero_pd();
1685 let neg_one: __m256d = _mm256_set1_pd(-1.0);
1686 let slice: *const i8 = slice as *const i8;
1687 let offsets: i64x4 = offsets.as_i64x4();
1688    vpgatherqpd(zero, slice, offsets, neg_one, SCALE as i8)
1689}
1690
1691/// Returns values from `slice` at offsets determined by `offsets * scale`,
1692/// where
1693/// `scale` should be 1, 2, 4 or 8. Elements are gathered only where the highest
1694/// bit of the corresponding element in `mask` is set; otherwise `src` is used.
1695///
1696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_i64gather_pd)
1697#[inline]
1698#[target_feature(enable = "avx2")]
1699#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
1700#[rustc_legacy_const_generics(4)]
1701#[stable(feature = "simd_x86", since = "1.27.0")]
1702pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
1703 src: __m256d,
1704 slice: *const f64,
1705 offsets: __m256i,
1706 mask: __m256d,
1707) -> __m256d {
1708 static_assert_imm8_scale!(SCALE);
1709 let slice: *const i8 = slice as *const i8;
1710 let offsets: i64x4 = offsets.as_i64x4();
1711 vpgatherqpd(src, slice, offsets, mask, SCALE as i8)
1712}
1713
1714/// Copies `a` to `dst`, then inserts 128 bits (of integer data) from `b` at the
1715/// location specified by `IMM1`.
1716///
1717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti128_si256)
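///
/// A short usage sketch with illustrative values (assumes the `avx2` feature
/// has been detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// let b = _mm_set_epi64x(11, 10); // the 128-bit lane [10, 11], low element first
/// // IMM1 = 1 replaces the upper 128 bits of `a`.
/// let r = _mm256_inserti128_si256::<1>(a, b);
/// let mut out = [0i64; 4];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [0, 1, 10, 11]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```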
1718#[inline]
1719#[target_feature(enable = "avx2")]
1720#[cfg_attr(
1721 all(test, not(target_os = "windows")),
1722 assert_instr(vinsertf128, IMM1 = 1)
1723)]
1724#[rustc_legacy_const_generics(2)]
1725#[stable(feature = "simd_x86", since = "1.27.0")]
1726pub unsafe fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
1727 static_assert_uimm_bits!(IMM1, 1);
1728 let a: i64x4 = a.as_i64x4();
1729 let b: i64x4 = _mm256_castsi128_si256(b).as_i64x4();
1730 let dst: i64x4 = simd_shuffle!(a, b, [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize]);
1731    transmute(dst)
1732}
1733
1734/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
1735/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs
1736/// of intermediate 32-bit integers.
1737///
1738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_madd_epi16)
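///
/// A short usage sketch with illustrative values showing the pairwise
/// multiply-add (assumes the `avx2` feature has been detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Each 32-bit result is 2 * 3 + 2 * 3 = 12.
/// let a = _mm256_set1_epi16(2);
/// let b = _mm256_set1_epi16(3);
/// let r = _mm256_madd_epi16(a, b);
/// let mut out = [0i32; 8];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [12i32; 8]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```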
1739#[inline]
1740#[target_feature(enable = "avx2")]
1741#[cfg_attr(test, assert_instr(vpmaddwd))]
1742#[stable(feature = "simd_x86", since = "1.27.0")]
1743pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
1744    transmute(pmaddwd(a.as_i16x16(), b.as_i16x16()))
1745}
1746
1747/// Vertically multiplies each unsigned 8-bit integer from `a` with the
1748/// corresponding signed 8-bit integer from `b`, producing intermediate
1749/// signed 16-bit integers. Horizontally adds adjacent pairs of intermediate
1750/// signed 16-bit integers.
1751///
1752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maddubs_epi16)
1753#[inline]
1754#[target_feature(enable = "avx2")]
1755#[cfg_attr(test, assert_instr(vpmaddubsw))]
1756#[stable(feature = "simd_x86", since = "1.27.0")]
1757pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
1758    transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32()))
1759}
1760
1761/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using `mask`
1762/// (elements are zeroed out when the highest bit is not set in the
1763/// corresponding element).
1764///
1765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi32)
1766#[inline]
1767#[target_feature(enable = "avx2")]
1768#[cfg_attr(test, assert_instr(vpmaskmovd))]
1769#[stable(feature = "simd_x86", since = "1.27.0")]
1770pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
1771    transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
1772}
1773
1774/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using `mask`
1775/// (elements are zeroed out when the highest bit is not set in the
1776/// corresponding element).
1777///
1778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi32)
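///
/// A short usage sketch with illustrative values: only lanes whose `mask`
/// element has its highest bit set are loaded, the rest read as zero.
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let data = [1i32, 2, 3, 4, 5, 6, 7, 8];
/// // -1 has its highest bit set, 0 does not.
/// let mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
/// let r = _mm256_maskload_epi32(data.as_ptr(), mask);
/// let mut out = [0i32; 8];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [1, 0, 3, 0, 5, 0, 7, 0]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```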
1779#[inline]
1780#[target_feature(enable = "avx2")]
1781#[cfg_attr(test, assert_instr(vpmaskmovd))]
1782#[stable(feature = "simd_x86", since = "1.27.0")]
1783pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
1784    transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
1785}
1786
1787/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using `mask`
1788/// (elements are zeroed out when the highest bit is not set in the
1789/// corresponding element).
1790///
1791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_epi64)
1792#[inline]
1793#[target_feature(enable = "avx2")]
1794#[cfg_attr(test, assert_instr(vpmaskmovq))]
1795#[stable(feature = "simd_x86", since = "1.27.0")]
1796pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
1797    transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
1798}
1799
1800/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using `mask`
1801/// (elements are zeroed out when the highest bit is not set in the
1802/// corresponding element).
1803///
1804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_epi64)
1805#[inline]
1806#[target_feature(enable = "avx2")]
1807#[cfg_attr(test, assert_instr(vpmaskmovq))]
1808#[stable(feature = "simd_x86", since = "1.27.0")]
1809pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
1810    transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
1811}
1812
1813/// Stores packed 32-bit integers from `a` into memory pointed to by `mem_addr`
1814/// using `mask` (elements are not stored when the highest bit is not set
1815/// in the corresponding element).
1816///
1817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi32)
1818#[inline]
1819#[target_feature(enable = "avx2")]
1820#[cfg_attr(test, assert_instr(vpmaskmovd))]
1821#[stable(feature = "simd_x86", since = "1.27.0")]
1822pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
1823    maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
1824}
1825
1826/// Stores packed 32-bit integers from `a` into memory pointed to by `mem_addr`
1827/// using `mask` (elements are not stored when the highest bit is not set
1828/// in the corresponding element).
1829///
1830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi32)
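///
/// A short usage sketch with illustrative values: only lanes whose `mask`
/// element has its highest bit set are written back to memory.
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
/// // -1 has its highest bit set, 0 does not.
/// let mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
/// let mut out = [0i32; 8];
/// _mm256_maskstore_epi32(out.as_mut_ptr(), mask, a);
/// assert_eq!(out, [1, 0, 3, 0, 5, 0, 7, 0]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```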
1831#[inline]
1832#[target_feature(enable = "avx2")]
1833#[cfg_attr(test, assert_instr(vpmaskmovd))]
1834#[stable(feature = "simd_x86", since = "1.27.0")]
1835pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
1836    maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
1837}
1838
1839/// Stores packed 64-bit integers from `a` into memory pointed to by `mem_addr`
1840/// using `mask` (elements are not stored when the highest bit is not set
1841/// in the corresponding element).
1842///
1843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_epi64)
1844#[inline]
1845#[target_feature(enable = "avx2")]
1846#[cfg_attr(test, assert_instr(vpmaskmovq))]
1847#[stable(feature = "simd_x86", since = "1.27.0")]
1848pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
1849    maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
1850}
1851
1852/// Stores packed 64-bit integers from `a` into memory pointed to by `mem_addr`
1853/// using `mask` (elements are not stored when the highest bit is not set
1854/// in the corresponding element).
1855///
1856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_epi64)
1857#[inline]
1858#[target_feature(enable = "avx2")]
1859#[cfg_attr(test, assert_instr(vpmaskmovq))]
1860#[stable(feature = "simd_x86", since = "1.27.0")]
1861pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
1862    maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
1863}
1864
1865/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
1866/// maximum values.
1867///
1868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi16)
1869#[inline]
1870#[target_feature(enable = "avx2")]
1871#[cfg_attr(test, assert_instr(vpmaxsw))]
1872#[stable(feature = "simd_x86", since = "1.27.0")]
1873pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
1874 let a: i16x16 = a.as_i16x16();
1875 let b: i16x16 = b.as_i16x16();
1876    transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
1877}
1878
1879/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
1880/// maximum values.
1881///
1882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi32)
1883#[inline]
1884#[target_feature(enable = "avx2")]
1885#[cfg_attr(test, assert_instr(vpmaxsd))]
1886#[stable(feature = "simd_x86", since = "1.27.0")]
1887pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
1888 let a: i32x8 = a.as_i32x8();
1889 let b: i32x8 = b.as_i32x8();
1890    transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
1891}
1892
1893/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
1894/// maximum values.
1895///
1896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi8)
1897#[inline]
1898#[target_feature(enable = "avx2")]
1899#[cfg_attr(test, assert_instr(vpmaxsb))]
1900#[stable(feature = "simd_x86", since = "1.27.0")]
1901pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
1902 let a: i8x32 = a.as_i8x32();
1903 let b: i8x32 = b.as_i8x32();
1904    transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
1905}
1906
1907/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
1908/// the packed maximum values.
1909///
1910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu16)
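///
/// A short sketch with illustrative values contrasting the unsigned maximum
/// with the signed `_mm256_max_epi16`:
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(1);
/// let b = _mm256_set1_epi16(-1); // 0xFFFF, i.e. 65535 when viewed as unsigned
/// let mut out = [0i16; 16];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, _mm256_max_epu16(a, b));
/// assert_eq!(out, [-1i16; 16]); // unsigned max picks 65535 in every lane
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, _mm256_max_epi16(a, b));
/// assert_eq!(out, [1i16; 16]); // signed max picks 1 in every lane
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```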
1911#[inline]
1912#[target_feature(enable = "avx2")]
1913#[cfg_attr(test, assert_instr(vpmaxuw))]
1914#[stable(feature = "simd_x86", since = "1.27.0")]
1915pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
1916 let a: u16x16 = a.as_u16x16();
1917 let b: u16x16 = b.as_u16x16();
1918    transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
1919}
1920
1921/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
1922/// the packed maximum values.
1923///
1924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu32)
1925#[inline]
1926#[target_feature(enable = "avx2")]
1927#[cfg_attr(test, assert_instr(vpmaxud))]
1928#[stable(feature = "simd_x86", since = "1.27.0")]
1929pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
1930 let a: u32x8 = a.as_u32x8();
1931 let b: u32x8 = b.as_u32x8();
1932    transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
1933}
1934
1935/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
1936/// the packed maximum values.
1937///
1938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu8)
1939#[inline]
1940#[target_feature(enable = "avx2")]
1941#[cfg_attr(test, assert_instr(vpmaxub))]
1942#[stable(feature = "simd_x86", since = "1.27.0")]
1943pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
1944 let a: u8x32 = a.as_u8x32();
1945 let b: u8x32 = b.as_u8x32();
1946    transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
1947}
1948
1949/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
1950/// minimum values.
1951///
1952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi16)
1953#[inline]
1954#[target_feature(enable = "avx2")]
1955#[cfg_attr(test, assert_instr(vpminsw))]
1956#[stable(feature = "simd_x86", since = "1.27.0")]
1957pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
1958 let a: i16x16 = a.as_i16x16();
1959 let b: i16x16 = b.as_i16x16();
1960    transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
1961}
1962
1963/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
1964/// minimum values.
1965///
1966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi32)
1967#[inline]
1968#[target_feature(enable = "avx2")]
1969#[cfg_attr(test, assert_instr(vpminsd))]
1970#[stable(feature = "simd_x86", since = "1.27.0")]
1971pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
1972 let a: i32x8 = a.as_i32x8();
1973 let b: i32x8 = b.as_i32x8();
1974    transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
1975}
1976
1977/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
1978/// minimum values.
1979///
1980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi8)
1981#[inline]
1982#[target_feature(enable = "avx2")]
1983#[cfg_attr(test, assert_instr(vpminsb))]
1984#[stable(feature = "simd_x86", since = "1.27.0")]
1985pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
1986 let a: i8x32 = a.as_i8x32();
1987 let b: i8x32 = b.as_i8x32();
1988    transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
1989}
1990
1991/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
1992/// the packed minimum values.
1993///
1994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu16)
1995#[inline]
1996#[target_feature(enable = "avx2")]
1997#[cfg_attr(test, assert_instr(vpminuw))]
1998#[stable(feature = "simd_x86", since = "1.27.0")]
1999pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
2000 let a: u16x16 = a.as_u16x16();
2001 let b: u16x16 = b.as_u16x16();
2002    transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
2003}
2004
2005/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
2006/// the packed minimum values.
2007///
2008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu32)
2009#[inline]
2010#[target_feature(enable = "avx2")]
2011#[cfg_attr(test, assert_instr(vpminud))]
2012#[stable(feature = "simd_x86", since = "1.27.0")]
2013pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
2014 let a: u32x8 = a.as_u32x8();
2015 let b: u32x8 = b.as_u32x8();
2016    transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
2017}
2018
2019/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
2020/// the packed minimum values.
2021///
2022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu8)
2023#[inline]
2024#[target_feature(enable = "avx2")]
2025#[cfg_attr(test, assert_instr(vpminub))]
2026#[stable(feature = "simd_x86", since = "1.27.0")]
2027pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
2028 let a: u8x32 = a.as_u8x32();
2029 let b: u8x32 = b.as_u8x32();
2030    transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
2031}
2032
2033/// Creates a mask from the most significant bit of each 8-bit element in `a`
2034/// and returns the result.
2035///
2036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_epi8)
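///
/// A short usage sketch (assumes the `avx2` feature has been detected at
/// runtime):
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Every byte of the argument has its most significant bit set, so all 32
/// // bits of the mask are 1.
/// assert_eq!(_mm256_movemask_epi8(_mm256_set1_epi8(-1)), -1);
/// // No byte has its most significant bit set.
/// assert_eq!(_mm256_movemask_epi8(_mm256_set1_epi8(1)), 0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```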
2037#[inline]
2038#[target_feature(enable = "avx2")]
2039#[cfg_attr(test, assert_instr(vpmovmskb))]
2040#[stable(feature = "simd_x86", since = "1.27.0")]
2041pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
2042 let z: i8x32 = i8x32::splat(0);
2043    let m: i8x32 = simd_lt(a.as_i8x32(), z);
2044 simd_bitmask::<_, u32>(m) as i32
2045}
2046
2047/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned
2048/// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit
2049/// results in dst. Eight SADs are performed for each 128-bit lane using one
2050/// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is
2051/// selected from `b` starting at the offset specified in `imm8`. Eight
2052/// quadruplets are formed from sequential 8-bit integers selected from `a`
2053/// starting at the offset specified in `imm8`.
2054///
2055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mpsadbw_epu8)
2056#[inline]
2057#[target_feature(enable = "avx2")]
2058#[cfg_attr(test, assert_instr(vmpsadbw, IMM8 = 0))]
2059#[rustc_legacy_const_generics(2)]
2060#[stable(feature = "simd_x86", since = "1.27.0")]
2061pub unsafe fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2062 static_assert_uimm_bits!(IMM8, 8);
2063    transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8))
2064}
2065
2066/// Multiplies the low 32-bit integers from each packed 64-bit element in
2067/// `a` and `b`
2068///
2069/// Returns the 64-bit results.
2070///
2071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epi32)
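///
/// A short usage sketch with illustrative values: only the low 32-bit integer
/// of each 64-bit element takes part in the multiplication.
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // The odd-indexed 32-bit lanes (the 99s) are ignored.
/// let a = _mm256_setr_epi32(1, 99, 2, 99, 3, 99, 4, 99);
/// let b = _mm256_set1_epi32(10);
/// let r = _mm256_mul_epi32(a, b);
/// let mut out = [0i64; 4];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [10, 20, 30, 40]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```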
2072#[inline]
2073#[target_feature(enable = "avx2")]
2074#[cfg_attr(test, assert_instr(vpmuldq))]
2075#[stable(feature = "simd_x86", since = "1.27.0")]
2076pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
2077 let a: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
2078 let b: i64x4 = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
2079    transmute(simd_mul(a, b))
2080}
2081
2082/// Multiplies the low unsigned 32-bit integers from each packed 64-bit
2083/// element in `a` and `b`
2084///
2085/// Returns the unsigned 64-bit results.
2086///
2087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_epu32)
2088#[inline]
2089#[target_feature(enable = "avx2")]
2090#[cfg_attr(test, assert_instr(vpmuludq))]
2091#[stable(feature = "simd_x86", since = "1.27.0")]
2092pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
2093 let a: u64x4 = a.as_u64x4();
2094 let b: u64x4 = b.as_u64x4();
2095 let mask: u64x4 = u64x4::splat(u32::MAX.into());
2096    transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
2097}
2098
2099/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2100/// intermediate 32-bit integers and returning the high 16 bits of the
2101/// intermediate integers.
2102///
2103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epi16)
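///
/// A short usage sketch with illustrative values (assumes the `avx2` feature
/// has been detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // 1000 * 1000 = 1_000_000 = 0x000F_4240, whose high 16 bits are 0x000F.
/// let a = _mm256_set1_epi16(1000);
/// let b = _mm256_set1_epi16(1000);
/// let r = _mm256_mulhi_epi16(a, b);
/// let mut out = [0i16; 16];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [15i16; 16]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```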
2104#[inline]
2105#[target_feature(enable = "avx2")]
2106#[cfg_attr(test, assert_instr(vpmulhw))]
2107#[stable(feature = "simd_x86", since = "1.27.0")]
2108pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
2109 let a: i32x16 = simd_cast::<_, i32x16>(a.as_i16x16());
2110 let b: i32x16 = simd_cast::<_, i32x16>(b.as_i16x16());
2111    let r: i32x16 = simd_shr(simd_mul(a, b), i32x16::splat(16));
2112    transmute(simd_cast::<i32x16, i16x16>(r))
2113}
2114
2115/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing
2116/// intermediate 32-bit integers and returning the high 16 bits of the
2117/// intermediate integers.
2118///
2119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhi_epu16)
2120#[inline]
2121#[target_feature(enable = "avx2")]
2122#[cfg_attr(test, assert_instr(vpmulhuw))]
2123#[stable(feature = "simd_x86", since = "1.27.0")]
2124pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
2125 let a: u32x16 = simd_cast::<_, u32x16>(a.as_u16x16());
2126 let b: u32x16 = simd_cast::<_, u32x16>(b.as_u16x16());
2127    let r: u32x16 = simd_shr(simd_mul(a, b), u32x16::splat(16));
2128    transmute(simd_cast::<u32x16, u16x16>(r))
2129}
2130
2131/// Multiplies the packed 16-bit integers in `a` and `b`, producing
2132/// intermediate 32-bit integers, and returns the low 16 bits of the
2133/// intermediate integers
2134///
2135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi16)
2136#[inline]
2137#[target_feature(enable = "avx2")]
2138#[cfg_attr(test, assert_instr(vpmullw))]
2139#[stable(feature = "simd_x86", since = "1.27.0")]
2140pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
2141    transmute(simd_mul(a.as_i16x16(), b.as_i16x16()))
2142}
2143
2144/// Multiplies the packed 32-bit integers in `a` and `b`, producing
2145/// intermediate 64-bit integers, and returns the low 32 bits of the
2146/// intermediate integers
2147///
2148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi32)
2149#[inline]
2150#[target_feature(enable = "avx2")]
2151#[cfg_attr(test, assert_instr(vpmulld))]
2152#[stable(feature = "simd_x86", since = "1.27.0")]
2153pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
2154    transmute(simd_mul(a.as_i32x8(), b.as_i32x8()))
2155}
2156
2157/// Multiplies packed 16-bit integers in `a` and `b`, producing
2158/// intermediate signed 32-bit integers. Truncate each intermediate
2159/// integer to the 18 most significant bits, round by adding 1, and
2160/// return bits `[16:1]`.
2161///
2162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mulhrs_epi16)
2163#[inline]
2164#[target_feature(enable = "avx2")]
2165#[cfg_attr(test, assert_instr(vpmulhrsw))]
2166#[stable(feature = "simd_x86", since = "1.27.0")]
2167pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
2168    transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16()))
2169}
2170
2171/// Computes the bitwise OR of 256 bits (representing integer data) in `a`
2172/// and `b`
2173///
2174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_si256)
2175#[inline]
2176#[target_feature(enable = "avx2")]
2177#[cfg_attr(test, assert_instr(vorps))]
2178#[stable(feature = "simd_x86", since = "1.27.0")]
2179pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
2180    transmute(simd_or(a.as_i32x8(), b.as_i32x8()))
2181}
2182
2183/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2184/// using signed saturation
2185///
2186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16)
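///
/// A short usage sketch with illustrative values showing both the saturation
/// and the per-lane interleaving of the two sources:
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(300); // saturates to i8::MAX = 127
/// let b = _mm256_set1_epi16(-300); // saturates to i8::MIN = -128
/// let r = _mm256_packs_epi16(a, b);
/// let mut out = [0i8; 32];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// // Within each 128-bit lane: eight bytes from `a`, then eight from `b`.
/// for (i, &byte) in out.iter().enumerate() {
///     assert_eq!(byte, if (i / 8) % 2 == 0 { 127 } else { -128 });
/// }
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```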
2187#[inline]
2188#[target_feature(enable = "avx2")]
2189#[cfg_attr(test, assert_instr(vpacksswb))]
2190#[stable(feature = "simd_x86", since = "1.27.0")]
2191pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
2192    transmute(packsswb(a.as_i16x16(), b.as_i16x16()))
2193}
2194
2195/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2196/// using signed saturation
2197///
2198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32)
2199#[inline]
2200#[target_feature(enable = "avx2")]
2201#[cfg_attr(test, assert_instr(vpackssdw))]
2202#[stable(feature = "simd_x86", since = "1.27.0")]
2203pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
2204    transmute(packssdw(a.as_i32x8(), b.as_i32x8()))
2205}
2206
2207/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2208/// using unsigned saturation
2209///
2210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16)
2211#[inline]
2212#[target_feature(enable = "avx2")]
2213#[cfg_attr(test, assert_instr(vpackuswb))]
2214#[stable(feature = "simd_x86", since = "1.27.0")]
2215pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
2216    transmute(packuswb(a.as_i16x16(), b.as_i16x16()))
2217}
2218
2219/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2220/// using unsigned saturation
2221///
2222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32)
2223#[inline]
2224#[target_feature(enable = "avx2")]
2225#[cfg_attr(test, assert_instr(vpackusdw))]
2226#[stable(feature = "simd_x86", since = "1.27.0")]
2227pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
2228    transmute(packusdw(a.as_i32x8(), b.as_i32x8()))
2229}
2230
2231/// Permutes packed 32-bit integers from `a` according to the content of `b`.
2232///
2233/// The last 3 bits of each integer of `b` are used as addresses into the 8
2234/// integers of `a`.
2235///
2236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_epi32)
2237#[inline]
2238#[target_feature(enable = "avx2")]
2239#[cfg_attr(test, assert_instr(vpermps))]
2240#[stable(feature = "simd_x86", since = "1.27.0")]
2241pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
2242    transmute(permd(a.as_u32x8(), b.as_u32x8()))
2243}
2244
2245/// Permutes 64-bit integers from `a` using control mask `imm8`.
2246///
2247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_epi64)
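///
/// A short usage sketch with illustrative values (assumes the `avx2` feature
/// has been detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// // Each 2-bit field of the immediate selects a source lane: here 3, 2, 1, 0.
/// let r = _mm256_permute4x64_epi64::<0b00_01_10_11>(a);
/// let mut out = [0i64; 4];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [3, 2, 1, 0]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```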
2248#[inline]
2249#[target_feature(enable = "avx2")]
2250#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
2251#[rustc_legacy_const_generics(1)]
2252#[stable(feature = "simd_x86", since = "1.27.0")]
2253pub unsafe fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2254 static_assert_uimm_bits!(IMM8, 8);
2255 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
2256 let r: i64x4 = simd_shuffle!(
2257 a.as_i64x4(),
2258 zero,
2259 [
2260 IMM8 as u32 & 0b11,
2261 (IMM8 as u32 >> 2) & 0b11,
2262 (IMM8 as u32 >> 4) & 0b11,
2263 (IMM8 as u32 >> 6) & 0b11,
2264 ],
2265 );
2266    transmute(r)
2267}
2268
2269/// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`.
2270///
2271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2x128_si256)
2272#[inline]
2273#[target_feature(enable = "avx2")]
2274#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
2275#[rustc_legacy_const_generics(2)]
2276#[stable(feature = "simd_x86", since = "1.27.0")]
2277pub unsafe fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
2278 static_assert_uimm_bits!(IMM8, 8);
2279    transmute(vperm2i128(a.as_i64x4(), b.as_i64x4(), IMM8 as i8))
2280}
2281
2282/// Shuffles 64-bit floating-point elements in `a` across lanes using the
2283/// control in `imm8`.
2284///
2285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute4x64_pd)
2286#[inline]
2287#[target_feature(enable = "avx2")]
2288#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
2289#[rustc_legacy_const_generics(1)]
2290#[stable(feature = "simd_x86", since = "1.27.0")]
2291pub unsafe fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2292 static_assert_uimm_bits!(IMM8, 8);
2293 simd_shuffle!(
2294 a,
2295 _mm256_undefined_pd(),
2296 [
2297 IMM8 as u32 & 0b11,
2298 (IMM8 as u32 >> 2) & 0b11,
2299 (IMM8 as u32 >> 4) & 0b11,
2300 (IMM8 as u32 >> 6) & 0b11,
2301 ],
2302 )
2303}
2304
2305/// Shuffles eight 32-bit floating-point elements in `a` across lanes using
2306/// the corresponding 32-bit integer index in `idx`.
2307///
2308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar8x32_ps)
2309#[inline]
2310#[target_feature(enable = "avx2")]
2311#[cfg_attr(test, assert_instr(vpermps))]
2312#[stable(feature = "simd_x86", since = "1.27.0")]
2313pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
2314    permps(a, idx.as_i32x8())
2315}
2316
2317/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
2318/// and `b`, then horizontally sums each consecutive group of 8 differences to
2319/// produce four unsigned 16-bit integers, and packs these unsigned 16-bit
2320/// integers in the low 16 bits of each 64-bit element of the result.
2321///
2322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sad_epu8)
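///
/// A short usage sketch with illustrative values (assumes the `avx2` feature
/// has been detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // |5 - 2| = 3 for every byte, so each group of eight bytes sums to 24.
/// let a = _mm256_set1_epi8(5);
/// let b = _mm256_set1_epi8(2);
/// let r = _mm256_sad_epu8(a, b);
/// let mut out = [0i64; 4];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [24i64; 4]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```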
2323#[inline]
2324#[target_feature(enable = "avx2")]
2325#[cfg_attr(test, assert_instr(vpsadbw))]
2326#[stable(feature = "simd_x86", since = "1.27.0")]
2327pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
2328    transmute(psadbw(a.as_u8x32(), b.as_u8x32()))
2329}
2330
2331/// Shuffles bytes from `a` according to the content of `b`.
2332///
2333/// For each of the 128-bit low and high halves of the vectors, the last
2334/// 4 bits of each byte of `b` are used as addresses into the respective
2335/// low or high 16 bytes of `a`. That is, the halves are shuffled separately.
2336///
2337/// In addition, if the most significant bit of a byte of `b` is set, the
2338/// respective destination byte is set to 0.
2339///
2340/// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically
2341/// equivalent to:
2342///
2343/// ```
2344/// fn mm256_shuffle_epi8(a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
2345/// let mut r = [0; 32];
2346/// for i in 0..16 {
2347/// // if the most significant bit of b is set,
2348/// // then the destination byte is set to 0.
2349/// if b[i] & 0x80 == 0u8 {
2350/// r[i] = a[(b[i] % 16) as usize];
2351/// }
2352/// if b[i + 16] & 0x80 == 0u8 {
2353/// r[i + 16] = a[(b[i + 16] % 16 + 16) as usize];
2354/// }
2355/// }
2356/// r
2357/// }
2358/// ```
2359///
2360/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi8)
2361#[inline]
2362#[target_feature(enable = "avx2")]
2363#[cfg_attr(test, assert_instr(vpshufb))]
2364#[stable(feature = "simd_x86", since = "1.27.0")]
2365pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
2366    transmute(pshufb(a.as_u8x32(), b.as_u8x32()))
2367}
2368
2369/// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in
2370/// `imm8`.
2371///
2372/// ```rust
2373/// #[cfg(target_arch = "x86")]
2374/// use std::arch::x86::*;
2375/// #[cfg(target_arch = "x86_64")]
2376/// use std::arch::x86_64::*;
2377///
2378/// # fn main() {
2379/// # if is_x86_feature_detected!("avx2") {
2380/// # #[target_feature(enable = "avx2")]
2381/// # unsafe fn worker() {
2382/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
2383///
2384/// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01);
2385/// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11);
2386///
2387/// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4);
2388/// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5);
2389///
2390/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !0);
2391/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !0);
2392/// # }
2393/// # unsafe { worker(); }
2394/// # }
2395/// # }
2396/// ```
2397///
2398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_epi32)
2399#[inline]
2400#[target_feature(enable = "avx2")]
2401#[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
2402#[rustc_legacy_const_generics(1)]
2403#[stable(feature = "simd_x86", since = "1.27.0")]
2404pub unsafe fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2405 static_assert_uimm_bits!(MASK, 8);
2406 let r: i32x8 = simd_shuffle!(
2407 a.as_i32x8(),
2408 a.as_i32x8(),
2409 [
2410 MASK as u32 & 0b11,
2411 (MASK as u32 >> 2) & 0b11,
2412 (MASK as u32 >> 4) & 0b11,
2413 (MASK as u32 >> 6) & 0b11,
2414 (MASK as u32 & 0b11) + 4,
2415 ((MASK as u32 >> 2) & 0b11) + 4,
2416 ((MASK as u32 >> 4) & 0b11) + 4,
2417 ((MASK as u32 >> 6) & 0b11) + 4,
2418 ],
2419 );
2420    transmute(r)
2421}
2422
2423/// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using
2424/// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied
2425/// to the output.
2426///
2427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflehi_epi16)
2428#[inline]
2429#[target_feature(enable = "avx2")]
2430#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
2431#[rustc_legacy_const_generics(1)]
2432#[stable(feature = "simd_x86", since = "1.27.0")]
2433pub unsafe fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2434 static_assert_uimm_bits!(IMM8, 8);
2435 let a = a.as_i16x16();
2436 let r: i16x16 = simd_shuffle!(
2437 a,
2438 a,
2439 [
2440 0,
2441 1,
2442 2,
2443 3,
2444 4 + (IMM8 as u32 & 0b11),
2445 4 + ((IMM8 as u32 >> 2) & 0b11),
2446 4 + ((IMM8 as u32 >> 4) & 0b11),
2447 4 + ((IMM8 as u32 >> 6) & 0b11),
2448 8,
2449 9,
2450 10,
2451 11,
2452 12 + (IMM8 as u32 & 0b11),
2453 12 + ((IMM8 as u32 >> 2) & 0b11),
2454 12 + ((IMM8 as u32 >> 4) & 0b11),
2455 12 + ((IMM8 as u32 >> 6) & 0b11),
2456 ],
2457 );
2458 transmute(r)
2459}
2460
2461/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using
2462/// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied
2463/// to the output.
2464///
2465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shufflelo_epi16)
2466#[inline]
2467#[target_feature(enable = "avx2")]
2468#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
2469#[rustc_legacy_const_generics(1)]
2470#[stable(feature = "simd_x86", since = "1.27.0")]
2471pub unsafe fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2472 static_assert_uimm_bits!(IMM8, 8);
2473 let a = a.as_i16x16();
2474 let r: i16x16 = simd_shuffle!(
2475 a,
2476 a,
2477 [
2478 0 + (IMM8 as u32 & 0b11),
2479 0 + ((IMM8 as u32 >> 2) & 0b11),
2480 0 + ((IMM8 as u32 >> 4) & 0b11),
2481 0 + ((IMM8 as u32 >> 6) & 0b11),
2482 4,
2483 5,
2484 6,
2485 7,
2486 8 + (IMM8 as u32 & 0b11),
2487 8 + ((IMM8 as u32 >> 2) & 0b11),
2488 8 + ((IMM8 as u32 >> 4) & 0b11),
2489 8 + ((IMM8 as u32 >> 6) & 0b11),
2490 12,
2491 13,
2492 14,
2493 15,
2494 ],
2495 );
2496 transmute(r)
2497}
2498
2499/// Negates packed 16-bit integers in `a` when the corresponding signed
2500/// 16-bit integer in `b` is negative, and returns the results.
2501/// Results are zeroed out when the corresponding element in `b` is zero.
2502///
2503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi16)
2504#[inline]
2505#[target_feature(enable = "avx2")]
2506#[cfg_attr(test, assert_instr(vpsignw))]
2507#[stable(feature = "simd_x86", since = "1.27.0")]
2508pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
2509    transmute(psignw(a.as_i16x16(), b.as_i16x16()))
2510}
2511
2512/// Negates packed 32-bit integers in `a` when the corresponding signed
2513/// 32-bit integer in `b` is negative, and returns the results.
2514/// Results are zeroed out when the corresponding element in `b` is zero.
2515///
2516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi32)
2517#[inline]
2518#[target_feature(enable = "avx2")]
2519#[cfg_attr(test, assert_instr(vpsignd))]
2520#[stable(feature = "simd_x86", since = "1.27.0")]
2521pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
2522    transmute(psignd(a.as_i32x8(), b.as_i32x8()))
2523}
2524
2525/// Negates packed 8-bit integers in `a` when the corresponding signed
2526/// 8-bit integer in `b` is negative, and returns the results.
2527/// Results are zeroed out when the corresponding element in `b` is zero.
2528///
2529/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sign_epi8)
2530#[inline]
2531#[target_feature(enable = "avx2")]
2532#[cfg_attr(test, assert_instr(vpsignb))]
2533#[stable(feature = "simd_x86", since = "1.27.0")]
2534pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
2535    transmute(psignb(a.as_i8x32(), b.as_i8x32()))
2536}
2537
2538/// Shifts packed 16-bit integers in `a` left by `count` while
2539/// shifting in zeros, and returns the result
2540///
2541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi16)
2542#[inline]
2543#[target_feature(enable = "avx2")]
2544#[cfg_attr(test, assert_instr(vpsllw))]
2545#[stable(feature = "simd_x86", since = "1.27.0")]
2546pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
2547    transmute(psllw(a.as_i16x16(), count.as_i16x8()))
2548}
2549
2550/// Shifts packed 32-bit integers in `a` left by `count` while
2551/// shifting in zeros, and returns the result
2552///
2553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi32)
2554#[inline]
2555#[target_feature(enable = "avx2")]
2556#[cfg_attr(test, assert_instr(vpslld))]
2557#[stable(feature = "simd_x86", since = "1.27.0")]
2558pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
2559    transmute(pslld(a.as_i32x8(), count.as_i32x4()))
2560}
2561
2562/// Shifts packed 64-bit integers in `a` left by `count` while
2563/// shifting in zeros, and returns the result
2564///
2565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sll_epi64)
2566#[inline]
2567#[target_feature(enable = "avx2")]
2568#[cfg_attr(test, assert_instr(vpsllq))]
2569#[stable(feature = "simd_x86", since = "1.27.0")]
2570pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
2571    transmute(psllq(a.as_i64x4(), count.as_i64x2()))
2572}
2573
2574/// Shifts packed 16-bit integers in `a` left by `IMM8` while
2575/// shifting in zeros, and returns the results.
2576///
2577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi16)
2578#[inline]
2579#[target_feature(enable = "avx2")]
2580#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
2581#[rustc_legacy_const_generics(1)]
2582#[stable(feature = "simd_x86", since = "1.27.0")]
2583pub unsafe fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2584 static_assert_uimm_bits!(IMM8, 8);
2585 if IMM8 >= 16 {
2586 _mm256_setzero_si256()
2587 } else {
2588        transmute(simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
2589 }
2590}
2591
2592/// Shifts packed 32-bit integers in `a` left by `IMM8` while
2593/// shifting in zeros, and returns the results.
2594///
2595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi32)
2596#[inline]
2597#[target_feature(enable = "avx2")]
2598#[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
2599#[rustc_legacy_const_generics(1)]
2600#[stable(feature = "simd_x86", since = "1.27.0")]
2601pub unsafe fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2602 static_assert_uimm_bits!(IMM8, 8);
2603 if IMM8 >= 32 {
2604 _mm256_setzero_si256()
2605 } else {
2606        transmute(simd_shl(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
2607 }
2608}
2609
2610/// Shifts packed 64-bit integers in `a` left by `IMM8` while
2611/// shifting in zeros, and returns the results.
2612///
2613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_epi64)
2614#[inline]
2615#[target_feature(enable = "avx2")]
2616#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
2617#[rustc_legacy_const_generics(1)]
2618#[stable(feature = "simd_x86", since = "1.27.0")]
2619pub unsafe fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2620 static_assert_uimm_bits!(IMM8, 8);
2621 if IMM8 >= 64 {
2622 _mm256_setzero_si256()
2623 } else {
2624        transmute(simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
2625 }
2626}
2627
2628/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2629///
2630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_slli_si256)
2631#[inline]
2632#[target_feature(enable = "avx2")]
2633#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2634#[rustc_legacy_const_generics(1)]
2635#[stable(feature = "simd_x86", since = "1.27.0")]
2636pub unsafe fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2637 static_assert_uimm_bits!(IMM8, 8);
2638 _mm256_bslli_epi128::<IMM8>(a)
2639}
2640
2641/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
2642///
2643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bslli_epi128)
2644#[inline]
2645#[target_feature(enable = "avx2")]
2646#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
2647#[rustc_legacy_const_generics(1)]
2648#[stable(feature = "simd_x86", since = "1.27.0")]
2649pub unsafe fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2650 static_assert_uimm_bits!(IMM8, 8);
2651 const fn mask(shift: i32, i: u32) -> u32 {
2652 let shift = shift as u32 & 0xff;
2653 if shift > 15 || i % 16 < shift {
2654 0
2655 } else {
2656 32 + (i - shift)
2657 }
2658 }
2659 let a = a.as_i8x32();
2660 let zero = _mm256_setzero_si256().as_i8x32();
2661 let r: i8x32 = simd_shuffle!(
2662 zero,
2663 a,
2664 [
2665 mask(IMM8, 0),
2666 mask(IMM8, 1),
2667 mask(IMM8, 2),
2668 mask(IMM8, 3),
2669 mask(IMM8, 4),
2670 mask(IMM8, 5),
2671 mask(IMM8, 6),
2672 mask(IMM8, 7),
2673 mask(IMM8, 8),
2674 mask(IMM8, 9),
2675 mask(IMM8, 10),
2676 mask(IMM8, 11),
2677 mask(IMM8, 12),
2678 mask(IMM8, 13),
2679 mask(IMM8, 14),
2680 mask(IMM8, 15),
2681 mask(IMM8, 16),
2682 mask(IMM8, 17),
2683 mask(IMM8, 18),
2684 mask(IMM8, 19),
2685 mask(IMM8, 20),
2686 mask(IMM8, 21),
2687 mask(IMM8, 22),
2688 mask(IMM8, 23),
2689 mask(IMM8, 24),
2690 mask(IMM8, 25),
2691 mask(IMM8, 26),
2692 mask(IMM8, 27),
2693 mask(IMM8, 28),
2694 mask(IMM8, 29),
2695 mask(IMM8, 30),
2696 mask(IMM8, 31),
2697 ],
2698 );
2699 transmute(r)
2700}
2701
2702/// Shifts packed 32-bit integers in `a` left by the amount
2703/// specified by the corresponding element in `count` while
2704/// shifting in zeros, and returns the result.
2705///
2706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi32)
2707#[inline]
2708#[target_feature(enable = "avx2")]
2709#[cfg_attr(test, assert_instr(vpsllvd))]
2710#[stable(feature = "simd_x86", since = "1.27.0")]
2711pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
2712    transmute(psllvd(a.as_i32x4(), count.as_i32x4()))
2713}
2714
2715/// Shifts packed 32-bit integers in `a` left by the amount
2716/// specified by the corresponding element in `count` while
2717/// shifting in zeros, and returns the result.
2718///
2719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi32)
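///
/// A short usage sketch with illustrative values: each lane is shifted by its
/// own count.
///
/// ```
/// # #[cfg(target_arch = "x86")] use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi32(1);
/// let counts = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// let r = _mm256_sllv_epi32(a, counts);
/// let mut out = [0i32; 8];
/// _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
/// assert_eq!(out, [1, 2, 4, 8, 16, 32, 64, 128]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```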
2720#[inline]
2721#[target_feature(enable = "avx2")]
2722#[cfg_attr(test, assert_instr(vpsllvd))]
2723#[stable(feature = "simd_x86", since = "1.27.0")]
2724pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
    transmute(psllvd256(a.as_i32x8(), count.as_i32x8()))
2726}
2727
2728/// Shifts packed 64-bit integers in `a` left by the amount
2729/// specified by the corresponding element in `count` while
2730/// shifting in zeros, and returns the result.
2731///
2732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sllv_epi64)
2733#[inline]
2734#[target_feature(enable = "avx2")]
2735#[cfg_attr(test, assert_instr(vpsllvq))]
2736#[stable(feature = "simd_x86", since = "1.27.0")]
2737pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllvq(a.as_i64x2(), count.as_i64x2()))
2739}
2740
2741/// Shifts packed 64-bit integers in `a` left by the amount
2742/// specified by the corresponding element in `count` while
2743/// shifting in zeros, and returns the result.
2744///
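/// A minimal sketch with example values (not from Intel's documentation):
/// each element is shifted by its own count, and a count of 64 or more
/// clears the element.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi64x(1);
/// let count = _mm256_setr_epi64x(0, 1, 2, 65);
///
/// let c = _mm256_sllv_epi64(a, count);
///
/// let expected = _mm256_setr_epi64x(1, 2, 4, 0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///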
2745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sllv_epi64)
2746#[inline]
2747#[target_feature(enable = "avx2")]
2748#[cfg_attr(test, assert_instr(vpsllvq))]
2749#[stable(feature = "simd_x86", since = "1.27.0")]
2750pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
    transmute(psllvq256(a.as_i64x4(), count.as_i64x4()))
2752}
2753
2754/// Shifts packed 16-bit integers in `a` right by `count` while
2755/// shifting in sign bits.
2756///
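/// A minimal sketch with example values: every element is shifted by the
/// same count, taken from the low 64 bits of `count`, and the sign bit is
/// shifted in.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(-8);
/// let count = _mm_setr_epi32(2, 0, 0, 0);
///
/// let c = _mm256_sra_epi16(a, count);
///
/// let expected = _mm256_set1_epi16(-2);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///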
2757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi16)
2758#[inline]
2759#[target_feature(enable = "avx2")]
2760#[cfg_attr(test, assert_instr(vpsraw))]
2761#[stable(feature = "simd_x86", since = "1.27.0")]
2762pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
    transmute(psraw(a.as_i16x16(), count.as_i16x8()))
2764}
2765
2766/// Shifts packed 32-bit integers in `a` right by `count` while
2767/// shifting in sign bits.
2768///
2769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi32)
2770#[inline]
2771#[target_feature(enable = "avx2")]
2772#[cfg_attr(test, assert_instr(vpsrad))]
2773#[stable(feature = "simd_x86", since = "1.27.0")]
2774pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrad(a.as_i32x8(), count.as_i32x4()))
2776}
2777
2778/// Shifts packed 16-bit integers in `a` right by `IMM8` while
2779/// shifting in sign bits.
2780///
2781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi16)
2782#[inline]
2783#[target_feature(enable = "avx2")]
2784#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
2785#[rustc_legacy_const_generics(1)]
2786#[stable(feature = "simd_x86", since = "1.27.0")]
2787pub unsafe fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2788 static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16)))
2790}
2791
2792/// Shifts packed 32-bit integers in `a` right by `IMM8` while
2793/// shifting in sign bits.
2794///
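/// An illustrative sketch (example values only): the sign bit is replicated,
/// so negative elements stay negative.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(-32, 32, -8, 8, -1, 1, -2, 2);
///
/// let c = _mm256_srai_epi32::<2>(a);
///
/// let expected = _mm256_setr_epi32(-8, 8, -2, 2, -1, 0, -1, 0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///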
2795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi32)
2796#[inline]
2797#[target_feature(enable = "avx2")]
2798#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
2799#[rustc_legacy_const_generics(1)]
2800#[stable(feature = "simd_x86", since = "1.27.0")]
2801pub unsafe fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
2802 static_assert_uimm_bits!(IMM8, 8);
    transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31))))
2804}
2805
2806/// Shifts packed 32-bit integers in `a` right by the amount specified by the
2807/// corresponding element in `count` while shifting in sign bits.
2808///
2809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi32)
2810#[inline]
2811#[target_feature(enable = "avx2")]
2812#[cfg_attr(test, assert_instr(vpsravd))]
2813#[stable(feature = "simd_x86", since = "1.27.0")]
2814pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psravd(a.as_i32x4(), count.as_i32x4()))
2816}
2817
2818/// Shifts packed 32-bit integers in `a` right by the amount specified by the
2819/// corresponding element in `count` while shifting in sign bits.
2820///
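/// A minimal sketch with example values: each element uses its own count,
/// and a count of 32 or more fills the element with its sign bit.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(-1024, 1024, -1024, 1024, -1, -1, 3, 3);
/// let count = _mm256_setr_epi32(1, 1, 4, 4, 5, 35, 1, 35);
///
/// let c = _mm256_srav_epi32(a, count);
///
/// let expected = _mm256_setr_epi32(-512, 512, -64, 64, -1, -1, 1, 0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///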
2821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi32)
2822#[inline]
2823#[target_feature(enable = "avx2")]
2824#[cfg_attr(test, assert_instr(vpsravd))]
2825#[stable(feature = "simd_x86", since = "1.27.0")]
2826pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
    transmute(psravd256(a.as_i32x8(), count.as_i32x8()))
2828}
2829
2830/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
2831///
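/// An illustrative sketch (example values only): shifting each 128-bit lane
/// right by 8 bytes moves the high 64 bits of each lane into its low 64 bits.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(0, 1, 0, 3);
///
/// let c = _mm256_srli_si256::<8>(a);
///
/// let expected = _mm256_setr_epi64x(1, 0, 3, 0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///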
2832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_si256)
2833#[inline]
2834#[target_feature(enable = "avx2")]
2835#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2836#[rustc_legacy_const_generics(1)]
2837#[stable(feature = "simd_x86", since = "1.27.0")]
2838pub unsafe fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
2839 static_assert_uimm_bits!(IMM8, 8);
2840 _mm256_bsrli_epi128::<IMM8>(a)
2841}
2842
2843/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
2844///
2845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bsrli_epi128)
2846#[inline]
2847#[target_feature(enable = "avx2")]
2848#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
2849#[rustc_legacy_const_generics(1)]
2850#[stable(feature = "simd_x86", since = "1.27.0")]
2851pub unsafe fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
2852 static_assert_uimm_bits!(IMM8, 8);
2853 let a = a.as_i8x32();
2854 let zero = _mm256_setzero_si256().as_i8x32();
2855 let r: i8x32 = match IMM8 % 16 {
2856 0 => simd_shuffle!(
2857 a,
2858 zero,
2859 [
2860 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
2861 23, 24, 25, 26, 27, 28, 29, 30, 31,
2862 ],
2863 ),
2864 1 => simd_shuffle!(
2865 a,
2866 zero,
2867 [
2868 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23,
2869 24, 25, 26, 27, 28, 29, 30, 31, 32,
2870 ],
2871 ),
2872 2 => simd_shuffle!(
2873 a,
2874 zero,
2875 [
2876 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 18, 19, 20, 21, 22, 23, 24,
2877 25, 26, 27, 28, 29, 30, 31, 32, 32,
2878 ],
2879 ),
2880 3 => simd_shuffle!(
2881 a,
2882 zero,
2883 [
2884 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 19, 20, 21, 22, 23, 24,
2885 25, 26, 27, 28, 29, 30, 31, 32, 32, 32,
2886 ],
2887 ),
2888 4 => simd_shuffle!(
2889 a,
2890 zero,
2891 [
2892 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 20, 21, 22, 23, 24, 25,
2893 26, 27, 28, 29, 30, 31, 32, 32, 32, 32,
2894 ],
2895 ),
2896 5 => simd_shuffle!(
2897 a,
2898 zero,
2899 [
2900 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 21, 22, 23, 24, 25, 26,
2901 27, 28, 29, 30, 31, 32, 32, 32, 32, 32,
2902 ],
2903 ),
2904 6 => simd_shuffle!(
2905 a,
2906 zero,
2907 [
2908 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 22, 23, 24, 25, 26, 27,
2909 28, 29, 30, 31, 32, 32, 32, 32, 32, 32,
2910 ],
2911 ),
2912 7 => simd_shuffle!(
2913 a,
2914 zero,
2915 [
2916 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 23, 24, 25, 26, 27,
2917 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32,
2918 ],
2919 ),
2920 8 => simd_shuffle!(
2921 a,
2922 zero,
2923 [
2924 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 24, 25, 26, 27, 28,
2925 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32,
2926 ],
2927 ),
2928 9 => simd_shuffle!(
2929 a,
2930 zero,
2931 [
2932 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 25, 26, 27, 28, 29,
2933 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2934 ],
2935 ),
2936 10 => simd_shuffle!(
2937 a,
2938 zero,
2939 [
2940 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 26, 27, 28, 29, 30,
2941 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2942 ],
2943 ),
2944 11 => simd_shuffle!(
2945 a,
2946 zero,
2947 [
2948 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 27, 28, 29, 30, 31,
2949 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2950 ],
2951 ),
2952 12 => simd_shuffle!(
2953 a,
2954 zero,
2955 [
2956 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 28, 29, 30, 31, 32,
2957 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2958 ],
2959 ),
2960 13 => simd_shuffle!(
2961 a,
2962 zero,
2963 [
2964 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 29, 30, 31, 32, 32,
2965 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2966 ],
2967 ),
2968 14 => simd_shuffle!(
2969 a,
2970 zero,
2971 [
2972 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 30, 31, 32, 32, 32,
2973 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2974 ],
2975 ),
2976 15 => simd_shuffle!(
2977 a,
2978 zero,
2979 [
                15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, 32,
2981 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
2982 ],
2983 ),
2984 _ => zero,
2985 };
2986 transmute(r)
2987}
2988
2989/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
2990/// zeros.
2991///
2992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi16)
2993#[inline]
2994#[target_feature(enable = "avx2")]
2995#[cfg_attr(test, assert_instr(vpsrlw))]
2996#[stable(feature = "simd_x86", since = "1.27.0")]
2997pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrlw(a.as_i16x16(), count.as_i16x8()))
2999}
3000
3001/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
3002/// zeros.
3003///
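/// A minimal sketch with example values: every element is shifted by the
/// same count, taken from the low 64 bits of `count`.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi32(32);
/// let count = _mm_setr_epi32(2, 0, 0, 0);
///
/// let c = _mm256_srl_epi32(a, count);
///
/// let expected = _mm256_set1_epi32(8);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///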
3004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi32)
3005#[inline]
3006#[target_feature(enable = "avx2")]
3007#[cfg_attr(test, assert_instr(vpsrld))]
3008#[stable(feature = "simd_x86", since = "1.27.0")]
3009pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrld(a.as_i32x8(), count.as_i32x4()))
3011}
3012
3013/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
3014/// zeros.
3015///
3016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srl_epi64)
3017#[inline]
3018#[target_feature(enable = "avx2")]
3019#[cfg_attr(test, assert_instr(vpsrlq))]
3020#[stable(feature = "simd_x86", since = "1.27.0")]
3021pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrlq(a.as_i64x4(), count.as_i64x2()))
3023}
3024
3025/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
3027///
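/// An illustrative sketch (example values only): the shift fills with zeros,
/// so a negative input is treated as its unsigned bit pattern.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(-1);
///
/// let c = _mm256_srli_epi16::<12>(a);
///
/// // 0xFFFF >> 12 == 0x000F
/// let expected = _mm256_set1_epi16(0xF);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///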
3028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi16)
3029#[inline]
3030#[target_feature(enable = "avx2")]
3031#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
3032#[rustc_legacy_const_generics(1)]
3033#[stable(feature = "simd_x86", since = "1.27.0")]
3034pub unsafe fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
3035 static_assert_uimm_bits!(IMM8, 8);
3036 if IMM8 >= 16 {
3037 _mm256_setzero_si256()
3038 } else {
        transmute(simd_shr(a.as_u16x16(), u16x16::splat(IMM8 as u16)))
3040 }
3041}
3042
3043/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
3045///
3046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi32)
3047#[inline]
3048#[target_feature(enable = "avx2")]
3049#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
3050#[rustc_legacy_const_generics(1)]
3051#[stable(feature = "simd_x86", since = "1.27.0")]
3052pub unsafe fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
3053 static_assert_uimm_bits!(IMM8, 8);
3054 if IMM8 >= 32 {
3055 _mm256_setzero_si256()
3056 } else {
        transmute(simd_shr(a.as_u32x8(), u32x8::splat(IMM8 as u32)))
3058 }
3059}
3060
3061/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
3063///
3064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srli_epi64)
3065#[inline]
3066#[target_feature(enable = "avx2")]
3067#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
3068#[rustc_legacy_const_generics(1)]
3069#[stable(feature = "simd_x86", since = "1.27.0")]
3070pub unsafe fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
3071 static_assert_uimm_bits!(IMM8, 8);
3072 if IMM8 >= 64 {
3073 _mm256_setzero_si256()
3074 } else {
        transmute(simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)))
3076 }
3077}
3078
3079/// Shifts packed 32-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3081///
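/// A minimal sketch with example values: each element is shifted by its own
/// count, and a count of 32 or more clears the element.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm_set1_epi32(32);
/// let count = _mm_setr_epi32(0, 1, 5, 33);
///
/// let c = _mm_srlv_epi32(a, count);
///
/// let expected = _mm_setr_epi32(32, 16, 1, 0);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(c, expected)), 0xFFFF);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///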
3082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi32)
3083#[inline]
3084#[target_feature(enable = "avx2")]
3085#[cfg_attr(test, assert_instr(vpsrlvd))]
3086#[stable(feature = "simd_x86", since = "1.27.0")]
3087pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlvd(a.as_i32x4(), count.as_i32x4()))
3089}
3090
3091/// Shifts packed 32-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3093///
3094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi32)
3095#[inline]
3096#[target_feature(enable = "avx2")]
3097#[cfg_attr(test, assert_instr(vpsrlvd))]
3098#[stable(feature = "simd_x86", since = "1.27.0")]
3099pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
    transmute(psrlvd256(a.as_i32x8(), count.as_i32x8()))
3101}
3102
3103/// Shifts packed 64-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3105///
3106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srlv_epi64)
3107#[inline]
3108#[target_feature(enable = "avx2")]
3109#[cfg_attr(test, assert_instr(vpsrlvq))]
3110#[stable(feature = "simd_x86", since = "1.27.0")]
3111pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlvq(a.as_i64x2(), count.as_i64x2()))
3113}
3114
3115/// Shifts packed 64-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
3117///
3118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srlv_epi64)
3119#[inline]
3120#[target_feature(enable = "avx2")]
3121#[cfg_attr(test, assert_instr(vpsrlvq))]
3122#[stable(feature = "simd_x86", since = "1.27.0")]
3123pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
    transmute(psrlvq256(a.as_i64x4(), count.as_i64x4()))
3125}
3126
3127// TODO _mm256_stream_load_si256 (__m256i const* mem_addr)
3128
3129/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
3130///
3131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16)
3132#[inline]
3133#[target_feature(enable = "avx2")]
3134#[cfg_attr(test, assert_instr(vpsubw))]
3135#[stable(feature = "simd_x86", since = "1.27.0")]
3136pub unsafe fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i16x16(), b.as_i16x16()))
3138}
3139
3140/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`
3141///
3142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi32)
3143#[inline]
3144#[target_feature(enable = "avx2")]
3145#[cfg_attr(test, assert_instr(vpsubd))]
3146#[stable(feature = "simd_x86", since = "1.27.0")]
3147pub unsafe fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i32x8(), b.as_i32x8()))
3149}
3150
3151/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`
3152///
3153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi64)
3154#[inline]
3155#[target_feature(enable = "avx2")]
3156#[cfg_attr(test, assert_instr(vpsubq))]
3157#[stable(feature = "simd_x86", since = "1.27.0")]
3158pub unsafe fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i64x4(), b.as_i64x4()))
3160}
3161
3162/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
3163///
3164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi8)
3165#[inline]
3166#[target_feature(enable = "avx2")]
3167#[cfg_attr(test, assert_instr(vpsubb))]
3168#[stable(feature = "simd_x86", since = "1.27.0")]
3169pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i8x32(), b.as_i8x32()))
3171}
3172
3173/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in
3174/// `a` using saturation.
3175///
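/// An illustrative sketch (example values only): the subtraction saturates
/// at the `i16` bounds instead of wrapping.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(i16::MIN);
/// let b = _mm256_set1_epi16(1);
///
/// let c = _mm256_subs_epi16(a, b);
///
/// // i16::MIN - 1 saturates to i16::MIN rather than wrapping to i16::MAX.
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, a)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///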
3176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi16)
3177#[inline]
3178#[target_feature(enable = "avx2")]
3179#[cfg_attr(test, assert_instr(vpsubsw))]
3180#[stable(feature = "simd_x86", since = "1.27.0")]
3181pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16()))
3183}
3184
3185/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in
3186/// `a` using saturation.
3187///
3188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epi8)
3189#[inline]
3190#[target_feature(enable = "avx2")]
3191#[cfg_attr(test, assert_instr(vpsubsb))]
3192#[stable(feature = "simd_x86", since = "1.27.0")]
3193pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32()))
3195}
3196
3197/// Subtract packed unsigned 16-bit integers in `b` from packed 16-bit
3198/// integers in `a` using saturation.
3199///
3200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu16)
3201#[inline]
3202#[target_feature(enable = "avx2")]
3203#[cfg_attr(test, assert_instr(vpsubusw))]
3204#[stable(feature = "simd_x86", since = "1.27.0")]
3205pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16()))
3207}
3208
3209/// Subtract packed unsigned 8-bit integers in `b` from packed 8-bit
3210/// integers in `a` using saturation.
3211///
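/// An illustrative sketch (example values only): the unsigned subtraction
/// saturates at zero instead of wrapping.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(1);
/// let b = _mm256_set1_epi8(5);
///
/// let c = _mm256_subs_epu8(a, b);
///
/// // Unsigned 1 - 5 saturates to 0 rather than wrapping.
/// let expected = _mm256_setzero_si256();
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///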
3212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_subs_epu8)
3213#[inline]
3214#[target_feature(enable = "avx2")]
3215#[cfg_attr(test, assert_instr(vpsubusb))]
3216#[stable(feature = "simd_x86", since = "1.27.0")]
3217pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32()))
3219}
3220
/// Unpacks and interleaves 8-bit integers from the high half of each
3222/// 128-bit lane in `a` and `b`.
3223///
3224/// ```rust
3225/// #[cfg(target_arch = "x86")]
3226/// use std::arch::x86::*;
3227/// #[cfg(target_arch = "x86_64")]
3228/// use std::arch::x86_64::*;
3229///
3230/// # fn main() {
3231/// # if is_x86_feature_detected!("avx2") {
3232/// # #[target_feature(enable = "avx2")]
3233/// # unsafe fn worker() {
3234/// let a = _mm256_setr_epi8(
3235/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
3236/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3237/// );
3238/// let b = _mm256_setr_epi8(
3239/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3240/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
3241/// -30, -31,
3242/// );
3243///
3244/// let c = _mm256_unpackhi_epi8(a, b);
3245///
3246/// let expected = _mm256_setr_epi8(
3247/// 8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15,
3248/// 24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31,
3249/// -31,
3250/// );
3251/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3252///
3253/// # }
3254/// # unsafe { worker(); }
3255/// # }
3256/// # }
3257/// ```
3258///
3259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi8)
3260#[inline]
3261#[target_feature(enable = "avx2")]
3262#[cfg_attr(test, assert_instr(vpunpckhbw))]
3263#[stable(feature = "simd_x86", since = "1.27.0")]
3264pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
3265 #[rustfmt::skip]
3266 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3267 8, 40, 9, 41, 10, 42, 11, 43,
3268 12, 44, 13, 45, 14, 46, 15, 47,
3269 24, 56, 25, 57, 26, 58, 27, 59,
3270 28, 60, 29, 61, 30, 62, 31, 63,
3271 ]);
    transmute(r)
3273}
3274
/// Unpacks and interleaves 8-bit integers from the low half of each
3276/// 128-bit lane of `a` and `b`.
3277///
3278/// ```rust
3279/// #[cfg(target_arch = "x86")]
3280/// use std::arch::x86::*;
3281/// #[cfg(target_arch = "x86_64")]
3282/// use std::arch::x86_64::*;
3283///
3284/// # fn main() {
3285/// # if is_x86_feature_detected!("avx2") {
3286/// # #[target_feature(enable = "avx2")]
3287/// # unsafe fn worker() {
3288/// let a = _mm256_setr_epi8(
3289/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
3290/// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3291/// );
3292/// let b = _mm256_setr_epi8(
3293/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3294/// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
3295/// -30, -31,
3296/// );
3297///
3298/// let c = _mm256_unpacklo_epi8(a, b);
3299///
3300/// let expected = _mm256_setr_epi8(
3301/// 0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17,
3302/// -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23,
3303/// );
3304/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3305///
3306/// # }
3307/// # unsafe { worker(); }
3308/// # }
3309/// # }
3310/// ```
3311///
3312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi8)
3313#[inline]
3314#[target_feature(enable = "avx2")]
3315#[cfg_attr(test, assert_instr(vpunpcklbw))]
3316#[stable(feature = "simd_x86", since = "1.27.0")]
3317pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
3318 #[rustfmt::skip]
3319 let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
3320 0, 32, 1, 33, 2, 34, 3, 35,
3321 4, 36, 5, 37, 6, 38, 7, 39,
3322 16, 48, 17, 49, 18, 50, 19, 51,
3323 20, 52, 21, 53, 22, 54, 23, 55,
3324 ]);
    transmute(r)
3326}
3327
/// Unpacks and interleaves 16-bit integers from the high half of each
3329/// 128-bit lane of `a` and `b`.
3330///
3331/// ```rust
3332/// #[cfg(target_arch = "x86")]
3333/// use std::arch::x86::*;
3334/// #[cfg(target_arch = "x86_64")]
3335/// use std::arch::x86_64::*;
3336///
3337/// # fn main() {
3338/// # if is_x86_feature_detected!("avx2") {
3339/// # #[target_feature(enable = "avx2")]
3340/// # unsafe fn worker() {
3341/// let a = _mm256_setr_epi16(
3342/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3343/// );
3344/// let b = _mm256_setr_epi16(
3345/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3346/// );
3347///
3348/// let c = _mm256_unpackhi_epi16(a, b);
3349///
3350/// let expected = _mm256_setr_epi16(
3351/// 4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15,
3352/// );
3353/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3354///
3355/// # }
3356/// # unsafe { worker(); }
3357/// # }
3358/// # }
3359/// ```
3360///
3361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi16)
3362#[inline]
3363#[target_feature(enable = "avx2")]
3364#[cfg_attr(test, assert_instr(vpunpckhwd))]
3365#[stable(feature = "simd_x86", since = "1.27.0")]
3366pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
3367 let r: i16x16 = simd_shuffle!(
3368 a.as_i16x16(),
3369 b.as_i16x16(),
3370 [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
3371 );
    transmute(r)
3373}
3374
/// Unpacks and interleaves 16-bit integers from the low half of each
3376/// 128-bit lane of `a` and `b`.
3377///
3378/// ```rust
3379/// #[cfg(target_arch = "x86")]
3380/// use std::arch::x86::*;
3381/// #[cfg(target_arch = "x86_64")]
3382/// use std::arch::x86_64::*;
3383///
3384/// # fn main() {
3385/// # if is_x86_feature_detected!("avx2") {
3386/// # #[target_feature(enable = "avx2")]
3387/// # unsafe fn worker() {
3388///
3389/// let a = _mm256_setr_epi16(
3390/// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3391/// );
3392/// let b = _mm256_setr_epi16(
3393/// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
3394/// );
3395///
3396/// let c = _mm256_unpacklo_epi16(a, b);
3397///
3398/// let expected = _mm256_setr_epi16(
3399/// 0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11,
3400/// );
3401/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3402///
3403/// # }
3404/// # unsafe { worker(); }
3405/// # }
3406/// # }
3407/// ```
3408///
3409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi16)
3410#[inline]
3411#[target_feature(enable = "avx2")]
3412#[cfg_attr(test, assert_instr(vpunpcklwd))]
3413#[stable(feature = "simd_x86", since = "1.27.0")]
3414pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
3415 let r: i16x16 = simd_shuffle!(
3416 a.as_i16x16(),
3417 b.as_i16x16(),
3418 [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
3419 );
    transmute(r)
3421}
3422
/// Unpacks and interleaves 32-bit integers from the high half of each
3424/// 128-bit lane of `a` and `b`.
3425///
3426/// ```rust
3427/// #[cfg(target_arch = "x86")]
3428/// use std::arch::x86::*;
3429/// #[cfg(target_arch = "x86_64")]
3430/// use std::arch::x86_64::*;
3431///
3432/// # fn main() {
3433/// # if is_x86_feature_detected!("avx2") {
3434/// # #[target_feature(enable = "avx2")]
3435/// # unsafe fn worker() {
3436/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
3437/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
3438///
3439/// let c = _mm256_unpackhi_epi32(a, b);
3440///
3441/// let expected = _mm256_setr_epi32(2, -2, 3, -3, 6, -6, 7, -7);
3442/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3443///
3444/// # }
3445/// # unsafe { worker(); }
3446/// # }
3447/// # }
3448/// ```
3449///
3450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi32)
3451#[inline]
3452#[target_feature(enable = "avx2")]
3453#[cfg_attr(test, assert_instr(vunpckhps))]
3454#[stable(feature = "simd_x86", since = "1.27.0")]
3455pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
3456 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
    transmute(r)
3458}
3459
/// Unpacks and interleaves 32-bit integers from the low half of each
3461/// 128-bit lane of `a` and `b`.
3462///
3463/// ```rust
3464/// #[cfg(target_arch = "x86")]
3465/// use std::arch::x86::*;
3466/// #[cfg(target_arch = "x86_64")]
3467/// use std::arch::x86_64::*;
3468///
3469/// # fn main() {
3470/// # if is_x86_feature_detected!("avx2") {
3471/// # #[target_feature(enable = "avx2")]
3472/// # unsafe fn worker() {
3473/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
3474/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
3475///
3476/// let c = _mm256_unpacklo_epi32(a, b);
3477///
3478/// let expected = _mm256_setr_epi32(0, 0, 1, -1, 4, -4, 5, -5);
3479/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3480///
3481/// # }
3482/// # unsafe { worker(); }
3483/// # }
3484/// # }
3485/// ```
3486///
3487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi32)
3488#[inline]
3489#[target_feature(enable = "avx2")]
3490#[cfg_attr(test, assert_instr(vunpcklps))]
3491#[stable(feature = "simd_x86", since = "1.27.0")]
3492pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
3493 let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
    transmute(r)
3495}
3496
/// Unpacks and interleaves 64-bit integers from the high half of each
3498/// 128-bit lane of `a` and `b`.
3499///
3500/// ```rust
3501/// #[cfg(target_arch = "x86")]
3502/// use std::arch::x86::*;
3503/// #[cfg(target_arch = "x86_64")]
3504/// use std::arch::x86_64::*;
3505///
3506/// # fn main() {
3507/// # if is_x86_feature_detected!("avx2") {
3508/// # #[target_feature(enable = "avx2")]
3509/// # unsafe fn worker() {
3510/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
3511/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
3512///
3513/// let c = _mm256_unpackhi_epi64(a, b);
3514///
3515/// let expected = _mm256_setr_epi64x(1, -1, 3, -3);
3516/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3517///
3518/// # }
3519/// # unsafe { worker(); }
3520/// # }
3521/// # }
3522/// ```
3523///
3524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_epi64)
3525#[inline]
3526#[target_feature(enable = "avx2")]
3527#[cfg_attr(test, assert_instr(vunpckhpd))]
3528#[stable(feature = "simd_x86", since = "1.27.0")]
3529pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
3530 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
    transmute(r)
3532}
3533
/// Unpacks and interleaves 64-bit integers from the low half of each
3535/// 128-bit lane of `a` and `b`.
3536///
3537/// ```rust
3538/// #[cfg(target_arch = "x86")]
3539/// use std::arch::x86::*;
3540/// #[cfg(target_arch = "x86_64")]
3541/// use std::arch::x86_64::*;
3542///
3543/// # fn main() {
3544/// # if is_x86_feature_detected!("avx2") {
3545/// # #[target_feature(enable = "avx2")]
3546/// # unsafe fn worker() {
3547/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
3548/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
3549///
3550/// let c = _mm256_unpacklo_epi64(a, b);
3551///
3552/// let expected = _mm256_setr_epi64x(0, 0, 2, -2);
3553/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
3554///
3555/// # }
3556/// # unsafe { worker(); }
3557/// # }
3558/// # }
3559/// ```
3560///
3561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_epi64)
3562#[inline]
3563#[target_feature(enable = "avx2")]
3564#[cfg_attr(test, assert_instr(vunpcklpd))]
3565#[stable(feature = "simd_x86", since = "1.27.0")]
3566pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
3567 let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
    transmute(r)
3569}
3570
3571/// Computes the bitwise XOR of 256 bits (representing integer data)
/// in `a` and `b`.
3573///
3574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_si256)
3575#[inline]
3576#[target_feature(enable = "avx2")]
3577#[cfg_attr(test, assert_instr(vxorps))]
3578#[stable(feature = "simd_x86", since = "1.27.0")]
3579pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_xor(a.as_i64x4(), b.as_i64x4()))
3581}
3582
3583/// Extracts an 8-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3584/// integer containing the zero-extended integer data.
3585///
3586/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3587///
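/// A minimal sketch with example values: the selected byte is zero-extended,
/// so a negative byte comes back as its unsigned value.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(-1);
///
/// // The byte 0xFF is zero-extended to 255, not sign-extended to -1.
/// assert_eq!(_mm256_extract_epi8::<3>(a), 255);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///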
3588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi8)
3589#[inline]
3590#[target_feature(enable = "avx2")]
3591// This intrinsic has no corresponding instruction.
3592#[rustc_legacy_const_generics(1)]
3593#[stable(feature = "simd_x86", since = "1.27.0")]
3594pub unsafe fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
3595 static_assert_uimm_bits!(INDEX, 5);
3596 simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32
3597}
3598
3599/// Extracts a 16-bit integer from `a`, selected with `INDEX`. Returns a 32-bit
3600/// integer containing the zero-extended integer data.
3601///
3602/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
3603///
3604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi16)
3605#[inline]
3606#[target_feature(enable = "avx2")]
3607// This intrinsic has no corresponding instruction.
3608#[rustc_legacy_const_generics(1)]
3609#[stable(feature = "simd_x86", since = "1.27.0")]
3610pub unsafe fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
3611 static_assert_uimm_bits!(INDEX, 4);
3612 simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32
3613}
3614
3615/// Extracts a 32-bit integer from `a`, selected with `INDEX`.
3616///
3617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32)
3618#[inline]
3619#[target_feature(enable = "avx2")]
3620// This intrinsic has no corresponding instruction.
3621#[rustc_legacy_const_generics(1)]
3622#[stable(feature = "simd_x86", since = "1.27.0")]
3623pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
3624 static_assert_uimm_bits!(INDEX, 3);
3625 simd_extract!(a.as_i32x8(), INDEX as u32)
3626}
3627
3628/// Returns the first element of the input vector of `[4 x double]`.
3629///
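/// A minimal sketch with example values:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_pd(1.5, 2.5, 3.5, 4.5);
/// assert_eq!(_mm256_cvtsd_f64(a), 1.5);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///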
3630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsd_f64)
3631#[inline]
3632#[target_feature(enable = "avx2")]
3633//#[cfg_attr(test, assert_instr(movsd))] FIXME
3634#[stable(feature = "simd_x86", since = "1.27.0")]
3635pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
3636 simd_extract!(a, 0)
3637}
3638
3639/// Returns the first element of the input vector of `[8 x i32]`.
3640///
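/// A minimal sketch with example values:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(7, 1, 2, 3, 4, 5, 6, 8);
/// assert_eq!(_mm256_cvtsi256_si32(a), 7);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///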
3641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32)
3642#[inline]
3643#[target_feature(enable = "avx2")]
3644#[stable(feature = "simd_x86", since = "1.27.0")]
3645pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
3646 simd_extract!(a.as_i32x8(), 0)
3647}
3648
3649#[allow(improper_ctypes)]
3650extern "C" {
3651 #[link_name = "llvm.x86.avx2.phadd.w"]
3652 fn phaddw(a: i16x16, b: i16x16) -> i16x16;
3653 #[link_name = "llvm.x86.avx2.phadd.d"]
3654 fn phaddd(a: i32x8, b: i32x8) -> i32x8;
3655 #[link_name = "llvm.x86.avx2.phadd.sw"]
3656 fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
3657 #[link_name = "llvm.x86.avx2.phsub.w"]
3658 fn phsubw(a: i16x16, b: i16x16) -> i16x16;
3659 #[link_name = "llvm.x86.avx2.phsub.d"]
3660 fn phsubd(a: i32x8, b: i32x8) -> i32x8;
3661 #[link_name = "llvm.x86.avx2.phsub.sw"]
3662 fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
3663 #[link_name = "llvm.x86.avx2.pmadd.wd"]
3664 fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
3665 #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
3666 fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
3667 #[link_name = "llvm.x86.avx2.maskload.d"]
3668 fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
3669 #[link_name = "llvm.x86.avx2.maskload.d.256"]
3670 fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
3671 #[link_name = "llvm.x86.avx2.maskload.q"]
3672 fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
3673 #[link_name = "llvm.x86.avx2.maskload.q.256"]
3674 fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
3675 #[link_name = "llvm.x86.avx2.maskstore.d"]
3676 fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
3677 #[link_name = "llvm.x86.avx2.maskstore.d.256"]
3678 fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
3679 #[link_name = "llvm.x86.avx2.maskstore.q"]
3680 fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
3681 #[link_name = "llvm.x86.avx2.maskstore.q.256"]
3682 fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
3683 #[link_name = "llvm.x86.avx2.mpsadbw"]
3684 fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
3685 #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
3686 fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
3687 #[link_name = "llvm.x86.avx2.packsswb"]
3688 fn packsswb(a: i16x16, b: i16x16) -> i8x32;
3689 #[link_name = "llvm.x86.avx2.packssdw"]
3690 fn packssdw(a: i32x8, b: i32x8) -> i16x16;
3691 #[link_name = "llvm.x86.avx2.packuswb"]
3692 fn packuswb(a: i16x16, b: i16x16) -> u8x32;
3693 #[link_name = "llvm.x86.avx2.packusdw"]
3694 fn packusdw(a: i32x8, b: i32x8) -> u16x16;
3695 #[link_name = "llvm.x86.avx2.psad.bw"]
3696 fn psadbw(a: u8x32, b: u8x32) -> u64x4;
3697 #[link_name = "llvm.x86.avx2.psign.b"]
3698 fn psignb(a: i8x32, b: i8x32) -> i8x32;
3699 #[link_name = "llvm.x86.avx2.psign.w"]
3700 fn psignw(a: i16x16, b: i16x16) -> i16x16;
3701 #[link_name = "llvm.x86.avx2.psign.d"]
3702 fn psignd(a: i32x8, b: i32x8) -> i32x8;
3703 #[link_name = "llvm.x86.avx2.psll.w"]
3704 fn psllw(a: i16x16, count: i16x8) -> i16x16;
3705 #[link_name = "llvm.x86.avx2.psll.d"]
3706 fn pslld(a: i32x8, count: i32x4) -> i32x8;
3707 #[link_name = "llvm.x86.avx2.psll.q"]
3708 fn psllq(a: i64x4, count: i64x2) -> i64x4;
3709 #[link_name = "llvm.x86.avx2.psllv.d"]
3710 fn psllvd(a: i32x4, count: i32x4) -> i32x4;
3711 #[link_name = "llvm.x86.avx2.psllv.d.256"]
3712 fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
3713 #[link_name = "llvm.x86.avx2.psllv.q"]
3714 fn psllvq(a: i64x2, count: i64x2) -> i64x2;
3715 #[link_name = "llvm.x86.avx2.psllv.q.256"]
3716 fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
3717 #[link_name = "llvm.x86.avx2.psra.w"]
3718 fn psraw(a: i16x16, count: i16x8) -> i16x16;
3719 #[link_name = "llvm.x86.avx2.psra.d"]
3720 fn psrad(a: i32x8, count: i32x4) -> i32x8;
3721 #[link_name = "llvm.x86.avx2.psrav.d"]
3722 fn psravd(a: i32x4, count: i32x4) -> i32x4;
3723 #[link_name = "llvm.x86.avx2.psrav.d.256"]
3724 fn psravd256(a: i32x8, count: i32x8) -> i32x8;
3725 #[link_name = "llvm.x86.avx2.psrl.w"]
3726 fn psrlw(a: i16x16, count: i16x8) -> i16x16;
3727 #[link_name = "llvm.x86.avx2.psrl.d"]
3728 fn psrld(a: i32x8, count: i32x4) -> i32x8;
3729 #[link_name = "llvm.x86.avx2.psrl.q"]
3730 fn psrlq(a: i64x4, count: i64x2) -> i64x4;
3731 #[link_name = "llvm.x86.avx2.psrlv.d"]
3732 fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
3733 #[link_name = "llvm.x86.avx2.psrlv.d.256"]
3734 fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
3735 #[link_name = "llvm.x86.avx2.psrlv.q"]
3736 fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
3737 #[link_name = "llvm.x86.avx2.psrlv.q.256"]
3738 fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
3739 #[link_name = "llvm.x86.avx2.pshuf.b"]
3740 fn pshufb(a: u8x32, b: u8x32) -> u8x32;
3741 #[link_name = "llvm.x86.avx2.permd"]
3742 fn permd(a: u32x8, b: u32x8) -> u32x8;
3743 #[link_name = "llvm.x86.avx2.permps"]
3744 fn permps(a: __m256, b: i32x8) -> __m256;
3745 #[link_name = "llvm.x86.avx2.vperm2i128"]
3746 fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
3747 #[link_name = "llvm.x86.avx2.gather.d.d"]
3748 fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
3749 #[link_name = "llvm.x86.avx2.gather.d.d.256"]
3750 fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
3751 #[link_name = "llvm.x86.avx2.gather.d.q"]
3752 fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
3753 #[link_name = "llvm.x86.avx2.gather.d.q.256"]
3754 fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
3755 #[link_name = "llvm.x86.avx2.gather.q.d"]
3756 fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
3757 #[link_name = "llvm.x86.avx2.gather.q.d.256"]
3758 fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
3759 #[link_name = "llvm.x86.avx2.gather.q.q"]
3760 fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
3761 #[link_name = "llvm.x86.avx2.gather.q.q.256"]
3762 fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
3763 #[link_name = "llvm.x86.avx2.gather.d.pd"]
3764 fn pgatherdpd(
3765 src: __m128d,
3766 slice: *const i8,
3767 offsets: i32x4,
3768 mask: __m128d,
3769 scale: i8,
3770 ) -> __m128d;
3771 #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
3772 fn vpgatherdpd(
3773 src: __m256d,
3774 slice: *const i8,
3775 offsets: i32x4,
3776 mask: __m256d,
3777 scale: i8,
3778 ) -> __m256d;
3779 #[link_name = "llvm.x86.avx2.gather.q.pd"]
3780 fn pgatherqpd(
3781 src: __m128d,
3782 slice: *const i8,
3783 offsets: i64x2,
3784 mask: __m128d,
3785 scale: i8,
3786 ) -> __m128d;
3787 #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
3788 fn vpgatherqpd(
3789 src: __m256d,
3790 slice: *const i8,
3791 offsets: i64x4,
3792 mask: __m256d,
3793 scale: i8,
3794 ) -> __m256d;
3795 #[link_name = "llvm.x86.avx2.gather.d.ps"]
3796 fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8)
3797 -> __m128;
3798 #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
3799 fn vpgatherdps(
3800 src: __m256,
3801 slice: *const i8,
3802 offsets: i32x8,
3803 mask: __m256,
3804 scale: i8,
3805 ) -> __m256;
3806 #[link_name = "llvm.x86.avx2.gather.q.ps"]
3807 fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8)
3808 -> __m128;
3809 #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
3810 fn vpgatherqps(
3811 src: __m128,
3812 slice: *const i8,
3813 offsets: i64x4,
3814 mask: __m128,
3815 scale: i8,
3816 ) -> __m128;
3817 #[link_name = "llvm.x86.avx2.psll.dq"]
3818 fn vpslldq(a: i64x4, b: i32) -> i64x4;
3819 #[link_name = "llvm.x86.avx2.psrl.dq"]
3820 fn vpsrldq(a: i64x4, b: i32) -> i64x4;
3821}
3822
3823#[cfg(test)]
3824mod tests {
3825
3826 use stdarch_test::simd_test;
3827
3828 use crate::core_arch::x86::*;
3829
3830 #[simd_test(enable = "avx2")]
3831 unsafe fn test_mm256_abs_epi32() {
3832 #[rustfmt::skip]
3833 let a = _mm256_setr_epi32(
3834 0, 1, -1, i32::MAX,
3835 i32::MIN, 100, -100, -32,
3836 );
3837 let r = _mm256_abs_epi32(a);
3838 #[rustfmt::skip]
3839 let e = _mm256_setr_epi32(
3840 0, 1, 1, i32::MAX,
3841 i32::MAX.wrapping_add(1), 100, 100, 32,
3842 );
3843 assert_eq_m256i(r, e);
3844 }
3845
3846 #[simd_test(enable = "avx2")]
3847 unsafe fn test_mm256_abs_epi16() {
3848 #[rustfmt::skip]
3849 let a = _mm256_setr_epi16(
3850 0, 1, -1, 2, -2, 3, -3, 4,
3851 -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
3852 );
3853 let r = _mm256_abs_epi16(a);
3854 #[rustfmt::skip]
3855 let e = _mm256_setr_epi16(
3856 0, 1, 1, 2, 2, 3, 3, 4,
3857 4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
3858 );
3859 assert_eq_m256i(r, e);
3860 }
3861
3862 #[simd_test(enable = "avx2")]
3863 unsafe fn test_mm256_abs_epi8() {
3864 #[rustfmt::skip]
3865 let a = _mm256_setr_epi8(
3866 0, 1, -1, 2, -2, 3, -3, 4,
3867 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3868 0, 1, -1, 2, -2, 3, -3, 4,
3869 -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
3870 );
3871 let r = _mm256_abs_epi8(a);
3872 #[rustfmt::skip]
3873 let e = _mm256_setr_epi8(
3874 0, 1, 1, 2, 2, 3, 3, 4,
3875 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3876 0, 1, 1, 2, 2, 3, 3, 4,
3877 4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
3878 );
3879 assert_eq_m256i(r, e);
3880 }
3881
3882 #[simd_test(enable = "avx2")]
3883 unsafe fn test_mm256_add_epi64() {
3884 let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
3885 let b = _mm256_setr_epi64x(-1, 0, 1, 2);
3886 let r = _mm256_add_epi64(a, b);
3887 let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
3888 assert_eq_m256i(r, e);
3889 }
3890
3891 #[simd_test(enable = "avx2")]
3892 unsafe fn test_mm256_add_epi32() {
3893 let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
3894 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3895 let r = _mm256_add_epi32(a, b);
3896 let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
3897 assert_eq_m256i(r, e);
3898 }
3899
3900 #[simd_test(enable = "avx2")]
3901 unsafe fn test_mm256_add_epi16() {
3902 #[rustfmt::skip]
3903 let a = _mm256_setr_epi16(
3904 0, 1, 2, 3, 4, 5, 6, 7,
3905 8, 9, 10, 11, 12, 13, 14, 15,
3906 );
3907 #[rustfmt::skip]
3908 let b = _mm256_setr_epi16(
3909 0, 1, 2, 3, 4, 5, 6, 7,
3910 8, 9, 10, 11, 12, 13, 14, 15,
3911 );
3912 let r = _mm256_add_epi16(a, b);
3913 #[rustfmt::skip]
3914 let e = _mm256_setr_epi16(
3915 0, 2, 4, 6, 8, 10, 12, 14,
3916 16, 18, 20, 22, 24, 26, 28, 30,
3917 );
3918 assert_eq_m256i(r, e);
3919 }
3920
3921 #[simd_test(enable = "avx2")]
3922 unsafe fn test_mm256_add_epi8() {
3923 #[rustfmt::skip]
3924 let a = _mm256_setr_epi8(
3925 0, 1, 2, 3, 4, 5, 6, 7,
3926 8, 9, 10, 11, 12, 13, 14, 15,
3927 16, 17, 18, 19, 20, 21, 22, 23,
3928 24, 25, 26, 27, 28, 29, 30, 31,
3929 );
3930 #[rustfmt::skip]
3931 let b = _mm256_setr_epi8(
3932 0, 1, 2, 3, 4, 5, 6, 7,
3933 8, 9, 10, 11, 12, 13, 14, 15,
3934 16, 17, 18, 19, 20, 21, 22, 23,
3935 24, 25, 26, 27, 28, 29, 30, 31,
3936 );
3937 let r = _mm256_add_epi8(a, b);
3938 #[rustfmt::skip]
3939 let e = _mm256_setr_epi8(
3940 0, 2, 4, 6, 8, 10, 12, 14,
3941 16, 18, 20, 22, 24, 26, 28, 30,
3942 32, 34, 36, 38, 40, 42, 44, 46,
3943 48, 50, 52, 54, 56, 58, 60, 62,
3944 );
3945 assert_eq_m256i(r, e);
3946 }
3947
3948 #[simd_test(enable = "avx2")]
3949 unsafe fn test_mm256_adds_epi8() {
3950 #[rustfmt::skip]
3951 let a = _mm256_setr_epi8(
3952 0, 1, 2, 3, 4, 5, 6, 7,
3953 8, 9, 10, 11, 12, 13, 14, 15,
3954 16, 17, 18, 19, 20, 21, 22, 23,
3955 24, 25, 26, 27, 28, 29, 30, 31,
3956 );
3957 #[rustfmt::skip]
3958 let b = _mm256_setr_epi8(
3959 32, 33, 34, 35, 36, 37, 38, 39,
3960 40, 41, 42, 43, 44, 45, 46, 47,
3961 48, 49, 50, 51, 52, 53, 54, 55,
3962 56, 57, 58, 59, 60, 61, 62, 63,
3963 );
3964 let r = _mm256_adds_epi8(a, b);
3965 #[rustfmt::skip]
3966 let e = _mm256_setr_epi8(
3967 32, 34, 36, 38, 40, 42, 44, 46,
3968 48, 50, 52, 54, 56, 58, 60, 62,
3969 64, 66, 68, 70, 72, 74, 76, 78,
3970 80, 82, 84, 86, 88, 90, 92, 94,
3971 );
3972 assert_eq_m256i(r, e);
3973 }
3974
3975 #[simd_test(enable = "avx2")]
3976 unsafe fn test_mm256_adds_epi8_saturate_positive() {
3977 let a = _mm256_set1_epi8(0x7F);
3978 let b = _mm256_set1_epi8(1);
3979 let r = _mm256_adds_epi8(a, b);
3980 assert_eq_m256i(r, a);
3981 }
3982
3983 #[simd_test(enable = "avx2")]
3984 unsafe fn test_mm256_adds_epi8_saturate_negative() {
3985 let a = _mm256_set1_epi8(-0x80);
3986 let b = _mm256_set1_epi8(-1);
3987 let r = _mm256_adds_epi8(a, b);
3988 assert_eq_m256i(r, a);
3989 }
3990
3991 #[simd_test(enable = "avx2")]
3992 unsafe fn test_mm256_adds_epi16() {
3993 #[rustfmt::skip]
3994 let a = _mm256_setr_epi16(
3995 0, 1, 2, 3, 4, 5, 6, 7,
3996 8, 9, 10, 11, 12, 13, 14, 15,
3997 );
3998 #[rustfmt::skip]
3999 let b = _mm256_setr_epi16(
4000 32, 33, 34, 35, 36, 37, 38, 39,
4001 40, 41, 42, 43, 44, 45, 46, 47,
4002 );
4003 let r = _mm256_adds_epi16(a, b);
4004 #[rustfmt::skip]
4005 let e = _mm256_setr_epi16(
4006 32, 34, 36, 38, 40, 42, 44, 46,
4007 48, 50, 52, 54, 56, 58, 60, 62,
4008 );
4009
4010 assert_eq_m256i(r, e);
4011 }
4012
4013 #[simd_test(enable = "avx2")]
4014 unsafe fn test_mm256_adds_epi16_saturate_positive() {
4015 let a = _mm256_set1_epi16(0x7FFF);
4016 let b = _mm256_set1_epi16(1);
4017 let r = _mm256_adds_epi16(a, b);
4018 assert_eq_m256i(r, a);
4019 }
4020
4021 #[simd_test(enable = "avx2")]
4022 unsafe fn test_mm256_adds_epi16_saturate_negative() {
4023 let a = _mm256_set1_epi16(-0x8000);
4024 let b = _mm256_set1_epi16(-1);
4025 let r = _mm256_adds_epi16(a, b);
4026 assert_eq_m256i(r, a);
4027 }
4028
4029 #[simd_test(enable = "avx2")]
4030 unsafe fn test_mm256_adds_epu8() {
4031 #[rustfmt::skip]
4032 let a = _mm256_setr_epi8(
4033 0, 1, 2, 3, 4, 5, 6, 7,
4034 8, 9, 10, 11, 12, 13, 14, 15,
4035 16, 17, 18, 19, 20, 21, 22, 23,
4036 24, 25, 26, 27, 28, 29, 30, 31,
4037 );
4038 #[rustfmt::skip]
4039 let b = _mm256_setr_epi8(
4040 32, 33, 34, 35, 36, 37, 38, 39,
4041 40, 41, 42, 43, 44, 45, 46, 47,
4042 48, 49, 50, 51, 52, 53, 54, 55,
4043 56, 57, 58, 59, 60, 61, 62, 63,
4044 );
4045 let r = _mm256_adds_epu8(a, b);
4046 #[rustfmt::skip]
4047 let e = _mm256_setr_epi8(
4048 32, 34, 36, 38, 40, 42, 44, 46,
4049 48, 50, 52, 54, 56, 58, 60, 62,
4050 64, 66, 68, 70, 72, 74, 76, 78,
4051 80, 82, 84, 86, 88, 90, 92, 94,
4052 );
4053 assert_eq_m256i(r, e);
4054 }
4055
4056 #[simd_test(enable = "avx2")]
4057 unsafe fn test_mm256_adds_epu8_saturate() {
4058 let a = _mm256_set1_epi8(!0);
4059 let b = _mm256_set1_epi8(1);
4060 let r = _mm256_adds_epu8(a, b);
4061 assert_eq_m256i(r, a);
4062 }
4063
4064 #[simd_test(enable = "avx2")]
4065 unsafe fn test_mm256_adds_epu16() {
4066 #[rustfmt::skip]
4067 let a = _mm256_setr_epi16(
4068 0, 1, 2, 3, 4, 5, 6, 7,
4069 8, 9, 10, 11, 12, 13, 14, 15,
4070 );
4071 #[rustfmt::skip]
4072 let b = _mm256_setr_epi16(
4073 32, 33, 34, 35, 36, 37, 38, 39,
4074 40, 41, 42, 43, 44, 45, 46, 47,
4075 );
4076 let r = _mm256_adds_epu16(a, b);
4077 #[rustfmt::skip]
4078 let e = _mm256_setr_epi16(
4079 32, 34, 36, 38, 40, 42, 44, 46,
4080 48, 50, 52, 54, 56, 58, 60, 62,
4081 );
4082
4083 assert_eq_m256i(r, e);
4084 }
4085
4086 #[simd_test(enable = "avx2")]
4087 unsafe fn test_mm256_adds_epu16_saturate() {
4088 let a = _mm256_set1_epi16(!0);
4089 let b = _mm256_set1_epi16(1);
4090 let r = _mm256_adds_epu16(a, b);
4091 assert_eq_m256i(r, a);
4092 }
4093
4094 #[simd_test(enable = "avx2")]
4095 unsafe fn test_mm256_and_si256() {
4096 let a = _mm256_set1_epi8(5);
4097 let b = _mm256_set1_epi8(3);
4098 let got = _mm256_and_si256(a, b);
4099 assert_eq_m256i(got, _mm256_set1_epi8(1));
4100 }
4101
4102 #[simd_test(enable = "avx2")]
4103 unsafe fn test_mm256_andnot_si256() {
4104 let a = _mm256_set1_epi8(5);
4105 let b = _mm256_set1_epi8(3);
4106 let got = _mm256_andnot_si256(a, b);
4107 assert_eq_m256i(got, _mm256_set1_epi8(2));
4108 }
4109
4110 #[simd_test(enable = "avx2")]
4111 unsafe fn test_mm256_avg_epu8() {
4112 let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4113 let r = _mm256_avg_epu8(a, b);
4114 assert_eq_m256i(r, _mm256_set1_epi8(6));
4115 }
4116
4117 #[simd_test(enable = "avx2")]
4118 unsafe fn test_mm256_avg_epu16() {
4119 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4120 let r = _mm256_avg_epu16(a, b);
4121 assert_eq_m256i(r, _mm256_set1_epi16(6));
4122 }
4123
4124 #[simd_test(enable = "avx2")]
4125 unsafe fn test_mm_blend_epi32() {
4126 let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4127 let e = _mm_setr_epi32(9, 3, 3, 3);
4128 let r = _mm_blend_epi32::<0x01>(a, b);
4129 assert_eq_m128i(r, e);
4130
4131 let r = _mm_blend_epi32::<0x0E>(b, a);
4132 assert_eq_m128i(r, e);
4133 }
4134
4135 #[simd_test(enable = "avx2")]
4136 unsafe fn test_mm256_blend_epi32() {
4137 let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4138 let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4139 let r = _mm256_blend_epi32::<0x01>(a, b);
4140 assert_eq_m256i(r, e);
4141
4142 let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4143 let r = _mm256_blend_epi32::<0x82>(a, b);
4144 assert_eq_m256i(r, e);
4145
4146 let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4147 let r = _mm256_blend_epi32::<0x7C>(a, b);
4148 assert_eq_m256i(r, e);
4149 }
4150
4151 #[simd_test(enable = "avx2")]
4152 unsafe fn test_mm256_blend_epi16() {
4153 let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4154 let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4155 let r = _mm256_blend_epi16::<0x01>(a, b);
4156 assert_eq_m256i(r, e);
4157
4158 let r = _mm256_blend_epi16::<0xFE>(b, a);
4159 assert_eq_m256i(r, e);
4160 }
4161
4162 #[simd_test(enable = "avx2")]
4163 unsafe fn test_mm256_blendv_epi8() {
4164 let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4165 let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
4166 let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
4167 let r = _mm256_blendv_epi8(a, b, mask);
4168 assert_eq_m256i(r, e);
4169 }
4170
4171 #[simd_test(enable = "avx2")]
4172 unsafe fn test_mm_broadcastb_epi8() {
4173 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4174 let res = _mm_broadcastb_epi8(a);
4175 assert_eq_m128i(res, _mm_set1_epi8(0x2a));
4176 }
4177
4178 #[simd_test(enable = "avx2")]
4179 unsafe fn test_mm256_broadcastb_epi8() {
4180 let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
4181 let res = _mm256_broadcastb_epi8(a);
4182 assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
4183 }
4184
4185 #[simd_test(enable = "avx2")]
4186 unsafe fn test_mm_broadcastd_epi32() {
4187 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4188 let res = _mm_broadcastd_epi32(a);
4189 assert_eq_m128i(res, _mm_set1_epi32(0x2a));
4190 }
4191
4192 #[simd_test(enable = "avx2")]
4193 unsafe fn test_mm256_broadcastd_epi32() {
4194 let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4195 let res = _mm256_broadcastd_epi32(a);
4196 assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
4197 }
4198
4199 #[simd_test(enable = "avx2")]
4200 unsafe fn test_mm_broadcastq_epi64() {
4201 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4202 let res = _mm_broadcastq_epi64(a);
4203 assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
4204 }
4205
4206 #[simd_test(enable = "avx2")]
4207 unsafe fn test_mm256_broadcastq_epi64() {
4208 let a = _mm_setr_epi64x(0x1ffffffff, 0);
4209 let res = _mm256_broadcastq_epi64(a);
4210 assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
4211 }
4212
4213 #[simd_test(enable = "avx2")]
4214 unsafe fn test_mm_broadcastsd_pd() {
4215 let a = _mm_setr_pd(6.88, 3.44);
4216 let res = _mm_broadcastsd_pd(a);
4217 assert_eq_m128d(res, _mm_set1_pd(6.88));
4218 }
4219
4220 #[simd_test(enable = "avx2")]
4221 unsafe fn test_mm256_broadcastsd_pd() {
4222 let a = _mm_setr_pd(6.88, 3.44);
4223 let res = _mm256_broadcastsd_pd(a);
4224 assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
4225 }
4226
4227 #[simd_test(enable = "avx2")]
4228 unsafe fn test_mm256_broadcastsi128_si256() {
4229 let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4230 let res = _mm256_broadcastsi128_si256(a);
4231 let retval = _mm256_setr_epi64x(
4232 0x0987654321012334,
4233 0x5678909876543210,
4234 0x0987654321012334,
4235 0x5678909876543210,
4236 );
4237 assert_eq_m256i(res, retval);
4238 }
4239
4240 #[simd_test(enable = "avx2")]
4241 unsafe fn test_mm_broadcastss_ps() {
4242 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4243 let res = _mm_broadcastss_ps(a);
4244 assert_eq_m128(res, _mm_set1_ps(6.88));
4245 }
4246
4247 #[simd_test(enable = "avx2")]
4248 unsafe fn test_mm256_broadcastss_ps() {
4249 let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
4250 let res = _mm256_broadcastss_ps(a);
4251 assert_eq_m256(res, _mm256_set1_ps(6.88));
4252 }
4253
4254 #[simd_test(enable = "avx2")]
4255 unsafe fn test_mm_broadcastw_epi16() {
4256 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4257 let res = _mm_broadcastw_epi16(a);
4258 assert_eq_m128i(res, _mm_set1_epi16(0x22b));
4259 }
4260
4261 #[simd_test(enable = "avx2")]
4262 unsafe fn test_mm256_broadcastw_epi16() {
4263 let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
4264 let res = _mm256_broadcastw_epi16(a);
4265 assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
4266 }
4267
4268 #[simd_test(enable = "avx2")]
4269 unsafe fn test_mm256_cmpeq_epi8() {
4270 #[rustfmt::skip]
4271 let a = _mm256_setr_epi8(
4272 0, 1, 2, 3, 4, 5, 6, 7,
4273 8, 9, 10, 11, 12, 13, 14, 15,
4274 16, 17, 18, 19, 20, 21, 22, 23,
4275 24, 25, 26, 27, 28, 29, 30, 31,
4276 );
4277 #[rustfmt::skip]
4278 let b = _mm256_setr_epi8(
4279 31, 30, 2, 28, 27, 26, 25, 24,
4280 23, 22, 21, 20, 19, 18, 17, 16,
4281 15, 14, 13, 12, 11, 10, 9, 8,
4282 7, 6, 5, 4, 3, 2, 1, 0,
4283 );
4284 let r = _mm256_cmpeq_epi8(a, b);
4285 assert_eq_m256i(r, _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), !0));
4286 }
4287
4288 #[simd_test(enable = "avx2")]
4289 unsafe fn test_mm256_cmpeq_epi16() {
4290 #[rustfmt::skip]
4291 let a = _mm256_setr_epi16(
4292 0, 1, 2, 3, 4, 5, 6, 7,
4293 8, 9, 10, 11, 12, 13, 14, 15,
4294 );
4295 #[rustfmt::skip]
4296 let b = _mm256_setr_epi16(
4297 15, 14, 2, 12, 11, 10, 9, 8,
4298 7, 6, 5, 4, 3, 2, 1, 0,
4299 );
4300 let r = _mm256_cmpeq_epi16(a, b);
4301 assert_eq_m256i(r, _mm256_insert_epi16::<2>(_mm256_set1_epi16(0), !0));
4302 }
4303
4304 #[simd_test(enable = "avx2")]
4305 unsafe fn test_mm256_cmpeq_epi32() {
4306 let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4307 let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
4308 let r = _mm256_cmpeq_epi32(a, b);
4309 let e = _mm256_set1_epi32(0);
4310 let e = _mm256_insert_epi32::<2>(e, !0);
4311 assert_eq_m256i(r, e);
4312 }
4313
4314 #[simd_test(enable = "avx2")]
4315 unsafe fn test_mm256_cmpeq_epi64() {
4316 let a = _mm256_setr_epi64x(0, 1, 2, 3);
4317 let b = _mm256_setr_epi64x(3, 2, 2, 0);
4318 let r = _mm256_cmpeq_epi64(a, b);
4319 assert_eq_m256i(r, _mm256_insert_epi64::<2>(_mm256_set1_epi64x(0), !0));
4320 }
4321
4322 #[simd_test(enable = "avx2")]
4323 unsafe fn test_mm256_cmpgt_epi8() {
4324 let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
4325 let b = _mm256_set1_epi8(0);
4326 let r = _mm256_cmpgt_epi8(a, b);
4327 assert_eq_m256i(r, _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), !0));
4328 }
4329
4330 #[simd_test(enable = "avx2")]
4331 unsafe fn test_mm256_cmpgt_epi16() {
4332 let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
4333 let b = _mm256_set1_epi16(0);
4334 let r = _mm256_cmpgt_epi16(a, b);
4335 assert_eq_m256i(r, _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), !0));
4336 }
4337
4338 #[simd_test(enable = "avx2")]
4339 unsafe fn test_mm256_cmpgt_epi32() {
4340 let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
4341 let b = _mm256_set1_epi32(0);
4342 let r = _mm256_cmpgt_epi32(a, b);
4343 assert_eq_m256i(r, _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), !0));
4344 }
4345
4346 #[simd_test(enable = "avx2")]
4347 unsafe fn test_mm256_cmpgt_epi64() {
4348 let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
4349 let b = _mm256_set1_epi64x(0);
4350 let r = _mm256_cmpgt_epi64(a, b);
4351 assert_eq_m256i(r, _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), !0));
4352 }
4353
4354 #[simd_test(enable = "avx2")]
4355 unsafe fn test_mm256_cvtepi8_epi16() {
4356 #[rustfmt::skip]
4357 let a = _mm_setr_epi8(
4358 0, 0, -1, 1, -2, 2, -3, 3,
4359 -4, 4, -5, 5, -6, 6, -7, 7,
4360 );
4361 #[rustfmt::skip]
4362 let r = _mm256_setr_epi16(
4363 0, 0, -1, 1, -2, 2, -3, 3,
4364 -4, 4, -5, 5, -6, 6, -7, 7,
4365 );
4366 assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
4367 }
4368
4369 #[simd_test(enable = "avx2")]
4370 unsafe fn test_mm256_cvtepi8_epi32() {
4371 #[rustfmt::skip]
4372 let a = _mm_setr_epi8(
4373 0, 0, -1, 1, -2, 2, -3, 3,
4374 -4, 4, -5, 5, -6, 6, -7, 7,
4375 );
4376 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4377 assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
4378 }
4379
4380 #[simd_test(enable = "avx2")]
4381 unsafe fn test_mm256_cvtepi8_epi64() {
4382 #[rustfmt::skip]
4383 let a = _mm_setr_epi8(
4384 0, 0, -1, 1, -2, 2, -3, 3,
4385 -4, 4, -5, 5, -6, 6, -7, 7,
4386 );
4387 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4388 assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
4389 }
4390
4391 #[simd_test(enable = "avx2")]
4392 unsafe fn test_mm256_cvtepi16_epi32() {
4393 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4394 let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4395 assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
4396 }
4397
4398 #[simd_test(enable = "avx2")]
4399 unsafe fn test_mm256_cvtepi16_epi64() {
4400 let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4401 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4402 assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
4403 }
4404
4405 #[simd_test(enable = "avx2")]
4406 unsafe fn test_mm256_cvtepi32_epi64() {
4407 let a = _mm_setr_epi32(0, 0, -1, 1);
4408 let r = _mm256_setr_epi64x(0, 0, -1, 1);
4409 assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
4410 }
4411
4412 #[simd_test(enable = "avx2")]
4413 unsafe fn test_mm256_cvtepu16_epi32() {
4414 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4415 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4416 assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
4417 }
4418
4419 #[simd_test(enable = "avx2")]
4420 unsafe fn test_mm256_cvtepu16_epi64() {
4421 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4422 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4423 assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
4424 }
4425
4426 #[simd_test(enable = "avx2")]
4427 unsafe fn test_mm256_cvtepu32_epi64() {
4428 let a = _mm_setr_epi32(0, 1, 2, 3);
4429 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4430 assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
4431 }
4432
4433 #[simd_test(enable = "avx2")]
4434 unsafe fn test_mm256_cvtepu8_epi16() {
4435 #[rustfmt::skip]
4436 let a = _mm_setr_epi8(
4437 0, 1, 2, 3, 4, 5, 6, 7,
4438 8, 9, 10, 11, 12, 13, 14, 15,
4439 );
4440 #[rustfmt::skip]
4441 let r = _mm256_setr_epi16(
4442 0, 1, 2, 3, 4, 5, 6, 7,
4443 8, 9, 10, 11, 12, 13, 14, 15,
4444 );
4445 assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
4446 }
4447
4448 #[simd_test(enable = "avx2")]
4449 unsafe fn test_mm256_cvtepu8_epi32() {
4450 #[rustfmt::skip]
4451 let a = _mm_setr_epi8(
4452 0, 1, 2, 3, 4, 5, 6, 7,
4453 8, 9, 10, 11, 12, 13, 14, 15,
4454 );
4455 let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4456 assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
4457 }
4458
4459 #[simd_test(enable = "avx2")]
4460 unsafe fn test_mm256_cvtepu8_epi64() {
4461 #[rustfmt::skip]
4462 let a = _mm_setr_epi8(
4463 0, 1, 2, 3, 4, 5, 6, 7,
4464 8, 9, 10, 11, 12, 13, 14, 15,
4465 );
4466 let r = _mm256_setr_epi64x(0, 1, 2, 3);
4467 assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
4468 }
4469
4470 #[simd_test(enable = "avx2")]
4471 unsafe fn test_mm256_extracti128_si256() {
4472 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4473 let r = _mm256_extracti128_si256::<1>(a);
4474 let e = _mm_setr_epi64x(3, 4);
4475 assert_eq_m128i(r, e);
4476 }
4477
4478 #[simd_test(enable = "avx2")]
4479 unsafe fn test_mm256_hadd_epi16() {
4480 let a = _mm256_set1_epi16(2);
4481 let b = _mm256_set1_epi16(4);
4482 let r = _mm256_hadd_epi16(a, b);
4483 let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
4484 assert_eq_m256i(r, e);
4485 }
4486
4487 #[simd_test(enable = "avx2")]
4488 unsafe fn test_mm256_hadd_epi32() {
4489 let a = _mm256_set1_epi32(2);
4490 let b = _mm256_set1_epi32(4);
4491 let r = _mm256_hadd_epi32(a, b);
4492 let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
4493 assert_eq_m256i(r, e);
4494 }
4495
4496 #[simd_test(enable = "avx2")]
4497 unsafe fn test_mm256_hadds_epi16() {
4498 let a = _mm256_set1_epi16(2);
4499 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4500 let a = _mm256_insert_epi16::<1>(a, 1);
4501 let b = _mm256_set1_epi16(4);
4502 let r = _mm256_hadds_epi16(a, b);
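// 0x7fff + 1 saturates to 0x7fff; the remaining pairs are 2 + 2 = 4 from `a` and 4 + 4 = 8 from `b`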
4503 #[rustfmt::skip]
4504 let e = _mm256_setr_epi16(
4505 0x7FFF, 4, 4, 4, 8, 8, 8, 8,
4506 4, 4, 4, 4, 8, 8, 8, 8,
4507 );
4508 assert_eq_m256i(r, e);
4509 }
4510
4511 #[simd_test(enable = "avx2")]
4512 unsafe fn test_mm256_hsub_epi16() {
4513 let a = _mm256_set1_epi16(2);
4514 let b = _mm256_set1_epi16(4);
4515 let r = _mm256_hsub_epi16(a, b);
4516 let e = _mm256_set1_epi16(0);
4517 assert_eq_m256i(r, e);
4518 }
4519
4520 #[simd_test(enable = "avx2")]
4521 unsafe fn test_mm256_hsub_epi32() {
4522 let a = _mm256_set1_epi32(2);
4523 let b = _mm256_set1_epi32(4);
4524 let r = _mm256_hsub_epi32(a, b);
4525 let e = _mm256_set1_epi32(0);
4526 assert_eq_m256i(r, e);
4527 }
4528
4529 #[simd_test(enable = "avx2")]
4530 unsafe fn test_mm256_hsubs_epi16() {
4531 let a = _mm256_set1_epi16(2);
4532 let a = _mm256_insert_epi16::<0>(a, 0x7fff);
4533 let a = _mm256_insert_epi16::<1>(a, -1);
4534 let b = _mm256_set1_epi16(4);
4535 let r = _mm256_hsubs_epi16(a, b);
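// 0x7fff - (-1) saturates to 0x7fff; every other horizontal difference is 0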
4536 let e = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 0x7FFF);
4537 assert_eq_m256i(r, e);
4538 }
4539
4540 #[simd_test(enable = "avx2")]
4541 unsafe fn test_mm256_madd_epi16() {
4542 let a = _mm256_set1_epi16(2);
4543 let b = _mm256_set1_epi16(4);
4544 let r = _mm256_madd_epi16(a, b);
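// each 32-bit result is the sum of a pair of products: 2 * 4 + 2 * 4 = 16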
4545 let e = _mm256_set1_epi32(16);
4546 assert_eq_m256i(r, e);
4547 }
4548
4549 #[simd_test(enable = "avx2")]
4550 unsafe fn test_mm256_inserti128_si256() {
4551 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4552 let b = _mm_setr_epi64x(7, 8);
4553 let r = _mm256_inserti128_si256::<1>(a, b);
4554 let e = _mm256_setr_epi64x(1, 2, 7, 8);
4555 assert_eq_m256i(r, e);
4556 }
4557
4558 #[simd_test(enable = "avx2")]
4559 unsafe fn test_mm256_maddubs_epi16() {
4560 let a = _mm256_set1_epi8(2);
4561 let b = _mm256_set1_epi8(4);
4562 let r = _mm256_maddubs_epi16(a, b);
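// unsigned bytes from `a` times signed bytes from `b`, pairs summed: 2 * 4 + 2 * 4 = 16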
4563 let e = _mm256_set1_epi16(16);
4564 assert_eq_m256i(r, e);
4565 }
4566
4567 #[simd_test(enable = "avx2")]
4568 unsafe fn test_mm_maskload_epi32() {
4569 let nums = [1, 2, 3, 4];
4570 let a = &nums as *const i32;
4571 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4572 let r = _mm_maskload_epi32(a, mask);
4573 let e = _mm_setr_epi32(1, 0, 0, 4);
4574 assert_eq_m128i(r, e);
4575 }
4576
4577 #[simd_test(enable = "avx2")]
4578 unsafe fn test_mm256_maskload_epi32() {
4579 let nums = [1, 2, 3, 4, 5, 6, 7, 8];
4580 let a = &nums as *const i32;
4581 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4582 let r = _mm256_maskload_epi32(a, mask);
4583 let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
4584 assert_eq_m256i(r, e);
4585 }
4586
4587 #[simd_test(enable = "avx2")]
4588 unsafe fn test_mm_maskload_epi64() {
4589 let nums = [1_i64, 2_i64];
4590 let a = &nums as *const i64;
4591 let mask = _mm_setr_epi64x(0, -1);
4592 let r = _mm_maskload_epi64(a, mask);
4593 let e = _mm_setr_epi64x(0, 2);
4594 assert_eq_m128i(r, e);
4595 }
4596
4597 #[simd_test(enable = "avx2")]
4598 unsafe fn test_mm256_maskload_epi64() {
4599 let nums = [1_i64, 2_i64, 3_i64, 4_i64];
4600 let a = &nums as *const i64;
4601 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4602 let r = _mm256_maskload_epi64(a, mask);
4603 let e = _mm256_setr_epi64x(0, 2, 3, 0);
4604 assert_eq_m256i(r, e);
4605 }
4606
4607 #[simd_test(enable = "avx2")]
4608 unsafe fn test_mm_maskstore_epi32() {
4609 let a = _mm_setr_epi32(1, 2, 3, 4);
4610 let mut arr = [-1, -1, -1, -1];
4611 let mask = _mm_setr_epi32(-1, 0, 0, -1);
4612 _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4613 let e = [1, -1, -1, 4];
4614 assert_eq!(arr, e);
4615 }
4616
4617 #[simd_test(enable = "avx2")]
4618 unsafe fn test_mm256_maskstore_epi32() {
4619 let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
4620 let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
4621 let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4622 _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
4623 let e = [1, -1, -1, 42, -1, 6, 7, -1];
4624 assert_eq!(arr, e);
4625 }
4626
4627 #[simd_test(enable = "avx2")]
4628 unsafe fn test_mm_maskstore_epi64() {
4629 let a = _mm_setr_epi64x(1_i64, 2_i64);
4630 let mut arr = [-1_i64, -1_i64];
4631 let mask = _mm_setr_epi64x(0, -1);
4632 _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4633 let e = [-1, 2];
4634 assert_eq!(arr, e);
4635 }
4636
4637 #[simd_test(enable = "avx2")]
4638 unsafe fn test_mm256_maskstore_epi64() {
4639 let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
4640 let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
4641 let mask = _mm256_setr_epi64x(0, -1, -1, 0);
4642 _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
4643 let e = [-1, 2, 3, -1];
4644 assert_eq!(arr, e);
4645 }
4646
4647 #[simd_test(enable = "avx2")]
4648 unsafe fn test_mm256_max_epi16() {
4649 let a = _mm256_set1_epi16(2);
4650 let b = _mm256_set1_epi16(4);
4651 let r = _mm256_max_epi16(a, b);
4652 assert_eq_m256i(r, b);
4653 }
4654
4655 #[simd_test(enable = "avx2")]
4656 unsafe fn test_mm256_max_epi32() {
4657 let a = _mm256_set1_epi32(2);
4658 let b = _mm256_set1_epi32(4);
4659 let r = _mm256_max_epi32(a, b);
4660 assert_eq_m256i(r, b);
4661 }
4662
4663 #[simd_test(enable = "avx2")]
4664 unsafe fn test_mm256_max_epi8() {
4665 let a = _mm256_set1_epi8(2);
4666 let b = _mm256_set1_epi8(4);
4667 let r = _mm256_max_epi8(a, b);
4668 assert_eq_m256i(r, b);
4669 }
4670
4671 #[simd_test(enable = "avx2")]
4672 unsafe fn test_mm256_max_epu16() {
4673 let a = _mm256_set1_epi16(2);
4674 let b = _mm256_set1_epi16(4);
4675 let r = _mm256_max_epu16(a, b);
4676 assert_eq_m256i(r, b);
4677 }
4678
4679 #[simd_test(enable = "avx2")]
4680 unsafe fn test_mm256_max_epu32() {
4681 let a = _mm256_set1_epi32(2);
4682 let b = _mm256_set1_epi32(4);
4683 let r = _mm256_max_epu32(a, b);
4684 assert_eq_m256i(r, b);
4685 }
4686
4687 #[simd_test(enable = "avx2")]
4688 unsafe fn test_mm256_max_epu8() {
4689 let a = _mm256_set1_epi8(2);
4690 let b = _mm256_set1_epi8(4);
4691 let r = _mm256_max_epu8(a, b);
4692 assert_eq_m256i(r, b);
4693 }
4694
4695 #[simd_test(enable = "avx2")]
4696 unsafe fn test_mm256_min_epi16() {
4697 let a = _mm256_set1_epi16(2);
4698 let b = _mm256_set1_epi16(4);
4699 let r = _mm256_min_epi16(a, b);
4700 assert_eq_m256i(r, a);
4701 }
4702
4703 #[simd_test(enable = "avx2")]
4704 unsafe fn test_mm256_min_epi32() {
4705 let a = _mm256_set1_epi32(2);
4706 let b = _mm256_set1_epi32(4);
4707 let r = _mm256_min_epi32(a, b);
4708 assert_eq_m256i(r, a);
4709 }
4710
4711 #[simd_test(enable = "avx2")]
4712 unsafe fn test_mm256_min_epi8() {
4713 let a = _mm256_set1_epi8(2);
4714 let b = _mm256_set1_epi8(4);
4715 let r = _mm256_min_epi8(a, b);
4716 assert_eq_m256i(r, a);
4717 }
4718
4719 #[simd_test(enable = "avx2")]
4720 unsafe fn test_mm256_min_epu16() {
4721 let a = _mm256_set1_epi16(2);
4722 let b = _mm256_set1_epi16(4);
4723 let r = _mm256_min_epu16(a, b);
4724 assert_eq_m256i(r, a);
4725 }
4726
4727 #[simd_test(enable = "avx2")]
4728 unsafe fn test_mm256_min_epu32() {
4729 let a = _mm256_set1_epi32(2);
4730 let b = _mm256_set1_epi32(4);
4731 let r = _mm256_min_epu32(a, b);
4732 assert_eq_m256i(r, a);
4733 }
4734
4735 #[simd_test(enable = "avx2")]
4736 unsafe fn test_mm256_min_epu8() {
4737 let a = _mm256_set1_epi8(2);
4738 let b = _mm256_set1_epi8(4);
4739 let r = _mm256_min_epu8(a, b);
4740 assert_eq_m256i(r, a);
4741 }
4742
4743 #[simd_test(enable = "avx2")]
4744 unsafe fn test_mm256_movemask_epi8() {
4745 let a = _mm256_set1_epi8(-1);
4746 let r = _mm256_movemask_epi8(a);
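// every byte of `a` has its sign bit set, so all 32 mask bits are 1 and the i32 result is -1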
4747 let e = -1;
4748 assert_eq!(r, e);
4749 }
4750
4751 #[simd_test(enable = "avx2")]
4752 unsafe fn test_mm256_mpsadbw_epu8() {
4753 let a = _mm256_set1_epi8(2);
4754 let b = _mm256_set1_epi8(4);
4755 let r = _mm256_mpsadbw_epu8::<0>(a, b);
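// every 16-bit result is a sum of four absolute differences: 4 * |2 - 4| = 8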
4756 let e = _mm256_set1_epi16(8);
4757 assert_eq_m256i(r, e);
4758 }
4759
4760 #[simd_test(enable = "avx2")]
4761 unsafe fn test_mm256_mul_epi32() {
4762 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4763 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4764 let r = _mm256_mul_epi32(a, b);
4765 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4766 assert_eq_m256i(r, e);
4767 }
4768
4769 #[simd_test(enable = "avx2")]
4770 unsafe fn test_mm256_mul_epu32() {
4771 let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4772 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4773 let r = _mm256_mul_epu32(a, b);
4774 let e = _mm256_setr_epi64x(0, 0, 10, 14);
4775 assert_eq_m256i(r, e);
4776 }
4777
4778 #[simd_test(enable = "avx2")]
4779 unsafe fn test_mm256_mulhi_epi16() {
4780 let a = _mm256_set1_epi16(6535);
4781 let b = _mm256_set1_epi16(6535);
4782 let r = _mm256_mulhi_epi16(a, b);
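// 6535 * 6535 = 42_706_225 = 0x028B_A531, so the high 16 bits are 0x028B = 651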
4783 let e = _mm256_set1_epi16(651);
4784 assert_eq_m256i(r, e);
4785 }
4786
4787 #[simd_test(enable = "avx2")]
4788 unsafe fn test_mm256_mulhi_epu16() {
4789 let a = _mm256_set1_epi16(6535);
4790 let b = _mm256_set1_epi16(6535);
4791 let r = _mm256_mulhi_epu16(a, b);
4792 let e = _mm256_set1_epi16(651);
4793 assert_eq_m256i(r, e);
4794 }
4795
4796 #[simd_test(enable = "avx2")]
4797 unsafe fn test_mm256_mullo_epi16() {
4798 let a = _mm256_set1_epi16(2);
4799 let b = _mm256_set1_epi16(4);
4800 let r = _mm256_mullo_epi16(a, b);
4801 let e = _mm256_set1_epi16(8);
4802 assert_eq_m256i(r, e);
4803 }
4804
4805 #[simd_test(enable = "avx2")]
4806 unsafe fn test_mm256_mullo_epi32() {
4807 let a = _mm256_set1_epi32(2);
4808 let b = _mm256_set1_epi32(4);
4809 let r = _mm256_mullo_epi32(a, b);
4810 let e = _mm256_set1_epi32(8);
4811 assert_eq_m256i(r, e);
4812 }
4813
4814 #[simd_test(enable = "avx2")]
4815 unsafe fn test_mm256_mulhrs_epi16() {
4816 let a = _mm256_set1_epi16(0x4000); // 0.5 in Q15 fixed point
4817 let b = _mm256_set1_epi16(0x4000); // 0.5 in Q15 fixed point
4818 let r = _mm256_mulhrs_epi16(a, b);
4819 let e = _mm256_set1_epi16(0x2000); // (((0x4000 * 0x4000) >> 14) + 1) >> 1 = 0x2000, i.e. 0.25 in Q15
4820 assert_eq_m256i(r, e);
4821 }
4822
4823 #[simd_test(enable = "avx2")]
4824 unsafe fn test_mm256_or_si256() {
4825 let a = _mm256_set1_epi8(-1);
4826 let b = _mm256_set1_epi8(0);
4827 let r = _mm256_or_si256(a, b);
4828 assert_eq_m256i(r, a);
4829 }
4830
4831 #[simd_test(enable = "avx2")]
4832 unsafe fn test_mm256_packs_epi16() {
4833 let a = _mm256_set1_epi16(2);
4834 let b = _mm256_set1_epi16(4);
4835 let r = _mm256_packs_epi16(a, b);
4836 #[rustfmt::skip]
4837 let e = _mm256_setr_epi8(
4838 2, 2, 2, 2, 2, 2, 2, 2,
4839 4, 4, 4, 4, 4, 4, 4, 4,
4840 2, 2, 2, 2, 2, 2, 2, 2,
4841 4, 4, 4, 4, 4, 4, 4, 4,
4842 );
4843
4844 assert_eq_m256i(r, e);
4845 }
4846
4847 #[simd_test(enable = "avx2")]
4848 unsafe fn test_mm256_packs_epi32() {
4849 let a = _mm256_set1_epi32(2);
4850 let b = _mm256_set1_epi32(4);
4851 let r = _mm256_packs_epi32(a, b);
4852 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
4853
4854 assert_eq_m256i(r, e);
4855 }
4856
4857 #[simd_test(enable = "avx2")]
4858 unsafe fn test_mm256_packus_epi16() {
4859 let a = _mm256_set1_epi16(2);
4860 let b = _mm256_set1_epi16(4);
4861 let r = _mm256_packus_epi16(a, b);
4862 #[rustfmt::skip]
4863 let e = _mm256_setr_epi8(
4864 2, 2, 2, 2, 2, 2, 2, 2,
4865 4, 4, 4, 4, 4, 4, 4, 4,
4866 2, 2, 2, 2, 2, 2, 2, 2,
4867 4, 4, 4, 4, 4, 4, 4, 4,
4868 );
4869
4870 assert_eq_m256i(r, e);
4871 }
4872
4873 #[simd_test(enable = "avx2")]
4874 unsafe fn test_mm256_packus_epi32() {
4875 let a = _mm256_set1_epi32(2);
4876 let b = _mm256_set1_epi32(4);
4877 let r = _mm256_packus_epi32(a, b);
4878 let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
4879
4880 assert_eq_m256i(r, e);
4881 }
4882
4883 #[simd_test(enable = "avx2")]
4884 unsafe fn test_mm256_sad_epu8() {
4885 let a = _mm256_set1_epi8(2);
4886 let b = _mm256_set1_epi8(4);
4887 let r = _mm256_sad_epu8(a, b);
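// each 64-bit result sums eight absolute differences: 8 * |2 - 4| = 16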
4888 let e = _mm256_set1_epi64x(16);
4889 assert_eq_m256i(r, e);
4890 }
4891
4892 #[simd_test(enable = "avx2")]
4893 unsafe fn test_mm256_shufflehi_epi16() {
4894 #[rustfmt::skip]
4895 let a = _mm256_setr_epi16(
4896 0, 1, 2, 3, 11, 22, 33, 44,
4897 4, 5, 6, 7, 55, 66, 77, 88,
4898 );
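// 0b00_01_01_11 reorders the upper four words of each 128-bit lane to indices 3, 1, 1, 0; the lower four words are unchanged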
4899 #[rustfmt::skip]
4900 let e = _mm256_setr_epi16(
4901 0, 1, 2, 3, 44, 22, 22, 11,
4902 4, 5, 6, 7, 88, 66, 66, 55,
4903 );
4904 let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
4905 assert_eq_m256i(r, e);
4906 }
4907
4908 #[simd_test(enable = "avx2")]
4909 unsafe fn test_mm256_shufflelo_epi16() {
4910 #[rustfmt::skip]
4911 let a = _mm256_setr_epi16(
4912 11, 22, 33, 44, 0, 1, 2, 3,
4913 55, 66, 77, 88, 4, 5, 6, 7,
4914 );
4915 #[rustfmt::skip]
4916 let e = _mm256_setr_epi16(
4917 44, 22, 22, 11, 0, 1, 2, 3,
4918 88, 66, 66, 55, 4, 5, 6, 7,
4919 );
4920 let r = _mm256_shufflelo_epi16::<0b00_01_01_11>(a);
4921 assert_eq_m256i(r, e);
4922 }
4923
4924 #[simd_test(enable = "avx2")]
4925 unsafe fn test_mm256_sign_epi16() {
4926 let a = _mm256_set1_epi16(2);
4927 let b = _mm256_set1_epi16(-1);
4928 let r = _mm256_sign_epi16(a, b);
4929 let e = _mm256_set1_epi16(-2);
4930 assert_eq_m256i(r, e);
4931 }
4932
4933 #[simd_test(enable = "avx2")]
4934 unsafe fn test_mm256_sign_epi32() {
4935 let a = _mm256_set1_epi32(2);
4936 let b = _mm256_set1_epi32(-1);
4937 let r = _mm256_sign_epi32(a, b);
4938 let e = _mm256_set1_epi32(-2);
4939 assert_eq_m256i(r, e);
4940 }
4941
4942 #[simd_test(enable = "avx2")]
4943 unsafe fn test_mm256_sign_epi8() {
4944 let a = _mm256_set1_epi8(2);
4945 let b = _mm256_set1_epi8(-1);
4946 let r = _mm256_sign_epi8(a, b);
4947 let e = _mm256_set1_epi8(-2);
4948 assert_eq_m256i(r, e);
4949 }
4950
4951 #[simd_test(enable = "avx2")]
4952 unsafe fn test_mm256_sll_epi16() {
4953 let a = _mm256_set1_epi16(0xFF);
4954 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
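// the shift count is taken from the low 64 bits of `b`, here 4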
4955 let r = _mm256_sll_epi16(a, b);
4956 assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
4957 }
4958
4959 #[simd_test(enable = "avx2")]
4960 unsafe fn test_mm256_sll_epi32() {
4961 let a = _mm256_set1_epi32(0xFFFF);
4962 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
4963 let r = _mm256_sll_epi32(a, b);
4964 assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
4965 }
4966
4967 #[simd_test(enable = "avx2")]
4968 unsafe fn test_mm256_sll_epi64() {
4969 let a = _mm256_set1_epi64x(0xFFFFFFFF);
4970 let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
4971 let r = _mm256_sll_epi64(a, b);
4972 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
4973 }
4974
4975 #[simd_test(enable = "avx2")]
4976 unsafe fn test_mm256_slli_epi16() {
4977 assert_eq_m256i(
4978 _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
4979 _mm256_set1_epi16(0xFF0),
4980 );
4981 }
4982
4983 #[simd_test(enable = "avx2")]
4984 unsafe fn test_mm256_slli_epi32() {
4985 assert_eq_m256i(
4986 _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
4987 _mm256_set1_epi32(0xFFFF0),
4988 );
4989 }
4990
4991 #[simd_test(enable = "avx2")]
4992 unsafe fn test_mm256_slli_epi64() {
4993 assert_eq_m256i(
4994 _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
4995 _mm256_set1_epi64x(0xFFFFFFFF0),
4996 );
4997 }
4998
4999 #[simd_test(enable = "avx2")]
5000 unsafe fn test_mm256_slli_si256() {
5001 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5002 let r = _mm256_slli_si256::<3>(a);
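// each 128-bit lane is shifted left by 3 bytes, so both quadwords become 0x00FF_FFFF_FF00_0000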
5003 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
5004 }
5005
5006 #[simd_test(enable = "avx2")]
5007 unsafe fn test_mm_sllv_epi32() {
5008 let a = _mm_set1_epi32(2);
5009 let b = _mm_set1_epi32(1);
5010 let r = _mm_sllv_epi32(a, b);
5011 let e = _mm_set1_epi32(4);
5012 assert_eq_m128i(r, e);
5013 }
5014
5015 #[simd_test(enable = "avx2")]
5016 unsafe fn test_mm256_sllv_epi32() {
5017 let a = _mm256_set1_epi32(2);
5018 let b = _mm256_set1_epi32(1);
5019 let r = _mm256_sllv_epi32(a, b);
5020 let e = _mm256_set1_epi32(4);
5021 assert_eq_m256i(r, e);
5022 }
5023
5024 #[simd_test(enable = "avx2")]
5025 unsafe fn test_mm_sllv_epi64() {
5026 let a = _mm_set1_epi64x(2);
5027 let b = _mm_set1_epi64x(1);
5028 let r = _mm_sllv_epi64(a, b);
5029 let e = _mm_set1_epi64x(4);
5030 assert_eq_m128i(r, e);
5031 }
5032
5033 #[simd_test(enable = "avx2")]
5034 unsafe fn test_mm256_sllv_epi64() {
5035 let a = _mm256_set1_epi64x(2);
5036 let b = _mm256_set1_epi64x(1);
5037 let r = _mm256_sllv_epi64(a, b);
5038 let e = _mm256_set1_epi64x(4);
5039 assert_eq_m256i(r, e);
5040 }
5041
5042 #[simd_test(enable = "avx2")]
5043 unsafe fn test_mm256_sra_epi16() {
5044 let a = _mm256_set1_epi16(-1);
5045 let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
5046 let r = _mm256_sra_epi16(a, b);
5047 assert_eq_m256i(r, _mm256_set1_epi16(-1));
5048 }
5049
5050 #[simd_test(enable = "avx2")]
5051 unsafe fn test_mm256_sra_epi32() {
5052 let a = _mm256_set1_epi32(-1);
5053 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
5054 let r = _mm256_sra_epi32(a, b);
5055 assert_eq_m256i(r, _mm256_set1_epi32(-1));
5056 }
5057
5058 #[simd_test(enable = "avx2")]
5059 unsafe fn test_mm256_srai_epi16() {
5060 assert_eq_m256i(
5061 _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
5062 _mm256_set1_epi16(-1),
5063 );
5064 }
5065
5066 #[simd_test(enable = "avx2")]
5067 unsafe fn test_mm256_srai_epi32() {
5068 assert_eq_m256i(
5069 _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
5070 _mm256_set1_epi32(-1),
5071 );
5072 }
5073
5074 #[simd_test(enable = "avx2")]
5075 unsafe fn test_mm_srav_epi32() {
5076 let a = _mm_set1_epi32(4);
5077 let count = _mm_set1_epi32(1);
5078 let r = _mm_srav_epi32(a, count);
5079 let e = _mm_set1_epi32(2);
5080 assert_eq_m128i(r, e);
5081 }
5082
5083 #[simd_test(enable = "avx2")]
5084 unsafe fn test_mm256_srav_epi32() {
5085 let a = _mm256_set1_epi32(4);
5086 let count = _mm256_set1_epi32(1);
5087 let r = _mm256_srav_epi32(a, count);
5088 let e = _mm256_set1_epi32(2);
5089 assert_eq_m256i(r, e);
5090 }
5091
5092 #[simd_test(enable = "avx2")]
5093 unsafe fn test_mm256_srli_si256() {
5094 #[rustfmt::skip]
5095 let a = _mm256_setr_epi8(
5096 1, 2, 3, 4, 5, 6, 7, 8,
5097 9, 10, 11, 12, 13, 14, 15, 16,
5098 17, 18, 19, 20, 21, 22, 23, 24,
5099 25, 26, 27, 28, 29, 30, 31, 32,
5100 );
5101 let r = _mm256_srli_si256::<3>(a);
5102 #[rustfmt::skip]
5103 let e = _mm256_setr_epi8(
5104 4, 5, 6, 7, 8, 9, 10, 11,
5105 12, 13, 14, 15, 16, 0, 0, 0,
5106 20, 21, 22, 23, 24, 25, 26, 27,
5107 28, 29, 30, 31, 32, 0, 0, 0,
5108 );
5109 assert_eq_m256i(r, e);
5110 }
5111
5112 #[simd_test(enable = "avx2")]
5113 unsafe fn test_mm256_srl_epi16() {
5114 let a = _mm256_set1_epi16(0xFF);
5115 let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
5116 let r = _mm256_srl_epi16(a, b);
5117 assert_eq_m256i(r, _mm256_set1_epi16(0xF));
5118 }
5119
5120 #[simd_test(enable = "avx2")]
5121 unsafe fn test_mm256_srl_epi32() {
5122 let a = _mm256_set1_epi32(0xFFFF);
5123 let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
5124 let r = _mm256_srl_epi32(a, b);
5125 assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
5126 }
5127
5128 #[simd_test(enable = "avx2")]
5129 unsafe fn test_mm256_srl_epi64() {
5130 let a = _mm256_set1_epi64x(0xFFFFFFFF);
5131 let b = _mm_setr_epi64x(4, 0);
5132 let r = _mm256_srl_epi64(a, b);
5133 assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
5134 }
5135
5136 #[simd_test(enable = "avx2")]
5137 unsafe fn test_mm256_srli_epi16() {
5138 assert_eq_m256i(
5139 _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
5140 _mm256_set1_epi16(0xF),
5141 );
5142 }
5143
5144 #[simd_test(enable = "avx2")]
5145 unsafe fn test_mm256_srli_epi32() {
5146 assert_eq_m256i(
5147 _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
5148 _mm256_set1_epi32(0xFFF),
5149 );
5150 }
5151
5152 #[simd_test(enable = "avx2")]
5153 unsafe fn test_mm256_srli_epi64() {
5154 assert_eq_m256i(
5155 _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
5156 _mm256_set1_epi64x(0xFFFFFFF),
5157 );
5158 }
5159
5160 #[simd_test(enable = "avx2")]
5161 unsafe fn test_mm_srlv_epi32() {
5162 let a = _mm_set1_epi32(2);
5163 let count = _mm_set1_epi32(1);
5164 let r = _mm_srlv_epi32(a, count);
5165 let e = _mm_set1_epi32(1);
5166 assert_eq_m128i(r, e);
5167 }
5168
5169 #[simd_test(enable = "avx2")]
5170 unsafe fn test_mm256_srlv_epi32() {
5171 let a = _mm256_set1_epi32(2);
5172 let count = _mm256_set1_epi32(1);
5173 let r = _mm256_srlv_epi32(a, count);
5174 let e = _mm256_set1_epi32(1);
5175 assert_eq_m256i(r, e);
5176 }
5177
5178 #[simd_test(enable = "avx2")]
5179 unsafe fn test_mm_srlv_epi64() {
5180 let a = _mm_set1_epi64x(2);
5181 let count = _mm_set1_epi64x(1);
5182 let r = _mm_srlv_epi64(a, count);
5183 let e = _mm_set1_epi64x(1);
5184 assert_eq_m128i(r, e);
5185 }
5186
5187 #[simd_test(enable = "avx2")]
5188 unsafe fn test_mm256_srlv_epi64() {
5189 let a = _mm256_set1_epi64x(2);
5190 let count = _mm256_set1_epi64x(1);
5191 let r = _mm256_srlv_epi64(a, count);
5192 let e = _mm256_set1_epi64x(1);
5193 assert_eq_m256i(r, e);
5194 }
5195
5196 #[simd_test(enable = "avx2")]
5197 unsafe fn test_mm256_sub_epi16() {
5198 let a = _mm256_set1_epi16(4);
5199 let b = _mm256_set1_epi16(2);
5200 let r = _mm256_sub_epi16(a, b);
5201 assert_eq_m256i(r, b);
5202 }
5203
5204 #[simd_test(enable = "avx2")]
5205 unsafe fn test_mm256_sub_epi32() {
5206 let a = _mm256_set1_epi32(4);
5207 let b = _mm256_set1_epi32(2);
5208 let r = _mm256_sub_epi32(a, b);
5209 assert_eq_m256i(r, b);
5210 }
5211
5212 #[simd_test(enable = "avx2")]
5213 unsafe fn test_mm256_sub_epi64() {
5214 let a = _mm256_set1_epi64x(4);
5215 let b = _mm256_set1_epi64x(2);
5216 let r = _mm256_sub_epi64(a, b);
5217 assert_eq_m256i(r, b);
5218 }
5219
5220 #[simd_test(enable = "avx2")]
5221 unsafe fn test_mm256_sub_epi8() {
5222 let a = _mm256_set1_epi8(4);
5223 let b = _mm256_set1_epi8(2);
5224 let r = _mm256_sub_epi8(a, b);
5225 assert_eq_m256i(r, b);
5226 }
5227
5228 #[simd_test(enable = "avx2")]
5229 unsafe fn test_mm256_subs_epi16() {
5230 let a = _mm256_set1_epi16(4);
5231 let b = _mm256_set1_epi16(2);
5232 let r = _mm256_subs_epi16(a, b);
5233 assert_eq_m256i(r, b);
5234 }
5235
5236 #[simd_test(enable = "avx2")]
5237 unsafe fn test_mm256_subs_epi8() {
5238 let a = _mm256_set1_epi8(4);
5239 let b = _mm256_set1_epi8(2);
5240 let r = _mm256_subs_epi8(a, b);
5241 assert_eq_m256i(r, b);
5242 }
5243
5244 #[simd_test(enable = "avx2")]
5245 unsafe fn test_mm256_subs_epu16() {
5246 let a = _mm256_set1_epi16(4);
5247 let b = _mm256_set1_epi16(2);
5248 let r = _mm256_subs_epu16(a, b);
5249 assert_eq_m256i(r, b);
5250 }
5251
5252 #[simd_test(enable = "avx2")]
5253 unsafe fn test_mm256_subs_epu8() {
5254 let a = _mm256_set1_epi8(4);
5255 let b = _mm256_set1_epi8(2);
5256 let r = _mm256_subs_epu8(a, b);
5257 assert_eq_m256i(r, b);
5258 }
5259
5260 #[simd_test(enable = "avx2")]
5261 unsafe fn test_mm256_xor_si256() {
5262 let a = _mm256_set1_epi8(5);
5263 let b = _mm256_set1_epi8(3);
5264 let r = _mm256_xor_si256(a, b);
5265 assert_eq_m256i(r, _mm256_set1_epi8(6));
5266 }
5267
5268 #[simd_test(enable = "avx2")]
5269 unsafe fn test_mm256_alignr_epi8() {
5270 #[rustfmt::skip]
5271 let a = _mm256_setr_epi8(
5272 1, 2, 3, 4, 5, 6, 7, 8,
5273 9, 10, 11, 12, 13, 14, 15, 16,
5274 17, 18, 19, 20, 21, 22, 23, 24,
5275 25, 26, 27, 28, 29, 30, 31, 32,
5276 );
5277 #[rustfmt::skip]
5278 let b = _mm256_setr_epi8(
5279 -1, -2, -3, -4, -5, -6, -7, -8,
5280 -9, -10, -11, -12, -13, -14, -15, -16,
5281 -17, -18, -19, -20, -21, -22, -23, -24,
5282 -25, -26, -27, -28, -29, -30, -31, -32,
5283 );
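// IMM8 is a byte shift: each 128-bit lane of the result is the low 16 bytes of (a_lane:b_lane) >> (IMM8 * 8); IMM8 > 32 yields all zeros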
5284 let r = _mm256_alignr_epi8::<33>(a, b);
5285 assert_eq_m256i(r, _mm256_set1_epi8(0));
5286
5287 let r = _mm256_alignr_epi8::<17>(a, b);
5288 #[rustfmt::skip]
5289 let expected = _mm256_setr_epi8(
5290 2, 3, 4, 5, 6, 7, 8, 9,
5291 10, 11, 12, 13, 14, 15, 16, 0,
5292 18, 19, 20, 21, 22, 23, 24, 25,
5293 26, 27, 28, 29, 30, 31, 32, 0,
5294 );
5295 assert_eq_m256i(r, expected);
5296
5297 let r = _mm256_alignr_epi8::<4>(a, b);
5298 #[rustfmt::skip]
5299 let expected = _mm256_setr_epi8(
5300 -5, -6, -7, -8, -9, -10, -11, -12,
5301 -13, -14, -15, -16, 1, 2, 3, 4,
5302 -21, -22, -23, -24, -25, -26, -27, -28,
5303 -29, -30, -31, -32, 17, 18, 19, 20,
5304 );
5305 assert_eq_m256i(r, expected);
5306
5307 #[rustfmt::skip]
5308 let expected = _mm256_setr_epi8(
5309 1, 2, 3, 4, 5, 6, 7, 8,
5310 9, 10, 11, 12, 13, 14, 15, 16,
5311 17, 18, 19, 20, 21, 22, 23, 24,
5312 25, 26, 27, 28, 29, 30, 31, 32,
5313 );
5314 let r = _mm256_alignr_epi8::<16>(a, b); // shifting by exactly one lane width (16 bytes) selects `a`
5315 assert_eq_m256i(r, expected);
5316
5317 let r = _mm256_alignr_epi8::<15>(a, b);
5318 #[rustfmt::skip]
5319 let expected = _mm256_setr_epi8(
5320 -16, 1, 2, 3, 4, 5, 6, 7,
5321 8, 9, 10, 11, 12, 13, 14, 15,
5322 -32, 17, 18, 19, 20, 21, 22, 23,
5323 24, 25, 26, 27, 28, 29, 30, 31,
5324 );
5325 assert_eq_m256i(r, expected);
5326
5327 let r = _mm256_alignr_epi8::<0>(a, b);
5328 assert_eq_m256i(r, b);
5329 }
5330
5331 #[simd_test(enable = "avx2")]
5332 unsafe fn test_mm256_shuffle_epi8() {
5333 #[rustfmt::skip]
5334 let a = _mm256_setr_epi8(
5335 1, 2, 3, 4, 5, 6, 7, 8,
5336 9, 10, 11, 12, 13, 14, 15, 16,
5337 17, 18, 19, 20, 21, 22, 23, 24,
5338 25, 26, 27, 28, 29, 30, 31, 32,
5339 );
5340 #[rustfmt::skip]
5341 let b = _mm256_setr_epi8(
5342 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5343 12, 5, 5, 10, 4, 1, 8, 0,
5344 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5345 12, 5, 5, 10, 4, 1, 8, 0,
5346 );
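// an index byte with the top bit set (128) zeroes the output byte; otherwise only its low 4 bits select a byte within the same 16-byte lane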
5347 #[rustfmt::skip]
5348 let expected = _mm256_setr_epi8(
5349 5, 0, 5, 4, 9, 13, 7, 4,
5350 13, 6, 6, 11, 5, 2, 9, 1,
5351 21, 0, 21, 20, 25, 29, 23, 20,
5352 29, 22, 22, 27, 21, 18, 25, 17,
5353 );
5354 let r = _mm256_shuffle_epi8(a, b);
5355 assert_eq_m256i(r, expected);
5356 }
5357
5358 #[simd_test(enable = "avx2")]
5359 unsafe fn test_mm256_permutevar8x32_epi32() {
5360 let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
5361 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5362 let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
5363 let r = _mm256_permutevar8x32_epi32(a, b);
5364 assert_eq_m256i(r, expected);
5365 }
5366
5367 #[simd_test(enable = "avx2")]
5368 unsafe fn test_mm256_permute4x64_epi64() {
5369 let a = _mm256_setr_epi64x(100, 200, 300, 400);
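// the immediate is read two bits per destination element, low bits first: 0b00010011 picks elements 3, 0, 1, 0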
5370 let expected = _mm256_setr_epi64x(400, 100, 200, 100);
5371 let r = _mm256_permute4x64_epi64::<0b00010011>(a);
5372 assert_eq_m256i(r, expected);
5373 }
5374
5375 #[simd_test(enable = "avx2")]
5376 unsafe fn test_mm256_permute2x128_si256() {
5377 let a = _mm256_setr_epi64x(100, 200, 500, 600);
5378 let b = _mm256_setr_epi64x(300, 400, 700, 800);
5379 let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
5380 let e = _mm256_setr_epi64x(700, 800, 500, 600);
5381 assert_eq_m256i(r, e);
5382 }
5383
5384 #[simd_test(enable = "avx2")]
5385 unsafe fn test_mm256_permute4x64_pd() {
5386 let a = _mm256_setr_pd(1., 2., 3., 4.);
5387 let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
5388 let e = _mm256_setr_pd(4., 1., 2., 1.);
5389 assert_eq_m256d(r, e);
5390 }
5391
5392 #[simd_test(enable = "avx2")]
5393 unsafe fn test_mm256_permutevar8x32_ps() {
5394 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5395 let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5396 let r = _mm256_permutevar8x32_ps(a, b);
5397 let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
5398 assert_eq_m256(r, e);
5399 }
5400
5401 #[simd_test(enable = "avx2")]
5402 unsafe fn test_mm_i32gather_epi32() {
5403 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5404 // A multiplier of 4 is word-addressing
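// (the scale is a byte multiplier: each i32 index becomes a byte offset of index * 4)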
5405 let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5406 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5407 }
5408
5409 #[simd_test(enable = "avx2")]
5410 unsafe fn test_mm_mask_i32gather_epi32() {
5411 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5412 // A multiplier of 4 is word-addressing
5413 let r = _mm_mask_i32gather_epi32::<4>(
5414 _mm_set1_epi32(256),
5415 arr.as_ptr(),
5416 _mm_setr_epi32(0, 16, 64, 96),
5417 _mm_setr_epi32(-1, -1, -1, 0),
5418 );
5419 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5420 }
5421
5422 #[simd_test(enable = "avx2")]
5423 unsafe fn test_mm256_i32gather_epi32() {
5424 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5425 // A multiplier of 4 is word-addressing
5426 let r =
5427 _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5428 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5429 }
5430
5431 #[simd_test(enable = "avx2")]
5432 unsafe fn test_mm256_mask_i32gather_epi32() {
5433 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5434 // A multiplier of 4 is word-addressing
5435 let r = _mm256_mask_i32gather_epi32::<4>(
5436 _mm256_set1_epi32(256),
5437 arr.as_ptr(),
5438 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5439 _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
5440 );
5441 assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
5442 }
5443
5444 #[simd_test(enable = "avx2")]
5445 unsafe fn test_mm_i32gather_ps() {
5446 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5447 // A multiplier of 4 is word-addressing for f32s
5448 let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5449 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5450 }
5451
5452 #[simd_test(enable = "avx2")]
5453 unsafe fn test_mm_mask_i32gather_ps() {
5454 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5455 // A multiplier of 4 is word-addressing for f32s
5456 let r = _mm_mask_i32gather_ps::<4>(
5457 _mm_set1_ps(256.0),
5458 arr.as_ptr(),
5459 _mm_setr_epi32(0, 16, 64, 96),
5460 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5461 );
5462 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5463 }
5464
5465 #[simd_test(enable = "avx2")]
5466 unsafe fn test_mm256_i32gather_ps() {
5467 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5468 // A multiplier of 4 is word-addressing for f32s
5469 let r =
5470 _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5471 assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
5472 }
5473
5474 #[simd_test(enable = "avx2")]
5475 unsafe fn test_mm256_mask_i32gather_ps() {
5476 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5477 // A multiplier of 4 is word-addressing for f32s
5478 let r = _mm256_mask_i32gather_ps::<4>(
5479 _mm256_set1_ps(256.0),
5480 arr.as_ptr(),
5481 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5482 _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
5483 );
5484 assert_eq_m256(
5485 r,
5486 _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
5487 );
5488 }
5489
5490 #[simd_test(enable = "avx2")]
5491 unsafe fn test_mm_i32gather_epi64() {
5492 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5493 // A multiplier of 8 is word-addressing for i64s
5494 let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5495 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5496 }
5497
5498 #[simd_test(enable = "avx2")]
5499 unsafe fn test_mm_mask_i32gather_epi64() {
5500 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5501 // A multiplier of 8 is word-addressing for i64s
5502 let r = _mm_mask_i32gather_epi64::<8>(
5503 _mm_set1_epi64x(256),
5504 arr.as_ptr(),
5505 _mm_setr_epi32(16, 16, 16, 16),
5506 _mm_setr_epi64x(-1, 0),
5507 );
5508 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5509 }
5510
5511 #[simd_test(enable = "avx2")]
5512 unsafe fn test_mm256_i32gather_epi64() {
5513 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5514 // A multiplier of 8 is word-addressing for i64s
5515 let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5516 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5517 }
5518
5519 #[simd_test(enable = "avx2")]
5520 unsafe fn test_mm256_mask_i32gather_epi64() {
5521 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5522 // A multiplier of 8 is word-addressing for i64s
5523 let r = _mm256_mask_i32gather_epi64::<8>(
5524 _mm256_set1_epi64x(256),
5525 arr.as_ptr(),
5526 _mm_setr_epi32(0, 16, 64, 96),
5527 _mm256_setr_epi64x(-1, -1, -1, 0),
5528 );
5529 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5530 }
5531
5532 #[simd_test(enable = "avx2")]
5533 unsafe fn test_mm_i32gather_pd() {
5534 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5535 // A multiplier of 8 is word-addressing for f64s
5536 let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
5537 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5538 }
5539
5540 #[simd_test(enable = "avx2")]
5541 unsafe fn test_mm_mask_i32gather_pd() {
5542 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5543 // A multiplier of 8 is word-addressing for f64s
5544 let r = _mm_mask_i32gather_pd::<8>(
5545 _mm_set1_pd(256.0),
5546 arr.as_ptr(),
5547 _mm_setr_epi32(16, 16, 16, 16),
5548 _mm_setr_pd(-1.0, 0.0),
5549 );
5550 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5551 }
5552
5553 #[simd_test(enable = "avx2")]
5554 unsafe fn test_mm256_i32gather_pd() {
5555 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5556 // A multiplier of 8 is word-addressing for f64s
5557 let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
5558 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5559 }
5560
5561 #[simd_test(enable = "avx2")]
5562 unsafe fn test_mm256_mask_i32gather_pd() {
5563 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5564 // A multiplier of 8 is word-addressing for f64s
5565 let r = _mm256_mask_i32gather_pd::<8>(
5566 _mm256_set1_pd(256.0),
5567 arr.as_ptr(),
5568 _mm_setr_epi32(0, 16, 64, 96),
5569 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5570 );
5571 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5572 }
5573
5574 #[simd_test(enable = "avx2")]
5575 unsafe fn test_mm_i64gather_epi32() {
5576 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5577 // A multiplier of 4 is word-addressing
5578 let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5579 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
5580 }
5581
5582 #[simd_test(enable = "avx2")]
5583 unsafe fn test_mm_mask_i64gather_epi32() {
5584 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5585 // A multiplier of 4 is word-addressing
5586 let r = _mm_mask_i64gather_epi32::<4>(
5587 _mm_set1_epi32(256),
5588 arr.as_ptr(),
5589 _mm_setr_epi64x(0, 16),
5590 _mm_setr_epi32(-1, 0, -1, 0),
5591 );
5592 assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
5593 }
5594
5595 #[simd_test(enable = "avx2")]
5596 unsafe fn test_mm256_i64gather_epi32() {
5597 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5598 // A multiplier of 4 is word-addressing
5599 let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5600 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
5601 }
5602
5603 #[simd_test(enable = "avx2")]
5604 unsafe fn test_mm256_mask_i64gather_epi32() {
5605 let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
5606 // A multiplier of 4 is word-addressing
5607 let r = _mm256_mask_i64gather_epi32::<4>(
5608 _mm_set1_epi32(256),
5609 arr.as_ptr(),
5610 _mm256_setr_epi64x(0, 16, 64, 96),
5611 _mm_setr_epi32(-1, -1, -1, 0),
5612 );
5613 assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
5614 }
5615
5616 #[simd_test(enable = "avx2")]
5617 unsafe fn test_mm_i64gather_ps() {
5618 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5619 // A multiplier of 4 is word-addressing for f32s
5620 let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5621 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
5622 }
5623
5624 #[simd_test(enable = "avx2")]
5625 unsafe fn test_mm_mask_i64gather_ps() {
5626 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5627 // A multiplier of 4 is word-addressing for f32s
5628 let r = _mm_mask_i64gather_ps::<4>(
5629 _mm_set1_ps(256.0),
5630 arr.as_ptr(),
5631 _mm_setr_epi64x(0, 16),
5632 _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
5633 );
5634 assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
5635 }
5636
5637 #[simd_test(enable = "avx2")]
5638 unsafe fn test_mm256_i64gather_ps() {
5639 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5640 // A multiplier of 4 is word-addressing for f32s
5641 let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5642 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5643 }
5644
5645 #[simd_test(enable = "avx2")]
5646 unsafe fn test_mm256_mask_i64gather_ps() {
5647 let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
5648 // A multiplier of 4 is word-addressing for f32s
5649 let r = _mm256_mask_i64gather_ps::<4>(
5650 _mm_set1_ps(256.0),
5651 arr.as_ptr(),
5652 _mm256_setr_epi64x(0, 16, 64, 96),
5653 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5654 );
5655 assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5656 }
5657
5658 #[simd_test(enable = "avx2")]
5659 unsafe fn test_mm_i64gather_epi64() {
5660 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5661 // A multiplier of 8 is word-addressing for i64s
5662 let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5663 assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
5664 }
5665
5666 #[simd_test(enable = "avx2")]
5667 unsafe fn test_mm_mask_i64gather_epi64() {
5668 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5669 // A multiplier of 8 is word-addressing for i64s
5670 let r = _mm_mask_i64gather_epi64::<8>(
5671 _mm_set1_epi64x(256),
5672 arr.as_ptr(),
5673 _mm_setr_epi64x(16, 16),
5674 _mm_setr_epi64x(-1, 0),
5675 );
5676 assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
5677 }
5678
5679 #[simd_test(enable = "avx2")]
5680 unsafe fn test_mm256_i64gather_epi64() {
5681 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5682 // A multiplier of 8 is word-addressing for i64s
5683 let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5684 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
5685 }
5686
5687 #[simd_test(enable = "avx2")]
5688 unsafe fn test_mm256_mask_i64gather_epi64() {
5689 let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
5690 // A multiplier of 8 is word-addressing for i64s
5691 let r = _mm256_mask_i64gather_epi64::<8>(
5692 _mm256_set1_epi64x(256),
5693 arr.as_ptr(),
5694 _mm256_setr_epi64x(0, 16, 64, 96),
5695 _mm256_setr_epi64x(-1, -1, -1, 0),
5696 );
5697 assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
5698 }
5699
5700 #[simd_test(enable = "avx2")]
5701 unsafe fn test_mm_i64gather_pd() {
5702 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5703 // A multiplier of 8 is word-addressing for f64s
5704 let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
5705 assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
5706 }
5707
5708 #[simd_test(enable = "avx2")]
5709 unsafe fn test_mm_mask_i64gather_pd() {
5710 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5711 // A multiplier of 8 is word-addressing for f64s
5712 let r = _mm_mask_i64gather_pd::<8>(
5713 _mm_set1_pd(256.0),
5714 arr.as_ptr(),
5715 _mm_setr_epi64x(16, 16),
5716 _mm_setr_pd(-1.0, 0.0),
5717 );
5718 assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
5719 }
5720
5721 #[simd_test(enable = "avx2")]
5722 unsafe fn test_mm256_i64gather_pd() {
5723 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5724 // A multiplier of 8 is word-addressing for f64s
5725 let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
5726 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5727 }
5728
5729 #[simd_test(enable = "avx2")]
5730 unsafe fn test_mm256_mask_i64gather_pd() {
5731 let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
5732 // A multiplier of 8 is word-addressing for f64s
5733 let r = _mm256_mask_i64gather_pd::<8>(
5734 _mm256_set1_pd(256.0),
5735 arr.as_ptr(),
5736 _mm256_setr_epi64x(0, 16, 64, 96),
5737 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5738 );
5739 assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5740 }
5741
5742 #[simd_test(enable = "avx2")]
5743 unsafe fn test_mm256_extract_epi8() {
5744 #[rustfmt::skip]
5745 let a = _mm256_setr_epi8(
5746 -1, 1, 2, 3, 4, 5, 6, 7,
5747 8, 9, 10, 11, 12, 13, 14, 15,
5748 16, 17, 18, 19, 20, 21, 22, 23,
5749 24, 25, 26, 27, 28, 29, 30, 31
5750 );
5751 let r1 = _mm256_extract_epi8::<0>(a);
5752 let r2 = _mm256_extract_epi8::<3>(a);
5753 assert_eq!(r1, 0xFF);
5754 assert_eq!(r2, 3);
5755 }
5756
5757 #[simd_test(enable = "avx2")]
5758 unsafe fn test_mm256_extract_epi16() {
5759 #[rustfmt::skip]
5760 let a = _mm256_setr_epi16(
5761 -1, 1, 2, 3, 4, 5, 6, 7,
5762 8, 9, 10, 11, 12, 13, 14, 15,
5763 );
5764 let r1 = _mm256_extract_epi16::<0>(a);
5765 let r2 = _mm256_extract_epi16::<3>(a);
5766 assert_eq!(r1, 0xFFFF);
5767 assert_eq!(r2, 3);
5768 }
5769
5770 #[simd_test(enable = "avx2")]
5771 unsafe fn test_mm256_extract_epi32() {
5772 let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
5773 let r1 = _mm256_extract_epi32::<0>(a);
5774 let r2 = _mm256_extract_epi32::<3>(a);
5775 assert_eq!(r1, -1);
5776 assert_eq!(r2, 3);
5777 }
5778
5779 #[simd_test(enable = "avx2")]
5780 unsafe fn test_mm256_cvtsd_f64() {
5781 let a = _mm256_setr_pd(1., 2., 3., 4.);
5782 let r = _mm256_cvtsd_f64(a);
5783 assert_eq!(r, 1.);
5784 }
5785
5786 #[simd_test(enable = "avx2")]
5787 unsafe fn test_mm256_cvtsi256_si32() {
5788 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
5789 let r = _mm256_cvtsi256_si32(a);
5790 assert_eq!(r, 1);
5791 }
5792}
5793