//! Advanced Vector Extensions (AVX)
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
//!
//! [Wikipedia][wiki] provides a quick overview of the instructions available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem, ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Adds packed double-precision (64-bit) floating-point elements
/// in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_pd)
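///
/// A minimal usage sketch (illustrative only, not part of the original
/// documentation; it assumes AVX support has been detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///         let b = _mm256_setr_pd(10.0, 20.0, 30.0, 40.0);
///         let mut out = [0.0f64; 4];
///         _mm256_storeu_pd(out.as_mut_ptr(), _mm256_add_pd(a, b));
///         assert_eq!(out, [11.0, 22.0, 33.0, 44.0]);
///     }
/// }
/// # }
/// ```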
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_add(a, b)
}

/// Adds packed single-precision (32-bit) floating-point elements in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
    simd_add(a, b)
}

/// Computes the bitwise AND of packed double-precision (64-bit)
/// floating-point elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_pd)
#[inline]
#[target_feature(enable = "avx")]
// FIXME: Should be 'vandpd' instruction.
// See https://github.com/rust-lang/stdarch/issues/71
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_and(a, b))
}

/// Computes the bitwise AND of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_ps)
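///
/// One common use is clearing the sign bit of every element, i.e. a packed
/// absolute value. An illustrative sketch only (not from the original docs;
/// assumes AVX is available at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_ps(-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0);
///         // Mask that keeps every bit except the sign bit of each element.
///         let mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fff_ffff));
///         let mut out = [0.0f32; 8];
///         _mm256_storeu_ps(out.as_mut_ptr(), _mm256_and_ps(a, mask));
///         assert_eq!(out, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
///     }
/// }
/// # }
/// ```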
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_and(a, b))
}

/// Computes the bitwise OR of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_pd)
#[inline]
#[target_feature(enable = "avx")]
// FIXME: should be `vorpd` instruction.
// See <https://github.com/rust-lang/stdarch/issues/71>.
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_or(a, b))
}

/// Computes the bitwise OR of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_or(a, b))
}

/// Shuffles double-precision (64-bit) floating-point elements within 128-bit
/// lanes using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_pd)
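///
/// An illustrative sketch of how the four mask bits select elements (not part
/// of the original documentation; assumes AVX at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///         let b = _mm256_setr_pd(10.0, 20.0, 30.0, 40.0);
///         // Bits 0..=3 of the mask pick, in order: a[0] or a[1], b[0] or
///         // b[1], a[2] or a[3], b[2] or b[3].
///         let r = _mm256_shuffle_pd::<0b0101>(a, b);
///         let mut out = [0.0f64; 4];
///         _mm256_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [2.0, 10.0, 4.0, 30.0]);
///     }
/// }
/// # }
/// ```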
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        b,
        [
            MASK as u32 & 0b1,
            ((MASK as u32 >> 1) & 0b1) + 4,
            ((MASK as u32 >> 2) & 0b1) + 2,
            ((MASK as u32 >> 3) & 0b1) + 6,
        ],
    )
}

/// Shuffles single-precision (32-bit) floating-point elements in `a` within
/// 128-bit lanes using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        b,
        [
            MASK as u32 & 0b11,
            (MASK as u32 >> 2) & 0b11,
            ((MASK as u32 >> 4) & 0b11) + 8,
            ((MASK as u32 >> 6) & 0b11) + 8,
            (MASK as u32 & 0b11) + 4,
            ((MASK as u32 >> 2) & 0b11) + 4,
            ((MASK as u32 >> 4) & 0b11) + 12,
            ((MASK as u32 >> 6) & 0b11) + 12,
        ],
    )
}

/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point
/// elements in `a`, and then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_pd)
#[inline]
#[target_feature(enable = "avx")]
// FIXME: should be `vandnpd` instruction.
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b))
}

/// Computes the bitwise NOT of packed single-precision (32-bit) floating-point
/// elements in `a`, and then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_ps)
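///
/// Because the NOT is applied to the first operand, passing `-0.0` as `a`
/// clears only the sign bits of `b`. An illustrative sketch (not from the
/// original docs; assumes AVX at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let b = _mm256_setr_ps(-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0);
///         // `!(-0.0) & b` keeps the magnitude and drops the sign bit.
///         let r = _mm256_andnot_ps(_mm256_set1_ps(-0.0), b);
///         let mut out = [0.0f32; 8];
///         _mm256_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!(out, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
///     }
/// }
/// # }
/// ```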
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
}

/// Compares packed double-precision (64-bit) floating-point elements
/// in `a` and `b`, and returns packed maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
    vmaxpd(a, b)
}

/// Compares packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and returns packed maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
    vmaxps(a, b)
}

/// Compares packed double-precision (64-bit) floating-point elements
/// in `a` and `b`, and returns packed minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
    vminpd(a, b)
}

/// Compares packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and returns packed minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
    vminps(a, b)
}

/// Multiplies packed double-precision (64-bit) floating-point elements
/// in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_mul(a, b)
}

/// Multiplies packed single-precision (32-bit) floating-point elements in `a`
/// and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
    simd_mul(a, b)
}

/// Alternatively adds and subtracts packed double-precision (64-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_pd)
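///
/// An illustrative sketch of the alternating subtract/add pattern (not part
/// of the original documentation; assumes AVX at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_pd(10.0, 20.0, 30.0, 40.0);
///         let b = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///         // Even-indexed elements are subtracted, odd-indexed elements are added.
///         let r = _mm256_addsub_pd(a, b);
///         let mut out = [0.0f64; 4];
///         _mm256_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [9.0, 22.0, 27.0, 44.0]);
///     }
/// }
/// # }
/// ```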
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: f64x4 = a.as_f64x4();
    let b: f64x4 = b.as_f64x4();
    let add: f64x4 = simd_add(a, b);
    let sub: f64x4 = simd_sub(a, b);
    simd_shuffle!(add, sub, [4, 1, 6, 3])
}

/// Alternatively adds and subtracts packed single-precision (32-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
    let a: f32x8 = a.as_f32x8();
    let b: f32x8 = b.as_f32x8();
    let add: f32x8 = simd_add(a, b);
    let sub: f32x8 = simd_sub(a, b);
    simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
}

/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
/// from packed elements in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_sub(a, b)
}

/// Subtracts packed single-precision (32-bit) floating-point elements in `b`
/// from packed elements in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
    simd_sub(a, b)
}

/// Computes the division of each of the 8 packed 32-bit floating-point elements
/// in `a` by the corresponding packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
    simd_div(a, b)
}

/// Computes the division of each of the 4 packed 64-bit floating-point elements
/// in `a` by the corresponding packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_div(a, b)
}

/// Rounds packed double-precision (64-bit) floating point elements in `a`
/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows:
///
/// - `0x00`: Round to the nearest whole number.
/// - `0x01`: Round down, toward negative infinity.
/// - `0x02`: Round up, toward positive infinity.
/// - `0x03`: Truncate the values.
///
/// For a complete list of options, check [the LLVM docs][llvm_docs].
///
/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_pd)
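///
/// An illustrative sketch contrasting two of the rounding modes listed above
/// (not part of the original documentation; assumes AVX at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_pd(1.2, 1.8, -1.2, -1.8);
///         let nearest = _mm256_round_pd::<0x00>(a); // round to nearest
///         let truncated = _mm256_round_pd::<0x03>(a); // truncate
///         let mut out = [0.0f64; 4];
///         _mm256_storeu_pd(out.as_mut_ptr(), nearest);
///         assert_eq!(out, [1.0, 2.0, -1.0, -2.0]);
///         _mm256_storeu_pd(out.as_mut_ptr(), truncated);
///         assert_eq!(out, [1.0, 1.0, -1.0, -1.0]);
///     }
/// }
/// # }
/// ```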
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd, ROUNDING = 0x3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundpd256(a, ROUNDING)
}

/// Rounds packed double-precision (64-bit) floating point elements in `a`
/// toward positive infinity.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d {
    simd_ceil(a)
}

/// Rounds packed double-precision (64-bit) floating point elements in `a`
/// toward negative infinity.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d {
    simd_floor(a)
}

/// Rounds packed single-precision (32-bit) floating point elements in `a`
/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows:
///
/// - `0x00`: Round to the nearest whole number.
/// - `0x01`: Round down, toward negative infinity.
/// - `0x02`: Round up, toward positive infinity.
/// - `0x03`: Truncate the values.
///
/// For a complete list of options, check [the LLVM docs][llvm_docs].
///
/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps, ROUNDING = 0x00))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundps256(a, ROUNDING)
}

/// Rounds packed single-precision (32-bit) floating point elements in `a`
/// toward positive infinity.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 {
    simd_ceil(a)
}

/// Rounds packed single-precision (32-bit) floating point elements in `a`
/// toward negative infinity.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 {
    simd_floor(a)
}

/// Returns the square root of packed single-precision (32-bit) floating point
/// elements in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 {
    sqrtps256(a)
}

/// Returns the square root of packed double-precision (64-bit) floating point
/// elements in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
    simd_fsqrt(a)
}

/// Blends packed double-precision (64-bit) floating-point elements from
/// `a` and `b` using control mask `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_pd)
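///
/// An illustrative sketch: bit `i` of the mask picks element `i` from `b`
/// (when set) or from `a` (when clear). Not part of the original docs;
/// assumes AVX at runtime:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///         let b = _mm256_setr_pd(10.0, 20.0, 30.0, 40.0);
///         let r = _mm256_blend_pd::<0b1010>(a, b);
///         let mut out = [0.0f64; 4];
///         _mm256_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [1.0, 20.0, 3.0, 40.0]);
///     }
/// }
/// # }
/// ```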
#[inline]
#[target_feature(enable = "avx")]
// Note: LLVM7 prefers single-precision blend instructions when
// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194
// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    simd_shuffle!(
        a,
        b,
        [
            ((IMM4 as u32 >> 0) & 1) * 4 + 0,
            ((IMM4 as u32 >> 1) & 1) * 4 + 1,
            ((IMM4 as u32 >> 2) & 1) * 4 + 2,
            ((IMM4 as u32 >> 3) & 1) * 4 + 3,
        ],
    )
}

/// Blends packed single-precision (32-bit) floating-point elements from
/// `a` and `b` using control mask `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    simd_shuffle!(
        a,
        b,
        [
            ((IMM8 as u32 >> 0) & 1) * 8 + 0,
            ((IMM8 as u32 >> 1) & 1) * 8 + 1,
            ((IMM8 as u32 >> 2) & 1) * 8 + 2,
            ((IMM8 as u32 >> 3) & 1) * 8 + 3,
            ((IMM8 as u32 >> 4) & 1) * 8 + 4,
            ((IMM8 as u32 >> 5) & 1) * 8 + 5,
            ((IMM8 as u32 >> 6) & 1) * 8 + 6,
            ((IMM8 as u32 >> 7) & 1) * 8 + 7,
        ],
    )
}

/// Blends packed double-precision (64-bit) floating-point elements from
/// `a` and `b` using `c` as a mask.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::splat(0));
    transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
}

/// Blends packed single-precision (32-bit) floating-point elements from
/// `a` and `b` using `c` as a mask.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
    let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::splat(0));
    transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
}

/// Conditionally multiplies the packed single-precision (32-bit) floating-point
/// elements in `a` and `b` using the high 4 bits in `imm8`,
/// sums the four products, and conditionally returns the sum
/// using the low 4 bits of `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dp_ps)
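///
/// An illustrative sketch of a per-lane dot product with mask `0xff`; the
/// operation is performed independently on each 128-bit lane. Not part of the
/// original documentation; assumes AVX at runtime:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///         let b = _mm256_set1_ps(1.0);
///         // High nibble 0xf: use all four products; low nibble 0xf:
///         // broadcast the sum to every element of the lane.
///         let r = _mm256_dp_ps::<0xff>(a, b);
///         let mut out = [0.0f32; 8];
///         _mm256_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!(out, [10.0, 10.0, 10.0, 10.0, 26.0, 26.0, 26.0, 26.0]);
///     }
/// }
/// # }
/// ```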
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdpps, IMM8 = 0x0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    vdpps(a, b, IMM8)
}

/// Horizontal addition of adjacent pairs in the two packed vectors
/// of 4 64-bit floating-point elements in `a` and `b`.
/// In the result, sums of elements from `a` are returned in even locations,
/// while sums of elements from `b` are returned in odd locations.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_pd)
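///
/// An illustrative sketch of the result layout (not part of the original
/// documentation; assumes AVX at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///         let b = _mm256_setr_pd(10.0, 20.0, 30.0, 40.0);
///         // Per 128-bit lane: [a0 + a1, b0 + b1 | a2 + a3, b2 + b3].
///         let r = _mm256_hadd_pd(a, b);
///         let mut out = [0.0f64; 4];
///         _mm256_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [3.0, 30.0, 7.0, 70.0]);
///     }
/// }
/// # }
/// ```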
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
    vhaddpd(a, b)
}

/// Horizontal addition of adjacent pairs in the two packed vectors
/// of 8 32-bit floating-point elements in `a` and `b`.
/// In the result, sums of elements from `a` are returned in locations of
/// indices 0, 1, 4, 5; while sums of elements from `b` are returned in
/// locations 2, 3, 6, 7.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
    vhaddps(a, b)
}

/// Horizontal subtraction of adjacent pairs in the two packed vectors
/// of 4 64-bit floating-point elements in `a` and `b`.
/// In the result, differences of adjacent pairs from `a` are returned in even
/// locations, while differences of adjacent pairs from `b` are returned in
/// odd locations.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
    vhsubpd(a, b)
}

/// Horizontal subtraction of adjacent pairs in the two packed vectors
/// of 8 32-bit floating-point elements in `a` and `b`.
/// In the result, differences of adjacent pairs from `a` are returned in
/// locations of indices 0, 1, 4, 5; while differences of adjacent pairs from
/// `b` are returned in locations 2, 3, 6, 7.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
    vhsubps(a, b)
}

/// Computes the bitwise XOR of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_pd)
#[inline]
#[target_feature(enable = "avx")]
// FIXME Should be 'vxorpd' instruction.
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_xor(a, b))
}

/// Computes the bitwise XOR of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_xor(a, b))
}

/// Equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OQ: i32 = 0x00;
/// Less-than (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OS: i32 = 0x01;
/// Less-than-or-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OS: i32 = 0x02;
/// Unordered (non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_Q: i32 = 0x03;
/// Not-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_UQ: i32 = 0x04;
/// Not-less-than (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_US: i32 = 0x05;
/// Not-less-than-or-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_US: i32 = 0x06;
/// Ordered (non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_Q: i32 = 0x07;
/// Equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_UQ: i32 = 0x08;
/// Not-greater-than-or-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_US: i32 = 0x09;
/// Not-greater-than (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_US: i32 = 0x0a;
/// False (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OQ: i32 = 0x0b;
/// Not-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OQ: i32 = 0x0c;
/// Greater-than-or-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OS: i32 = 0x0d;
/// Greater-than (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OS: i32 = 0x0e;
/// True (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_UQ: i32 = 0x0f;
/// Equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OS: i32 = 0x10;
/// Less-than (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OQ: i32 = 0x11;
/// Less-than-or-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OQ: i32 = 0x12;
/// Unordered (signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_S: i32 = 0x13;
/// Not-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_US: i32 = 0x14;
/// Not-less-than (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_UQ: i32 = 0x15;
/// Not-less-than-or-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_UQ: i32 = 0x16;
/// Ordered (signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_S: i32 = 0x17;
/// Equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_US: i32 = 0x18;
/// Not-greater-than-or-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_UQ: i32 = 0x19;
/// Not-greater-than (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_UQ: i32 = 0x1a;
/// False (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OS: i32 = 0x1b;
/// Not-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OS: i32 = 0x1c;
/// Greater-than-or-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OQ: i32 = 0x1d;
/// Greater-than (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OQ: i32 = 0x1e;
/// True (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_US: i32 = 0x1f;

/// Compares packed double-precision (64-bit) floating-point
/// elements in `a` and `b` based on the comparison operand
/// specified by `IMM5`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd)
#[inline]
#[target_feature(enable = "avx,sse2")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmppd(a, b, const { IMM5 as i8 })
}

/// Compares packed double-precision (64-bit) floating-point
/// elements in `a` and `b` based on the comparison operand
/// specified by `IMM5`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd)
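///
/// An illustrative sketch using one of the `_CMP_*` predicate constants
/// defined above (not part of the original documentation; assumes AVX at
/// runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///         let b = _mm256_set1_pd(2.0);
///         // Elements where `a < b` become all-ones bit patterns; the rest
///         // become zero. `movemask` collects the sign bits.
///         let r = _mm256_cmp_pd::<_CMP_LT_OQ>(a, b);
///         assert_eq!(_mm256_movemask_pd(r), 0b0001);
///     }
/// }
/// # }
/// ```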
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmppd256(a, b, IMM5 as u8)
}

/// Compares packed single-precision (32-bit) floating-point
/// elements in `a` and `b` based on the comparison operand
/// specified by `IMM5`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps)
#[inline]
#[target_feature(enable = "avx,sse")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpps(a, b, const { IMM5 as i8 })
}

/// Compares packed single-precision (32-bit) floating-point
/// elements in `a` and `b` based on the comparison operand
/// specified by `IMM5`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpps256(a, b, const { IMM5 as u8 })
}

/// Compares the lower double-precision (64-bit) floating-point element in
/// `a` and `b` based on the comparison operand specified by `IMM5`,
/// stores the result in the lower element of the returned vector,
/// and copies the upper element from `a` to the upper element of the returned
/// vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd)
#[inline]
#[target_feature(enable = "avx,sse2")]
#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))] // TODO Validate vcmpsd
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpsd(a, b, IMM5 as i8)
}

/// Compares the lower single-precision (32-bit) floating-point element in
/// `a` and `b` based on the comparison operand specified by `IMM5`,
/// stores the result in the lower element of the returned vector,
/// and copies the upper 3 packed elements from `a` to the upper elements of
/// the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss)
#[inline]
#[target_feature(enable = "avx,sse")]
#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))] // TODO Validate vcmpss
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpss(a, b, IMM5 as i8)
}

/// Converts packed 32-bit integers in `a` to packed double-precision (64-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
    simd_cast(a.as_i32x4())
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
    simd_cast(a.as_i32x8())
}

/// Converts packed double-precision (64-bit) floating-point elements in `a`
/// to packed single-precision (32-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
    simd_cast(a)
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi32)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
    transmute(vcvtps2dq(a))
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d {
    simd_cast(a)
}

/// Converts packed double-precision (64-bit) floating-point elements in `a`
/// to packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi32)
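///
/// An illustrative sketch showing the truncation toward zero (not part of the
/// original documentation; assumes AVX at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_pd(1.9, -2.9, 3.5, -4.5);
///         let r = _mm256_cvttpd_epi32(a);
///         let mut out = [0i32; 4];
///         _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///         // Unlike `_mm256_cvtpd_epi32`, fractional parts are discarded.
///         assert_eq!(out, [1, -2, 3, -4]);
///     }
/// }
/// # }
/// ```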
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
    transmute(vcvttpd2dq(a))
}

/// Converts packed double-precision (64-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi32)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
    transmute(vcvtpd2dq(a))
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi32)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
    transmute(vcvttps2dq(a))
}

/// Extracts 128 bits (composed of 4 packed single-precision (32-bit)
/// floating-point elements) from `a`, selected with `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_ps)
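///
/// An illustrative sketch extracting the upper half (not part of the original
/// documentation; assumes AVX at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///         // 0 selects the low 128 bits, 1 selects the high 128 bits.
///         let hi = _mm256_extractf128_ps::<1>(a);
///         let mut out = [0.0f32; 4];
///         _mm_storeu_ps(out.as_mut_ptr(), hi);
///         assert_eq!(out, [5.0, 6.0, 7.0, 8.0]);
///     }
/// }
/// # }
/// ```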
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
    static_assert_uimm_bits!(IMM1, 1);
    simd_shuffle!(
        a,
        _mm256_undefined_ps(),
        [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
    )
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a`, selected with `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
    static_assert_uimm_bits!(IMM1, 1);
    simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize])
}

/// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_si256)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    let dst: i64x2 = simd_shuffle!(
        a.as_i64x4(),
        _mm256_undefined_si256().as_i64x4(),
        [[0, 1], [2, 3]][IMM1 as usize],
    );
    transmute(dst)
}

/// Zeroes the contents of all XMM or YMM registers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroall))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zeroall() {
    vzeroall()
}

/// Zeroes the upper 128 bits of all YMM registers;
/// the lower 128-bits of the registers are unmodified.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroupper))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zeroupper() {
    vzeroupper()
}

/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_ps)
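///
/// An illustrative sketch: the low two bits of each 32-bit control element
/// select an element within the same 128-bit lane of `a`. Not part of the
/// original documentation; assumes AVX at runtime:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///         let idx = _mm256_setr_epi32(3, 2, 1, 0, 0, 0, 3, 3);
///         let r = _mm256_permutevar_ps(a, idx);
///         let mut out = [0.0f32; 8];
///         _mm256_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!(out, [4.0, 3.0, 2.0, 1.0, 5.0, 5.0, 8.0, 8.0]);
///     }
/// }
/// # }
/// ```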
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
    vpermilps256(a, b.as_i32x8())
}

/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// using the control in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
    vpermilps(a, b.as_i32x4())
}

/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    simd_shuffle!(
        a,
        _mm256_undefined_ps(),
        [
            (IMM8 as u32 >> 0) & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
            ((IMM8 as u32 >> 0) & 0b11) + 4,
            ((IMM8 as u32 >> 2) & 0b11) + 4,
            ((IMM8 as u32 >> 4) & 0b11) + 4,
            ((IMM8 as u32 >> 6) & 0b11) + 4,
        ],
    )
}

/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_ps)
#[inline]
#[target_feature(enable = "avx,sse")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    simd_shuffle!(
        a,
        _mm_undefined_ps(),
        [
            (IMM8 as u32 >> 0) & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
        ],
    )
}

/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
    vpermilpd256(a, b.as_i64x4())
}

/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// using the control in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
    vpermilpd(a, b.as_i64x2())
}

/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    simd_shuffle!(
        a,
        _mm256_undefined_pd(),
        [
            ((IMM4 as u32 >> 0) & 1),
            ((IMM4 as u32 >> 1) & 1),
            ((IMM4 as u32 >> 2) & 1) + 2,
            ((IMM4 as u32 >> 3) & 1) + 2,
        ],
    )
}

/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_pd)
#[inline]
#[target_feature(enable = "avx,sse2")]
#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    simd_shuffle!(
        a,
        _mm_undefined_pd(),
        [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
    )
}

/// Shuffles 256 bits (composed of 8 packed single-precision (32-bit)
/// floating-point elements) selected by `imm8` from `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    vperm2f128ps256(a, b, IMM8 as i8)
}

/// Shuffles 256 bits (composed of 4 packed double-precision (64-bit)
/// floating-point elements) selected by `imm8` from `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    vperm2f128pd256(a, b, IMM8 as i8)
}

/// Shuffles 128-bits (composed of integer data) selected by `imm8`
/// from `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_si256)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8))
}

/// Broadcasts a single-precision (32-bit) floating-point element from memory
/// to all elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ss)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 {
    _mm256_set1_ps(*f)
}

/// Broadcasts a single-precision (32-bit) floating-point element from memory
/// to all elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_ss)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 {
    _mm_set1_ps(*f)
}

/// Broadcasts a double-precision (64-bit) floating-point element from memory
/// to all elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_sd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d {
    _mm256_set1_pd(*f)
}

/// Broadcasts 128 bits from memory (composed of 4 packed single-precision
/// (32-bit) floating-point elements) to all elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
    simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3])
}

/// Broadcasts 128 bits from memory (composed of 2 packed double-precision
/// (64-bit) floating-point elements) to all elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
    simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1])
}

/// Copies `a` to result, then inserts 128 bits (composed of 4 packed
/// single-precision (32-bit) floating-point elements) from `b` into result
/// at the location specified by `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_ps)
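///
/// An illustrative sketch replacing the upper 128 bits (not part of the
/// original documentation; assumes AVX at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
/// if is_x86_feature_detected!("avx") {
///     unsafe {
///         let a = _mm256_set1_ps(1.0);
///         let b = _mm_setr_ps(10.0, 20.0, 30.0, 40.0);
///         // Keep the lower half of `a`, replace the upper half with `b`.
///         let r = _mm256_insertf128_ps::<1>(a, b);
///         let mut out = [0.0f32; 8];
///         _mm256_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!(out, [1.0, 1.0, 1.0, 1.0, 10.0, 20.0, 30.0, 40.0]);
///     }
/// }
/// # }
/// ```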
1259#[inline]
1260#[target_feature(enable = "avx")]
1261#[cfg_attr(
1262 all(test, not(target_os = "windows")),
1263 assert_instr(vinsertf128, IMM1 = 1)
1264)]
1265#[rustc_legacy_const_generics(2)]
1266#[stable(feature = "simd_x86", since = "1.27.0")]
1267pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
1268 static_assert_uimm_bits!(IMM1, 1);
1269 simd_shuffle!(
1270 a,
1271 _mm256_castps128_ps256(b),
1272 [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
1273 )
1274}
1275
1276/// Copies `a` to result, then inserts 128 bits (composed of 2 packed
1277/// double-precision (64-bit) floating-point elements) from `b` into result
1278/// at the location specified by `imm8`.
1279///
1280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_pd)
1281#[inline]
1282#[target_feature(enable = "avx")]
1283#[cfg_attr(
1284 all(test, not(target_os = "windows")),
1285 assert_instr(vinsertf128, IMM1 = 1)
1286)]
1287#[rustc_legacy_const_generics(2)]
1288#[stable(feature = "simd_x86", since = "1.27.0")]
1289pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
1290 static_assert_uimm_bits!(IMM1, 1);
1291 simd_shuffle!(
1292 a,
1293 _mm256_castpd128_pd256(b),
1294 [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
1295 )
1296}
1297
1298/// Copies `a` to result, then inserts 128 bits from `b` into result
1299/// at the location specified by `imm8`.
1300///
1301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_si256)
1302#[inline]
1303#[target_feature(enable = "avx")]
1304#[cfg_attr(
1305 all(test, not(target_os = "windows")),
1306 assert_instr(vinsertf128, IMM1 = 1)
1307)]
1308#[rustc_legacy_const_generics(2)]
1309#[stable(feature = "simd_x86", since = "1.27.0")]
1310pub unsafe fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
1311 static_assert_uimm_bits!(IMM1, 1);
1312 let dst: i64x4 = simd_shuffle!(
1313 a.as_i64x4(),
1314 _mm256_castsi128_si256(b).as_i64x4(),
1315 [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
1316 );
1317 transmute(src:dst)
1318}
1319
1320/// Copies `a` to result, and inserts the 8-bit integer `i` into result
1321/// at the location specified by `index`.
1322///
1323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi8)
1324#[inline]
1325#[target_feature(enable = "avx")]
1326// This intrinsic has no corresponding instruction.
1327#[rustc_legacy_const_generics(2)]
1328#[stable(feature = "simd_x86", since = "1.27.0")]
1329pub unsafe fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
1330 static_assert_uimm_bits!(INDEX, 5);
1331 transmute(src:simd_insert!(a.as_i8x32(), INDEX as u32, i))
1332}
1333
1334/// Copies `a` to result, and inserts the 16-bit integer `i` into result
1335/// at the location specified by `index`.
1336///
1337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi16)
1338#[inline]
1339#[target_feature(enable = "avx")]
1340// This intrinsic has no corresponding instruction.
1341#[rustc_legacy_const_generics(2)]
1342#[stable(feature = "simd_x86", since = "1.27.0")]
1343pub unsafe fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
1344 static_assert_uimm_bits!(INDEX, 4);
1345 transmute(src:simd_insert!(a.as_i16x16(), INDEX as u32, i))
1346}
1347
1348/// Copies `a` to result, and inserts the 32-bit integer `i` into result
1349/// at the location specified by `index`.
1350///
1351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi32)
1352#[inline]
1353#[target_feature(enable = "avx")]
1354// This intrinsic has no corresponding instruction.
1355#[rustc_legacy_const_generics(2)]
1356#[stable(feature = "simd_x86", since = "1.27.0")]
1357pub unsafe fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
1358 static_assert_uimm_bits!(INDEX, 3);
    transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i))
1360}
1361
1362/// Loads 256-bits (composed of 4 packed double-precision (64-bit)
1363/// floating-point elements) from memory into result.
1364/// `mem_addr` must be aligned on a 32-byte boundary or a
1365/// general-protection exception may be generated.
1366///
1367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd)
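///
/// # Examples
///
/// A minimal sketch, assuming AVX is detected at runtime; the local `Aligned`
/// wrapper exists only to obtain a 32-byte aligned buffer:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx") {
///         #[repr(align(32))]
///         struct Aligned([f64; 4]);
///
///         let data = Aligned([1.0, 2.0, 3.0, 4.0]);
///         unsafe {
///             let v = _mm256_load_pd(data.0.as_ptr());
///             let mut out = [0.0f64; 4];
///             _mm256_storeu_pd(out.as_mut_ptr(), v);
///             assert_eq!(out, data.0);
///         }
///     }
/// }
/// ```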
1368#[inline]
1369#[target_feature(enable = "avx")]
1370#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1371#[stable(feature = "simd_x86", since = "1.27.0")]
1372#[allow(clippy::cast_ptr_alignment)]
1373pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
1374 *(mem_addr as *const __m256d)
1375}
1376
1377/// Stores 256-bits (composed of 4 packed double-precision (64-bit)
1378/// floating-point elements) from `a` into memory.
1379/// `mem_addr` must be aligned on a 32-byte boundary or a
1380/// general-protection exception may be generated.
1381///
1382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd)
1383#[inline]
1384#[target_feature(enable = "avx")]
1385#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1386#[stable(feature = "simd_x86", since = "1.27.0")]
1387#[allow(clippy::cast_ptr_alignment)]
1388pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
1389 *(mem_addr as *mut __m256d) = a;
1390}
1391
1392/// Loads 256-bits (composed of 8 packed single-precision (32-bit)
1393/// floating-point elements) from memory into result.
1394/// `mem_addr` must be aligned on a 32-byte boundary or a
1395/// general-protection exception may be generated.
1396///
1397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_ps)
1398#[inline]
1399#[target_feature(enable = "avx")]
1400#[cfg_attr(test, assert_instr(vmovaps))]
1401#[stable(feature = "simd_x86", since = "1.27.0")]
1402#[allow(clippy::cast_ptr_alignment)]
1403pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
1404 *(mem_addr as *const __m256)
1405}
1406
1407/// Stores 256-bits (composed of 8 packed single-precision (32-bit)
1408/// floating-point elements) from `a` into memory.
1409/// `mem_addr` must be aligned on a 32-byte boundary or a
1410/// general-protection exception may be generated.
1411///
1412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_ps)
1413#[inline]
1414#[target_feature(enable = "avx")]
1415#[cfg_attr(test, assert_instr(vmovaps))]
1416#[stable(feature = "simd_x86", since = "1.27.0")]
1417#[allow(clippy::cast_ptr_alignment)]
1418pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
1419 *(mem_addr as *mut __m256) = a;
1420}
1421
1422/// Loads 256-bits (composed of 4 packed double-precision (64-bit)
1423/// floating-point elements) from memory into result.
1424/// `mem_addr` does not need to be aligned on any particular boundary.
1425///
1426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_pd)
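///
/// # Examples
///
/// A minimal sketch of an unaligned load/store round trip, assuming AVX is
/// detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx") {
///         unsafe {
///             // Any valid `*const f64` pointing at four values works here.
///             let data = [1.5f64, -2.5, 3.5, -4.5];
///             let v = _mm256_loadu_pd(data.as_ptr());
///             let mut out = [0.0f64; 4];
///             _mm256_storeu_pd(out.as_mut_ptr(), v);
///             assert_eq!(out, data);
///         }
///     }
/// }
/// ```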
1427#[inline]
1428#[target_feature(enable = "avx")]
1429#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
1430#[stable(feature = "simd_x86", since = "1.27.0")]
1431pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
1432 let mut dst: __m256d = _mm256_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256d>(),
    );
1438 dst
1439}
1440
1441/// Stores 256-bits (composed of 4 packed double-precision (64-bit)
1442/// floating-point elements) from `a` into memory.
1443/// `mem_addr` does not need to be aligned on any particular boundary.
1444///
1445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_pd)
1446#[inline]
1447#[target_feature(enable = "avx")]
1448#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
1449#[stable(feature = "simd_x86", since = "1.27.0")]
1450pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
    mem_addr.cast::<__m256d>().write_unaligned(a);
1452}
1453
1454/// Loads 256-bits (composed of 8 packed single-precision (32-bit)
1455/// floating-point elements) from memory into result.
1456/// `mem_addr` does not need to be aligned on any particular boundary.
1457///
1458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_ps)
1459#[inline]
1460#[target_feature(enable = "avx")]
1461#[cfg_attr(test, assert_instr(vmovups))]
1462#[stable(feature = "simd_x86", since = "1.27.0")]
1463pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
1464 let mut dst: __m256 = _mm256_undefined_ps();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256>(),
    );
1470 dst
1471}
1472
1473/// Stores 256-bits (composed of 8 packed single-precision (32-bit)
1474/// floating-point elements) from `a` into memory.
1475/// `mem_addr` does not need to be aligned on any particular boundary.
1476///
1477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_ps)
1478#[inline]
1479#[target_feature(enable = "avx")]
1480#[cfg_attr(test, assert_instr(vmovups))]
1481#[stable(feature = "simd_x86", since = "1.27.0")]
1482pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
    mem_addr.cast::<__m256>().write_unaligned(a);
1484}
1485
1486/// Loads 256-bits of integer data from memory into result.
1487/// `mem_addr` must be aligned on a 32-byte boundary or a
1488/// general-protection exception may be generated.
1489///
1490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_si256)
1491#[inline]
1492#[target_feature(enable = "avx")]
1493#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
1494#[stable(feature = "simd_x86", since = "1.27.0")]
1495pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
1496 *mem_addr
1497}
1498
1499/// Stores 256-bits of integer data from `a` into memory.
1500/// `mem_addr` must be aligned on a 32-byte boundary or a
1501/// general-protection exception may be generated.
1502///
1503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_si256)
1504#[inline]
1505#[target_feature(enable = "avx")]
1506#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
1507#[stable(feature = "simd_x86", since = "1.27.0")]
1508pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
1509 *mem_addr = a;
1510}
1511
1512/// Loads 256-bits of integer data from memory into result.
1513/// `mem_addr` does not need to be aligned on any particular boundary.
1514///
1515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_si256)
1516#[inline]
1517#[target_feature(enable = "avx")]
1518#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
1519#[stable(feature = "simd_x86", since = "1.27.0")]
1520pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
1521 let mut dst: __m256i = _mm256_undefined_si256();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256i>(),
    );
1527 dst
1528}
1529
1530/// Stores 256-bits of integer data from `a` into memory.
1531/// `mem_addr` does not need to be aligned on any particular boundary.
1532///
1533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_si256)
1534#[inline]
1535#[target_feature(enable = "avx")]
1536#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
1537#[stable(feature = "simd_x86", since = "1.27.0")]
1538pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
    mem_addr.write_unaligned(a);
1540}
1541
1542/// Loads packed double-precision (64-bit) floating-point elements from memory
1543/// into result using `mask` (elements are zeroed out when the high bit of the
1544/// corresponding element is not set).
1545///
1546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_pd)
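///
/// # Examples
///
/// A minimal sketch, assuming AVX is detected at runtime; the mask lanes use
/// `-1` (high bit set, element loaded) and `0` (high bit clear, element zeroed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx") {
///         unsafe {
///             let data = [1.0f64, 2.0, 3.0, 4.0];
///             let mask = _mm256_setr_epi64x(-1, 0, -1, 0);
///             let v = _mm256_maskload_pd(data.as_ptr(), mask);
///             let mut out = [0.0f64; 4];
///             _mm256_storeu_pd(out.as_mut_ptr(), v);
///             // Elements with a cleared mask bit come back as zero.
///             assert_eq!(out, [1.0, 0.0, 3.0, 0.0]);
///         }
///     }
/// }
/// ```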
1547#[inline]
1548#[target_feature(enable = "avx")]
1549#[cfg_attr(test, assert_instr(vmaskmovpd))]
1550#[stable(feature = "simd_x86", since = "1.27.0")]
1551pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
    maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
1553}
1554
1555/// Stores packed double-precision (64-bit) floating-point elements from `a`
1556/// into memory using `mask`.
1557///
1558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_pd)
1559#[inline]
1560#[target_feature(enable = "avx")]
1561#[cfg_attr(test, assert_instr(vmaskmovpd))]
1562#[stable(feature = "simd_x86", since = "1.27.0")]
1563pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
    maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
1565}
1566
1567/// Loads packed double-precision (64-bit) floating-point elements from memory
1568/// into result using `mask` (elements are zeroed out when the high bit of the
1569/// corresponding element is not set).
1570///
1571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_pd)
1572#[inline]
1573#[target_feature(enable = "avx")]
1574#[cfg_attr(test, assert_instr(vmaskmovpd))]
1575#[stable(feature = "simd_x86", since = "1.27.0")]
1576pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
    maskloadpd(mem_addr as *const i8, mask.as_i64x2())
1578}
1579
1580/// Stores packed double-precision (64-bit) floating-point elements from `a`
1581/// into memory using `mask`.
1582///
1583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_pd)
1584#[inline]
1585#[target_feature(enable = "avx")]
1586#[cfg_attr(test, assert_instr(vmaskmovpd))]
1587#[stable(feature = "simd_x86", since = "1.27.0")]
1588pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
    maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
1590}
1591
1592/// Loads packed single-precision (32-bit) floating-point elements from memory
1593/// into result using `mask` (elements are zeroed out when the high bit of the
1594/// corresponding element is not set).
1595///
1596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_ps)
1597#[inline]
1598#[target_feature(enable = "avx")]
1599#[cfg_attr(test, assert_instr(vmaskmovps))]
1600#[stable(feature = "simd_x86", since = "1.27.0")]
1601pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
    maskloadps256(mem_addr as *const i8, mask.as_i32x8())
1603}
1604
1605/// Stores packed single-precision (32-bit) floating-point elements from `a`
1606/// into memory using `mask`.
1607///
1608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_ps)
1609#[inline]
1610#[target_feature(enable = "avx")]
1611#[cfg_attr(test, assert_instr(vmaskmovps))]
1612#[stable(feature = "simd_x86", since = "1.27.0")]
1613pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
    maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
1615}
1616
1617/// Loads packed single-precision (32-bit) floating-point elements from memory
1618/// into result using `mask` (elements are zeroed out when the high bit of the
1619/// corresponding element is not set).
1620///
1621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_ps)
1622#[inline]
1623#[target_feature(enable = "avx")]
1624#[cfg_attr(test, assert_instr(vmaskmovps))]
1625#[stable(feature = "simd_x86", since = "1.27.0")]
1626pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
    maskloadps(mem_addr as *const i8, mask.as_i32x4())
1628}
1629
1630/// Stores packed single-precision (32-bit) floating-point elements from `a`
1631/// into memory using `mask`.
1632///
1633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_ps)
1634#[inline]
1635#[target_feature(enable = "avx")]
1636#[cfg_attr(test, assert_instr(vmaskmovps))]
1637#[stable(feature = "simd_x86", since = "1.27.0")]
1638pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
    maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
1640}
1641
/// Duplicates odd-indexed single-precision (32-bit) floating-point elements
1643/// from `a`, and returns the results.
1644///
1645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movehdup_ps)
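///
/// # Examples
///
/// A minimal sketch, assuming AVX is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx") {
///         unsafe {
///             let a = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///             let r = _mm256_movehdup_ps(a);
///             let mut out = [0.0f32; 8];
///             _mm256_storeu_ps(out.as_mut_ptr(), r);
///             // Each odd-indexed element is duplicated into the slot below it.
///             assert_eq!(out, [1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0]);
///         }
///     }
/// }
/// ```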
1646#[inline]
1647#[target_feature(enable = "avx")]
1648#[cfg_attr(test, assert_instr(vmovshdup))]
1649#[stable(feature = "simd_x86", since = "1.27.0")]
1650pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
1651 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
1652}
1653
/// Duplicates even-indexed single-precision (32-bit) floating-point elements
1655/// from `a`, and returns the results.
1656///
1657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_moveldup_ps)
1658#[inline]
1659#[target_feature(enable = "avx")]
1660#[cfg_attr(test, assert_instr(vmovsldup))]
1661#[stable(feature = "simd_x86", since = "1.27.0")]
1662pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
1663 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
1664}
1665
/// Duplicates even-indexed double-precision (64-bit) floating-point elements
1667/// from `a`, and returns the results.
1668///
1669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movedup_pd)
1670#[inline]
1671#[target_feature(enable = "avx")]
1672#[cfg_attr(test, assert_instr(vmovddup))]
1673#[stable(feature = "simd_x86", since = "1.27.0")]
1674pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d {
1675 simd_shuffle!(a, a, [0, 0, 2, 2])
1676}
1677
1678/// Loads 256-bits of integer data from unaligned memory into result.
1679/// This intrinsic may perform better than `_mm256_loadu_si256` when the
1680/// data crosses a cache line boundary.
1681///
1682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lddqu_si256)
1683#[inline]
1684#[target_feature(enable = "avx")]
1685#[cfg_attr(test, assert_instr(vlddqu))]
1686#[stable(feature = "simd_x86", since = "1.27.0")]
1687pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
    transmute(vlddqu(mem_addr as *const i8))
1689}
1690
1691/// Moves integer data from a 256-bit integer vector to a 32-byte
1692/// aligned memory location. To minimize caching, the data is flagged as
/// non-temporal (unlikely to be used again soon).
1694///
1695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_si256)
1696///
1697/// # Safety of non-temporal stores
1698///
1699/// After using this intrinsic, but before any other access to the memory that this intrinsic
1700/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
1701/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
1702/// return.
1703///
1704/// See [`_mm_sfence`] for details.
1705#[inline]
1706#[target_feature(enable = "avx")]
1707#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
1708#[stable(feature = "simd_x86", since = "1.27.0")]
1709pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
    intrinsics::nontemporal_store(mem_addr, a);
1711}
1712
1713/// Moves double-precision values from a 256-bit vector of `[4 x double]`
1714/// to a 32-byte aligned memory location. To minimize caching, the data is
1715/// flagged as non-temporal (unlikely to be used again soon).
1716///
1717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_pd)
1718///
1719/// # Safety of non-temporal stores
1720///
1721/// After using this intrinsic, but before any other access to the memory that this intrinsic
1722/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
1723/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
1724/// return.
1725///
1726/// See [`_mm_sfence`] for details.
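///
/// # Examples
///
/// A minimal sketch, assuming AVX is detected at runtime; the local `Aligned`
/// wrapper exists only to obtain the required 32-byte aligned destination:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx") {
///         #[repr(align(32))]
///         struct Aligned([f64; 4]);
///
///         let mut out = Aligned([0.0; 4]);
///         unsafe {
///             let v = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///             _mm256_stream_pd(out.0.as_mut_ptr(), v);
///             // Fence before the stored memory is accessed again.
///             _mm_sfence();
///             assert_eq!(out.0, [1.0, 2.0, 3.0, 4.0]);
///         }
///     }
/// }
/// ```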
1727#[inline]
1728#[target_feature(enable = "avx")]
1729#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
1730#[stable(feature = "simd_x86", since = "1.27.0")]
1731#[allow(clippy::cast_ptr_alignment)]
1732pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
    intrinsics::nontemporal_store(mem_addr as *mut __m256d, a);
1734}
1735
1736/// Moves single-precision floating point values from a 256-bit vector
1737/// of `[8 x float]` to a 32-byte aligned memory location. To minimize
1738/// caching, the data is flagged as non-temporal (unlikely to be used again
1739/// soon).
1740///
1741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_ps)
1742///
1743/// # Safety of non-temporal stores
1744///
1745/// After using this intrinsic, but before any other access to the memory that this intrinsic
1746/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
1747/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
1748/// return.
1749///
1750/// See [`_mm_sfence`] for details.
1751#[inline]
1752#[target_feature(enable = "avx")]
1753#[cfg_attr(test, assert_instr(vmovntps))]
1754#[stable(feature = "simd_x86", since = "1.27.0")]
1755#[allow(clippy::cast_ptr_alignment)]
1756pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) {
    intrinsics::nontemporal_store(mem_addr as *mut __m256, a);
1758}
1759
1760/// Computes the approximate reciprocal of packed single-precision (32-bit)
1761/// floating-point elements in `a`, and returns the results. The maximum
1762/// relative error for this approximation is less than 1.5*2^-12.
1763///
1764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp_ps)
1765#[inline]
1766#[target_feature(enable = "avx")]
1767#[cfg_attr(test, assert_instr(vrcpps))]
1768#[stable(feature = "simd_x86", since = "1.27.0")]
1769pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 {
1770 vrcpps(a)
1771}
1772
1773/// Computes the approximate reciprocal square root of packed single-precision
1774/// (32-bit) floating-point elements in `a`, and returns the results.
1775/// The maximum relative error for this approximation is less than 1.5*2^-12.
1776///
1777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt_ps)
1778#[inline]
1779#[target_feature(enable = "avx")]
1780#[cfg_attr(test, assert_instr(vrsqrtps))]
1781#[stable(feature = "simd_x86", since = "1.27.0")]
1782pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
1783 vrsqrtps(a)
1784}
1785
/// Unpacks and interleaves double-precision (64-bit) floating-point elements
1787/// from the high half of each 128-bit lane in `a` and `b`.
1788///
1789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_pd)
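///
/// # Examples
///
/// A minimal sketch, assuming AVX is detected at runtime; note that the
/// interleaving happens independently within each 128-bit lane:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx") {
///         unsafe {
///             let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///             let b = _mm256_setr_pd(5.0, 6.0, 7.0, 8.0);
///             let r = _mm256_unpackhi_pd(a, b);
///             let mut out = [0.0f64; 4];
///             _mm256_storeu_pd(out.as_mut_ptr(), r);
///             assert_eq!(out, [2.0, 6.0, 4.0, 8.0]);
///         }
///     }
/// }
/// ```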
1790#[inline]
1791#[target_feature(enable = "avx")]
1792#[cfg_attr(test, assert_instr(vunpckhpd))]
1793#[stable(feature = "simd_x86", since = "1.27.0")]
1794pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
1795 simd_shuffle!(a, b, [1, 5, 3, 7])
1796}
1797
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
1799/// from the high half of each 128-bit lane in `a` and `b`.
1800///
1801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_ps)
1802#[inline]
1803#[target_feature(enable = "avx")]
1804#[cfg_attr(test, assert_instr(vunpckhps))]
1805#[stable(feature = "simd_x86", since = "1.27.0")]
1806pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
1807 simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
1808}
1809
/// Unpacks and interleaves double-precision (64-bit) floating-point elements
1811/// from the low half of each 128-bit lane in `a` and `b`.
1812///
1813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_pd)
1814#[inline]
1815#[target_feature(enable = "avx")]
1816#[cfg_attr(test, assert_instr(vunpcklpd))]
1817#[stable(feature = "simd_x86", since = "1.27.0")]
1818pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
1819 simd_shuffle!(a, b, [0, 4, 2, 6])
1820}
1821
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
1823/// from the low half of each 128-bit lane in `a` and `b`.
1824///
1825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_ps)
1826#[inline]
1827#[target_feature(enable = "avx")]
1828#[cfg_attr(test, assert_instr(vunpcklps))]
1829#[stable(feature = "simd_x86", since = "1.27.0")]
1830pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
1831 simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
1832}
1833
1834/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1835/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1836/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1837/// the result is zero, otherwise set `CF` to 0. Return the `ZF` value.
1838///
1839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256)
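///
/// # Examples
///
/// A minimal sketch, assuming AVX is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx") {
///         unsafe {
///             let a = _mm256_set1_epi8(0b0101_0101);
///             let b = _mm256_set1_epi8(0b1010_1010u8 as i8);
///             // `a & b` is all zeros, so `ZF` is set and 1 is returned.
///             assert_eq!(_mm256_testz_si256(a, b), 1);
///             // `a & a` is non-zero, so `ZF` is clear and 0 is returned.
///             assert_eq!(_mm256_testz_si256(a, a), 0);
///         }
///     }
/// }
/// ```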
1840#[inline]
1841#[target_feature(enable = "avx")]
1842#[cfg_attr(test, assert_instr(vptest))]
1843#[stable(feature = "simd_x86", since = "1.27.0")]
1844pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
    ptestz256(a.as_i64x4(), b.as_i64x4())
1846}
1847
1848/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1849/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1850/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1851/// the result is zero, otherwise set `CF` to 0. Return the `CF` value.
1852///
1853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_si256)
1854#[inline]
1855#[target_feature(enable = "avx")]
1856#[cfg_attr(test, assert_instr(vptest))]
1857#[stable(feature = "simd_x86", since = "1.27.0")]
1858pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
    ptestc256(a.as_i64x4(), b.as_i64x4())
1860}
1861
1862/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1863/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1864/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1865/// the result is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and
1866/// `CF` values are zero, otherwise return 0.
1867///
1868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_si256)
1869#[inline]
1870#[target_feature(enable = "avx")]
1871#[cfg_attr(test, assert_instr(vptest))]
1872#[stable(feature = "simd_x86", since = "1.27.0")]
1873pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 {
    ptestnzc256(a.as_i64x4(), b.as_i64x4())
1875}
1876
1877/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1878/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1879/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1880/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1881/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1882/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1883/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1884///
1885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_pd)
1886#[inline]
1887#[target_feature(enable = "avx")]
1888#[cfg_attr(test, assert_instr(vtestpd))]
1889#[stable(feature = "simd_x86", since = "1.27.0")]
1890pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 {
1891 vtestzpd256(a, b)
1892}
1893
1894/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1895/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1896/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1897/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1898/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1899/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1900/// is zero, otherwise set `CF` to 0. Return the `CF` value.
1901///
1902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_pd)
1903#[inline]
1904#[target_feature(enable = "avx")]
1905#[cfg_attr(test, assert_instr(vtestpd))]
1906#[stable(feature = "simd_x86", since = "1.27.0")]
1907pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 {
1908 vtestcpd256(a, b)
1909}
1910
1911/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1912/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1913/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1914/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1915/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1916/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1917/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
1918/// are zero, otherwise return 0.
1919///
1920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_pd)
1921#[inline]
1922#[target_feature(enable = "avx")]
1923#[cfg_attr(test, assert_instr(vtestpd))]
1924#[stable(feature = "simd_x86", since = "1.27.0")]
1925pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
1926 vtestnzcpd256(a, b)
1927}
1928
1929/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1930/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1931/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1932/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1933/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1934/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1935/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1936///
1937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_pd)
1938#[inline]
1939#[target_feature(enable = "avx")]
1940#[cfg_attr(test, assert_instr(vtestpd))]
1941#[stable(feature = "simd_x86", since = "1.27.0")]
1942pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
1943 vtestzpd(a, b)
1944}
1945
1946/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1947/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1948/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1949/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1950/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1951/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1952/// is zero, otherwise set `CF` to 0. Return the `CF` value.
1953///
1954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_pd)
1955#[inline]
1956#[target_feature(enable = "avx")]
1957#[cfg_attr(test, assert_instr(vtestpd))]
1958#[stable(feature = "simd_x86", since = "1.27.0")]
1959pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
1960 vtestcpd(a, b)
1961}
1962
1963/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1964/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1965/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1966/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1967/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1968/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1969/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
1970/// are zero, otherwise return 0.
1971///
1972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_pd)
1973#[inline]
1974#[target_feature(enable = "avx")]
1975#[cfg_attr(test, assert_instr(vtestpd))]
1976#[stable(feature = "simd_x86", since = "1.27.0")]
1977pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 {
1978 vtestnzcpd(a, b)
1979}
1980
1981/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
1982/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1983/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
1984/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1985/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1986/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
1987/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1988///
1989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_ps)
1990#[inline]
1991#[target_feature(enable = "avx")]
1992#[cfg_attr(test, assert_instr(vtestps))]
1993#[stable(feature = "simd_x86", since = "1.27.0")]
1994pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 {
1995 vtestzps256(a, b)
1996}
1997
1998/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
1999/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
2000/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2001/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2002/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2003/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2004/// is zero, otherwise set `CF` to 0. Return the `CF` value.
2005///
2006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_ps)
2007#[inline]
2008#[target_feature(enable = "avx")]
2009#[cfg_attr(test, assert_instr(vtestps))]
2010#[stable(feature = "simd_x86", since = "1.27.0")]
2011pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 {
2012 vtestcps256(a, b)
2013}
2014
2015/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
2016/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
2017/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2018/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2019/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2020/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2021/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
2022/// are zero, otherwise return 0.
2023///
2024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_ps)
2025#[inline]
2026#[target_feature(enable = "avx")]
2027#[cfg_attr(test, assert_instr(vtestps))]
2028#[stable(feature = "simd_x86", since = "1.27.0")]
2029pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
2030 vtestnzcps256(a, b)
2031}
2032
2033/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2034/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2035/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2036/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2037/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2038/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2039/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
2040///
2041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_ps)
2042#[inline]
2043#[target_feature(enable = "avx")]
2044#[cfg_attr(test, assert_instr(vtestps))]
2045#[stable(feature = "simd_x86", since = "1.27.0")]
2046pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
2047 vtestzps(a, b)
2048}
2049
2050/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2051/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2052/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2053/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2054/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2055/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2056/// is zero, otherwise set `CF` to 0. Return the `CF` value.
2057///
2058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_ps)
2059#[inline]
2060#[target_feature(enable = "avx")]
2061#[cfg_attr(test, assert_instr(vtestps))]
2062#[stable(feature = "simd_x86", since = "1.27.0")]
2063pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
2064 vtestcps(a, b)
2065}
2066
2067/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2068/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2069/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2070/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2071/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2072/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2073/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
2074/// are zero, otherwise return 0.
2075///
2076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_ps)
2077#[inline]
2078#[target_feature(enable = "avx")]
2079#[cfg_attr(test, assert_instr(vtestps))]
2080#[stable(feature = "simd_x86", since = "1.27.0")]
2081pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
2082 vtestnzcps(a, b)
2083}
2084
2085/// Sets each bit of the returned mask based on the most significant bit of the
2086/// corresponding packed double-precision (64-bit) floating-point element in
2087/// `a`.
2088///
2089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_pd)
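///
/// # Examples
///
/// A minimal sketch, assuming AVX is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx") {
///         unsafe {
///             let a = _mm256_setr_pd(-1.0, 2.0, -3.0, 4.0);
///             // Elements 0 and 2 are negative, so bits 0 and 2 are set.
///             assert_eq!(_mm256_movemask_pd(a), 0b0101);
///         }
///     }
/// }
/// ```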
2090#[inline]
2091#[target_feature(enable = "avx")]
2092#[cfg_attr(test, assert_instr(vmovmskpd))]
2093#[stable(feature = "simd_x86", since = "1.27.0")]
2094pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
2095 // Propagate the highest bit to the rest, because simd_bitmask
2096 // requires all-1 or all-0.
    let mask: i64x4 = simd_lt(transmute(a), i64x4::splat(0));
2098 simd_bitmask::<i64x4, u8>(mask).into()
2099}
2100
2101/// Sets each bit of the returned mask based on the most significant bit of the
2102/// corresponding packed single-precision (32-bit) floating-point element in
2103/// `a`.
2104///
2105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_ps)
2106#[inline]
2107#[target_feature(enable = "avx")]
2108#[cfg_attr(test, assert_instr(vmovmskps))]
2109#[stable(feature = "simd_x86", since = "1.27.0")]
2110pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
2111 // Propagate the highest bit to the rest, because simd_bitmask
2112 // requires all-1 or all-0.
    let mask: i32x8 = simd_lt(transmute(a), i32x8::splat(0));
2114 simd_bitmask::<i32x8, u8>(mask).into()
2115}
2116
2117/// Returns vector of type __m256d with all elements set to zero.
2118///
2119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_pd)
2120#[inline]
2121#[target_feature(enable = "avx")]
2122#[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected
2123#[stable(feature = "simd_x86", since = "1.27.0")]
2124pub unsafe fn _mm256_setzero_pd() -> __m256d {
2125 _mm256_set1_pd(0.0)
2126}
2127
2128/// Returns vector of type __m256 with all elements set to zero.
2129///
2130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_ps)
2131#[inline]
2132#[target_feature(enable = "avx")]
2133#[cfg_attr(test, assert_instr(vxorps))]
2134#[stable(feature = "simd_x86", since = "1.27.0")]
2135pub unsafe fn _mm256_setzero_ps() -> __m256 {
2136 _mm256_set1_ps(0.0)
2137}
2138
2139/// Returns vector of type __m256i with all elements set to zero.
2140///
2141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_si256)
2142#[inline]
2143#[target_feature(enable = "avx")]
2144#[cfg_attr(test, assert_instr(vxor))]
2145#[stable(feature = "simd_x86", since = "1.27.0")]
2146pub unsafe fn _mm256_setzero_si256() -> __m256i {
2147 _mm256_set1_epi8(0)
2148}
2149
2150/// Sets packed double-precision (64-bit) floating-point elements in returned
2151/// vector with the supplied values.
2152///
2153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_pd)
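///
/// # Examples
///
/// A minimal sketch, assuming AVX is detected at runtime; it shows that the
/// arguments are taken from the highest element down, i.e. the reverse of
/// `_mm256_setr_pd`:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx") {
///         unsafe {
///             let a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);
///             let mut out = [0.0f64; 4];
///             _mm256_storeu_pd(out.as_mut_ptr(), a);
///             // Element 0 (lowest) holds the last argument.
///             assert_eq!(out, [1.0, 2.0, 3.0, 4.0]);
///         }
///     }
/// }
/// ```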
2154#[inline]
2155#[target_feature(enable = "avx")]
2156// This intrinsic has no corresponding instruction.
2157#[cfg_attr(test, assert_instr(vinsertf128))]
2158#[stable(feature = "simd_x86", since = "1.27.0")]
2159pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
    _mm256_setr_pd(d, c, b, a)
2161}
2162
2163/// Sets packed single-precision (32-bit) floating-point elements in returned
2164/// vector with the supplied values.
2165///
2166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_ps)
2167#[inline]
2168#[target_feature(enable = "avx")]
2169// This intrinsic has no corresponding instruction.
2170#[stable(feature = "simd_x86", since = "1.27.0")]
2171pub unsafe fn _mm256_set_ps(
2172 a: f32,
2173 b: f32,
2174 c: f32,
2175 d: f32,
2176 e: f32,
2177 f: f32,
2178 g: f32,
2179 h: f32,
2180) -> __m256 {
    _mm256_setr_ps(h, g, f, e, d, c, b, a)
2182}
2183
2184/// Sets packed 8-bit integers in returned vector with the supplied values.
2185///
2186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi8)
2187#[inline]
2188#[target_feature(enable = "avx")]
2189// This intrinsic has no corresponding instruction.
2190#[stable(feature = "simd_x86", since = "1.27.0")]
2191pub unsafe fn _mm256_set_epi8(
2192 e00: i8,
2193 e01: i8,
2194 e02: i8,
2195 e03: i8,
2196 e04: i8,
2197 e05: i8,
2198 e06: i8,
2199 e07: i8,
2200 e08: i8,
2201 e09: i8,
2202 e10: i8,
2203 e11: i8,
2204 e12: i8,
2205 e13: i8,
2206 e14: i8,
2207 e15: i8,
2208 e16: i8,
2209 e17: i8,
2210 e18: i8,
2211 e19: i8,
2212 e20: i8,
2213 e21: i8,
2214 e22: i8,
2215 e23: i8,
2216 e24: i8,
2217 e25: i8,
2218 e26: i8,
2219 e27: i8,
2220 e28: i8,
2221 e29: i8,
2222 e30: i8,
2223 e31: i8,
2224) -> __m256i {
2225 #[rustfmt::skip]
2226 _mm256_setr_epi8(
        e31, e30, e29, e28, e27, e26, e25, e24,
        e23, e22, e21, e20, e19, e18, e17, e16,
        e15, e14, e13, e12, e11, e10, e09, e08,
        e07, e06, e05, e04, e03, e02, e01, e00,
2231 )
2232}
2233
2234/// Sets packed 16-bit integers in returned vector with the supplied values.
2235///
2236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi16)
2237#[inline]
2238#[target_feature(enable = "avx")]
2239// This intrinsic has no corresponding instruction.
2240#[stable(feature = "simd_x86", since = "1.27.0")]
2241pub unsafe fn _mm256_set_epi16(
2242 e00: i16,
2243 e01: i16,
2244 e02: i16,
2245 e03: i16,
2246 e04: i16,
2247 e05: i16,
2248 e06: i16,
2249 e07: i16,
2250 e08: i16,
2251 e09: i16,
2252 e10: i16,
2253 e11: i16,
2254 e12: i16,
2255 e13: i16,
2256 e14: i16,
2257 e15: i16,
2258) -> __m256i {
2259 #[rustfmt::skip]
2260 _mm256_setr_epi16(
        e15, e14, e13, e12,
        e11, e10, e09, e08,
        e07, e06, e05, e04,
        e03, e02, e01, e00,
2265 )
2266}
2267
2268/// Sets packed 32-bit integers in returned vector with the supplied values.
2269///
2270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi32)
2271#[inline]
2272#[target_feature(enable = "avx")]
2273// This intrinsic has no corresponding instruction.
2274#[stable(feature = "simd_x86", since = "1.27.0")]
2275pub unsafe fn _mm256_set_epi32(
2276 e0: i32,
2277 e1: i32,
2278 e2: i32,
2279 e3: i32,
2280 e4: i32,
2281 e5: i32,
2282 e6: i32,
2283 e7: i32,
2284) -> __m256i {
    _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
2286}
2287
2288/// Sets packed 64-bit integers in returned vector with the supplied values.
2289///
2290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi64x)
2291#[inline]
2292#[target_feature(enable = "avx")]
2293// This intrinsic has no corresponding instruction.
2294#[stable(feature = "simd_x86", since = "1.27.0")]
2295pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    _mm256_setr_epi64x(d, c, b, a)
2297}
2298
2299/// Sets packed double-precision (64-bit) floating-point elements in returned
2300/// vector with the supplied values in reverse order.
2301///
2302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_pd)
2303#[inline]
2304#[target_feature(enable = "avx")]
2305// This intrinsic has no corresponding instruction.
2306#[stable(feature = "simd_x86", since = "1.27.0")]
2307pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
2308 __m256d(a, b, c, d)
2309}
2310
2311/// Sets packed single-precision (32-bit) floating-point elements in returned
2312/// vector with the supplied values in reverse order.
2313///
2314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_ps)
2315#[inline]
2316#[target_feature(enable = "avx")]
2317// This intrinsic has no corresponding instruction.
2318#[stable(feature = "simd_x86", since = "1.27.0")]
2319pub unsafe fn _mm256_setr_ps(
2320 a: f32,
2321 b: f32,
2322 c: f32,
2323 d: f32,
2324 e: f32,
2325 f: f32,
2326 g: f32,
2327 h: f32,
2328) -> __m256 {
2329 __m256(a, b, c, d, e, f, g, h)
2330}
2331
2332/// Sets packed 8-bit integers in returned vector with the supplied values in
2333/// reverse order.
2334///
2335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi8)
2336#[inline]
2337#[target_feature(enable = "avx")]
2338// This intrinsic has no corresponding instruction.
2339#[stable(feature = "simd_x86", since = "1.27.0")]
2340pub unsafe fn _mm256_setr_epi8(
2341 e00: i8,
2342 e01: i8,
2343 e02: i8,
2344 e03: i8,
2345 e04: i8,
2346 e05: i8,
2347 e06: i8,
2348 e07: i8,
2349 e08: i8,
2350 e09: i8,
2351 e10: i8,
2352 e11: i8,
2353 e12: i8,
2354 e13: i8,
2355 e14: i8,
2356 e15: i8,
2357 e16: i8,
2358 e17: i8,
2359 e18: i8,
2360 e19: i8,
2361 e20: i8,
2362 e21: i8,
2363 e22: i8,
2364 e23: i8,
2365 e24: i8,
2366 e25: i8,
2367 e26: i8,
2368 e27: i8,
2369 e28: i8,
2370 e29: i8,
2371 e30: i8,
2372 e31: i8,
2373) -> __m256i {
2374 #[rustfmt::skip]
    transmute(i8x32::new(
        e00, e01, e02, e03, e04, e05, e06, e07,
        e08, e09, e10, e11, e12, e13, e14, e15,
        e16, e17, e18, e19, e20, e21, e22, e23,
        e24, e25, e26, e27, e28, e29, e30, e31,
    ))
2381}
2382
2383/// Sets packed 16-bit integers in returned vector with the supplied values in
2384/// reverse order.
2385///
2386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi16)
2387#[inline]
2388#[target_feature(enable = "avx")]
2389// This intrinsic has no corresponding instruction.
2390#[stable(feature = "simd_x86", since = "1.27.0")]
2391pub unsafe fn _mm256_setr_epi16(
2392 e00: i16,
2393 e01: i16,
2394 e02: i16,
2395 e03: i16,
2396 e04: i16,
2397 e05: i16,
2398 e06: i16,
2399 e07: i16,
2400 e08: i16,
2401 e09: i16,
2402 e10: i16,
2403 e11: i16,
2404 e12: i16,
2405 e13: i16,
2406 e14: i16,
2407 e15: i16,
2408) -> __m256i {
2409 #[rustfmt::skip]
    transmute(i16x16::new(
        e00, e01, e02, e03,
        e04, e05, e06, e07,
        e08, e09, e10, e11,
        e12, e13, e14, e15,
    ))
2416}
2417
2418/// Sets packed 32-bit integers in returned vector with the supplied values in
2419/// reverse order.
2420///
2421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi32)
2422#[inline]
2423#[target_feature(enable = "avx")]
2424// This intrinsic has no corresponding instruction.
2425#[stable(feature = "simd_x86", since = "1.27.0")]
2426pub unsafe fn _mm256_setr_epi32(
2427 e0: i32,
2428 e1: i32,
2429 e2: i32,
2430 e3: i32,
2431 e4: i32,
2432 e5: i32,
2433 e6: i32,
2434 e7: i32,
2435) -> __m256i {
    transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
2437}
2438
2439/// Sets packed 64-bit integers in returned vector with the supplied values in
2440/// reverse order.
2441///
2442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi64x)
2443#[inline]
2444#[target_feature(enable = "avx")]
2445// This intrinsic has no corresponding instruction.
2446#[stable(feature = "simd_x86", since = "1.27.0")]
2447pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    transmute(i64x4::new(a, b, c, d))
2449}
2450
2451/// Broadcasts double-precision (64-bit) floating-point value `a` to all
2452/// elements of returned vector.
2453///
2454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_pd)
2455#[inline]
2456#[target_feature(enable = "avx")]
2457// This intrinsic has no corresponding instruction.
2458#[stable(feature = "simd_x86", since = "1.27.0")]
2459pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d {
    _mm256_setr_pd(a, a, a, a)
2461}
2462
2463/// Broadcasts single-precision (32-bit) floating-point value `a` to all
2464/// elements of returned vector.
2465///
2466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_ps)
2467#[inline]
2468#[target_feature(enable = "avx")]
2469// This intrinsic has no corresponding instruction.
2470#[stable(feature = "simd_x86", since = "1.27.0")]
2471pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 {
    _mm256_setr_ps(a, a, a, a, a, a, a, a)
2473}
2474
2475/// Broadcasts 8-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastb` instruction.
2477///
2478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi8)
2479#[inline]
2480#[target_feature(enable = "avx")]
2481// This intrinsic has no corresponding instruction.
2482#[stable(feature = "simd_x86", since = "1.27.0")]
2483pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i {
2484 #[rustfmt::skip]
2485 _mm256_setr_epi8(
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
2490 )
2491}
2492
2493/// Broadcasts 16-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastw` instruction.
2495///
2496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi16)
2497#[inline]
2498#[target_feature(enable = "avx")]
2499//#[cfg_attr(test, assert_instr(vpshufb))]
2500#[cfg_attr(test, assert_instr(vinsertf128))]
2501// This intrinsic has no corresponding instruction.
2502#[stable(feature = "simd_x86", since = "1.27.0")]
2503pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i {
    _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
2505}
2506
2507/// Broadcasts 32-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastd` instruction.
2509///
2510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi32)
2511#[inline]
2512#[target_feature(enable = "avx")]
2513// This intrinsic has no corresponding instruction.
2514#[stable(feature = "simd_x86", since = "1.27.0")]
2515pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i {
    _mm256_setr_epi32(a, a, a, a, a, a, a, a)
2517}
2518
2519/// Broadcasts 64-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastq` instruction.
2521///
2522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi64x)
2523#[inline]
2524#[target_feature(enable = "avx")]
2525#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))]
2526#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
2527// This intrinsic has no corresponding instruction.
2528#[stable(feature = "simd_x86", since = "1.27.0")]
2529pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i {
    _mm256_setr_epi64x(a, a, a, a)
2531}
2532
/// Casts vector of type __m256d to type __m256.
2534///
2535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_ps)
2536#[inline]
2537#[target_feature(enable = "avx")]
2538// This intrinsic is only used for compilation and does not generate any
2539// instructions, thus it has zero latency.
2540#[stable(feature = "simd_x86", since = "1.27.0")]
2541pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 {
    transmute(a)
2543}
2544
/// Casts vector of type __m256 to type __m256d.
2546///
2547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_pd)
2548#[inline]
2549#[target_feature(enable = "avx")]
2550// This intrinsic is only used for compilation and does not generate any
2551// instructions, thus it has zero latency.
2552#[stable(feature = "simd_x86", since = "1.27.0")]
2553pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d {
    transmute(a)
2555}
2556
2557/// Casts vector of type __m256 to type __m256i.
2558///
2559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_si256)
2560#[inline]
2561#[target_feature(enable = "avx")]
2562// This intrinsic is only used for compilation and does not generate any
2563// instructions, thus it has zero latency.
2564#[stable(feature = "simd_x86", since = "1.27.0")]
2565pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i {
    transmute(a)
2567}
2568
2569/// Casts vector of type __m256i to type __m256.
2570///
2571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_ps)
2572#[inline]
2573#[target_feature(enable = "avx")]
2574// This intrinsic is only used for compilation and does not generate any
2575// instructions, thus it has zero latency.
2576#[stable(feature = "simd_x86", since = "1.27.0")]
2577pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
    transmute(a)
2579}
2580
2581/// Casts vector of type __m256d to type __m256i.
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_si256)
2584#[inline]
2585#[target_feature(enable = "avx")]
2586// This intrinsic is only used for compilation and does not generate any
2587// instructions, thus it has zero latency.
2588#[stable(feature = "simd_x86", since = "1.27.0")]
2589pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i {
    transmute(a)
2591}
2592
2593/// Casts vector of type __m256i to type __m256d.
2594///
2595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_pd)
2596#[inline]
2597#[target_feature(enable = "avx")]
2598// This intrinsic is only used for compilation and does not generate any
2599// instructions, thus it has zero latency.
2600#[stable(feature = "simd_x86", since = "1.27.0")]
2601pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
    transmute(a)
2603}
2604
2605/// Casts vector of type __m256 to type __m128.
2606///
2607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps256_ps128)
2608#[inline]
2609#[target_feature(enable = "avx")]
2610// This intrinsic is only used for compilation and does not generate any
2611// instructions, thus it has zero latency.
2612#[stable(feature = "simd_x86", since = "1.27.0")]
2613pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
2614 simd_shuffle!(a, a, [0, 1, 2, 3])
2615}
2616
2617/// Casts vector of type __m256d to type __m128d.
2618///
2619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd256_pd128)
2620#[inline]
2621#[target_feature(enable = "avx")]
2622// This intrinsic is only used for compilation and does not generate any
2623// instructions, thus it has zero latency.
2624#[stable(feature = "simd_x86", since = "1.27.0")]
2625pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
2626 simd_shuffle!(a, a, [0, 1])
2627}
2628
2629/// Casts vector of type __m256i to type __m128i.
2630///
2631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_si128)
2632#[inline]
2633#[target_feature(enable = "avx")]
2634// This intrinsic is only used for compilation and does not generate any
2635// instructions, thus it has zero latency.
2636#[stable(feature = "simd_x86", since = "1.27.0")]
2637pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
2638 let a: i64x4 = a.as_i64x4();
2639 let dst: i64x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(dst)
2641}
2642
2643/// Casts vector of type __m128 to type __m256;
2644/// the upper 128 bits of the result are undefined.
2645///
2646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps128_ps256)
2647#[inline]
2648#[target_feature(enable = "avx")]
2649// This intrinsic is only used for compilation and does not generate any
2650// instructions, thus it has zero latency.
2651#[stable(feature = "simd_x86", since = "1.27.0")]
2652pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
2653 // FIXME simd_shuffle!(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
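    // The upper 128 bits are documented as undefined, so duplicating the low
    // lanes here is an arbitrary but valid placeholder; `simd_shuffle!` cannot
    // currently request truly undefined lanes (hence the FIXME above).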
2654 simd_shuffle!(a, a, [0, 1, 2, 3, 0, 0, 0, 0])
2655}
2656
2657/// Casts vector of type __m128d to type __m256d;
2658/// the upper 128 bits of the result are undefined.
2659///
2660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd128_pd256)
2661#[inline]
2662#[target_feature(enable = "avx")]
2663// This intrinsic is only used for compilation and does not generate any
2664// instructions, thus it has zero latency.
2665#[stable(feature = "simd_x86", since = "1.27.0")]
2666pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
2667 // FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2668 simd_shuffle!(a, a, [0, 1, 0, 0])
2669}
2670
2671/// Casts vector of type __m128i to type __m256i;
2672/// the upper 128 bits of the result are undefined.
2673///
2674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi128_si256)
2675#[inline]
2676#[target_feature(enable = "avx")]
2677// This intrinsic is only used for compilation and does not generate any
2678// instructions, thus it has zero latency.
2679#[stable(feature = "simd_x86", since = "1.27.0")]
2680pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
2681 let a: i64x2 = a.as_i64x2();
2682 // FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2683 let dst: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 0]);
    transmute(dst)
2685}
2686
2687/// Constructs a 256-bit floating-point vector of `[8 x float]` from a
2688/// 128-bit floating-point vector of `[4 x float]`. The lower 128 bits contain
2689/// the value of the source vector. The upper 128 bits are set to zero.
2690///
2691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextps128_ps256)
2692#[inline]
2693#[target_feature(enable = "avx,sse")]
2694// This intrinsic is only used for compilation and does not generate any
2695// instructions, thus it has zero latency.
2696#[stable(feature = "simd_x86", since = "1.27.0")]
2697pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
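    // Concatenate `a` with an all-zero vector and keep lanes 0..=7: lanes
    // 0..=3 come from `a`, lanes 4..=7 from the zero vector, so the upper
    // 128 bits of the result are guaranteed to be zero.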
2698 simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
2699}
2700
2701/// Constructs a 256-bit integer vector from a 128-bit integer vector.
2702/// The lower 128 bits contain the value of the source vector. The upper
2703/// 128 bits are set to zero.
2704///
2705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextsi128_si256)
2706#[inline]
2707#[target_feature(enable = "avx,sse2")]
2708// This intrinsic is only used for compilation and does not generate any
2709// instructions, thus it has zero latency.
2710#[stable(feature = "simd_x86", since = "1.27.0")]
2711pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2712 let b: i64x2 = _mm_setzero_si128().as_i64x2();
2713 let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
    transmute(dst)
2715}
2716
2717/// Constructs a 256-bit floating-point vector of `[4 x double]` from a
2718/// 128-bit floating-point vector of `[2 x double]`. The lower 128 bits
2719/// contain the value of the source vector. The upper 128 bits are set
2720/// to zero.
2721///
2722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextpd128_pd256)
2723#[inline]
2724#[target_feature(enable = "avx,sse2")]
2725// This intrinsic is only used for compilation and does not generate any
2726// instructions, thus it has zero latency.
2727#[stable(feature = "simd_x86", since = "1.27.0")]
2728pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
2729 simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3])
2730}
2731
2732/// Returns vector of type `__m256` with indeterminate elements.
2733/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2734/// In practice, this is equivalent to [`mem::zeroed`].
2735///
2736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_ps)
2737#[inline]
2738#[target_feature(enable = "avx")]
2739// This intrinsic has no corresponding instruction.
2740#[stable(feature = "simd_x86", since = "1.27.0")]
2741pub unsafe fn _mm256_undefined_ps() -> __m256 {
2742 _mm256_set1_ps(0.0)
2743}
2744
2745/// Returns vector of type `__m256d` with indeterminate elements.
2746/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2747/// In practice, this is equivalent to [`mem::zeroed`].
2748///
2749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_pd)
2750#[inline]
2751#[target_feature(enable = "avx")]
2752// This intrinsic has no corresponding instruction.
2753#[stable(feature = "simd_x86", since = "1.27.0")]
2754pub unsafe fn _mm256_undefined_pd() -> __m256d {
2755 _mm256_set1_pd(0.0)
2756}
2757
/// Returns vector of type `__m256i` with indeterminate elements.
2759/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2760/// In practice, this is equivalent to [`mem::zeroed`].
2761///
2762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_si256)
2763#[inline]
2764#[target_feature(enable = "avx")]
2765// This intrinsic has no corresponding instruction.
2766#[stable(feature = "simd_x86", since = "1.27.0")]
2767pub unsafe fn _mm256_undefined_si256() -> __m256i {
2768 __m256i(0, 0, 0, 0)
2769}
2770
2771/// Sets packed __m256 returned vector with the supplied values.
2772///
2773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128)
2774#[inline]
2775#[target_feature(enable = "avx")]
2776#[cfg_attr(test, assert_instr(vinsertf128))]
2777#[stable(feature = "simd_x86", since = "1.27.0")]
2778pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
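    // `simd_shuffle!` concatenates `lo` then `hi`; indices 0..=3 select from
    // `lo` and 4..=7 from `hi`, placing `lo` in the low 128 bits of the result.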
2779 simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
2780}
2781
2782/// Sets packed __m256d returned vector with the supplied values.
2783///
2784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128d)
2785#[inline]
2786#[target_feature(enable = "avx")]
2787#[cfg_attr(test, assert_instr(vinsertf128))]
2788#[stable(feature = "simd_x86", since = "1.27.0")]
2789pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
    let hi: __m128 = transmute(hi);
    let lo: __m128 = transmute(lo);
    transmute(_mm256_set_m128(hi, lo))
2793}
2794
2795/// Sets packed __m256i returned vector with the supplied values.
2796///
2797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128i)
2798#[inline]
2799#[target_feature(enable = "avx")]
2800#[cfg_attr(test, assert_instr(vinsertf128))]
2801#[stable(feature = "simd_x86", since = "1.27.0")]
2802pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
    let hi: __m128 = transmute(hi);
    let lo: __m128 = transmute(lo);
    transmute(_mm256_set_m128(hi, lo))
2806}
2807
2808/// Sets packed __m256 returned vector with the supplied values.
2809///
2810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128)
2811#[inline]
2812#[target_feature(enable = "avx")]
2813#[cfg_attr(test, assert_instr(vinsertf128))]
2814#[stable(feature = "simd_x86", since = "1.27.0")]
2815pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
2816 _mm256_set_m128(hi, lo)
2817}
2818
2819/// Sets packed __m256d returned vector with the supplied values.
2820///
2821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128d)
2822#[inline]
2823#[target_feature(enable = "avx")]
2824#[cfg_attr(test, assert_instr(vinsertf128))]
2825#[stable(feature = "simd_x86", since = "1.27.0")]
2826pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
2827 _mm256_set_m128d(hi, lo)
2828}
2829
2830/// Sets packed __m256i returned vector with the supplied values.
2831///
2832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128i)
2833#[inline]
2834#[target_feature(enable = "avx")]
2835#[cfg_attr(test, assert_instr(vinsertf128))]
2836#[stable(feature = "simd_x86", since = "1.27.0")]
2837pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
2838 _mm256_set_m128i(hi, lo)
2839}
2840
2841/// Loads two 128-bit values (composed of 4 packed single-precision (32-bit)
/// floating-point elements) from memory, and combines them into a 256-bit
2843/// value.
2844/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2845///
2846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128)
2847#[inline]
2848#[target_feature(enable = "avx,sse")]
2849// This intrinsic has no corresponding instruction.
2850#[stable(feature = "simd_x86", since = "1.27.0")]
2851pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
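    // Load the low half with an unaligned 128-bit load, widen it to 256 bits,
    // then insert the high half into the upper 128-bit lane.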
2852 let a: __m256 = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
    _mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr))
2854}
2855
2856/// Loads two 128-bit values (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory, and combines them into a 256-bit
2858/// value.
2859/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2860///
2861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128d)
2862#[inline]
2863#[target_feature(enable = "avx,sse2")]
2864// This intrinsic has no corresponding instruction.
2865#[stable(feature = "simd_x86", since = "1.27.0")]
2866pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
    let a: __m256d = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
    _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr))
2869}
2870
/// Loads two 128-bit values (composed of integer data) from memory, and combines
2872/// them into a 256-bit value.
2873/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2874///
2875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128i)
2876#[inline]
2877#[target_feature(enable = "avx,sse2")]
2878// This intrinsic has no corresponding instruction.
2879#[stable(feature = "simd_x86", since = "1.27.0")]
2880pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
    let a: __m256i = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
    _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr))
2883}
2884
2885/// Stores the high and low 128-bit halves (each composed of 4 packed
/// single-precision (32-bit) floating-point elements) from `a` into memory at two
2887/// different 128-bit locations.
2888/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2889///
2890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128)
2891#[inline]
2892#[target_feature(enable = "avx,sse")]
2893// This intrinsic has no corresponding instruction.
2894#[stable(feature = "simd_x86", since = "1.27.0")]
2895pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
    let lo: __m128 = _mm256_castps256_ps128(a);
    _mm_storeu_ps(loaddr, lo);
    let hi: __m128 = _mm256_extractf128_ps::<1>(a);
    _mm_storeu_ps(hiaddr, hi);
2900}
2901
2902/// Stores the high and low 128-bit halves (each composed of 2 packed
/// double-precision (64-bit) floating-point elements) from `a` into memory at two
2904/// different 128-bit locations.
2905/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2906///
2907/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128d)
2908#[inline]
2909#[target_feature(enable = "avx,sse2")]
2910// This intrinsic has no corresponding instruction.
2911#[stable(feature = "simd_x86", since = "1.27.0")]
2912pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
    let lo: __m128d = _mm256_castpd256_pd128(a);
    _mm_storeu_pd(loaddr, lo);
    let hi: __m128d = _mm256_extractf128_pd::<1>(a);
    _mm_storeu_pd(hiaddr, hi);
2917}
2918
2919/// Stores the high and low 128-bit halves (each composed of integer data) from
/// `a` into memory at two different 128-bit locations.
2921/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2922///
2923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128i)
2924#[inline]
2925#[target_feature(enable = "avx,sse2")]
2926// This intrinsic has no corresponding instruction.
2927#[stable(feature = "simd_x86", since = "1.27.0")]
2928pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
    let lo: __m128i = _mm256_castsi256_si128(a);
    _mm_storeu_si128(loaddr, lo);
    let hi: __m128i = _mm256_extractf128_si256::<1>(a);
    _mm_storeu_si128(hiaddr, hi);
2933}
2934
2935/// Returns the first element of the input vector of `[8 x float]`.
2936///
2937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtss_f32)
2938#[inline]
2939#[target_feature(enable = "avx")]
2940//#[cfg_attr(test, assert_instr(movss))] FIXME
2941#[stable(feature = "simd_x86", since = "1.27.0")]
2942pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 {
2943 simd_extract!(a, 0)
2944}
2945
2946// LLVM intrinsics used in the above functions
2947#[allow(improper_ctypes)]
2948extern "C" {
2949 #[link_name = "llvm.x86.avx.round.pd.256"]
2950 fn roundpd256(a: __m256d, b: i32) -> __m256d;
2951 #[link_name = "llvm.x86.avx.round.ps.256"]
2952 fn roundps256(a: __m256, b: i32) -> __m256;
2953 #[link_name = "llvm.x86.avx.sqrt.ps.256"]
2954 fn sqrtps256(a: __m256) -> __m256;
2955 #[link_name = "llvm.x86.avx.dp.ps.256"]
2956 fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
2957 #[link_name = "llvm.x86.avx.hadd.pd.256"]
2958 fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
2959 #[link_name = "llvm.x86.avx.hadd.ps.256"]
2960 fn vhaddps(a: __m256, b: __m256) -> __m256;
2961 #[link_name = "llvm.x86.avx.hsub.pd.256"]
2962 fn vhsubpd(a: __m256d, b: __m256d) -> __m256d;
2963 #[link_name = "llvm.x86.avx.hsub.ps.256"]
2964 fn vhsubps(a: __m256, b: __m256) -> __m256;
2965 #[link_name = "llvm.x86.sse2.cmp.pd"]
2966 fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2967 #[link_name = "llvm.x86.avx.cmp.pd.256"]
2968 fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d;
2969 #[link_name = "llvm.x86.sse.cmp.ps"]
2970 fn vcmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
2971 #[link_name = "llvm.x86.avx.cmp.ps.256"]
2972 fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256;
2973 #[link_name = "llvm.x86.sse2.cmp.sd"]
2974 fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2975 #[link_name = "llvm.x86.sse.cmp.ss"]
2976 fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
2977 #[link_name = "llvm.x86.avx.cvt.ps2dq.256"]
2978 fn vcvtps2dq(a: __m256) -> i32x8;
2979 #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"]
2980 fn vcvttpd2dq(a: __m256d) -> i32x4;
2981 #[link_name = "llvm.x86.avx.cvt.pd2dq.256"]
2982 fn vcvtpd2dq(a: __m256d) -> i32x4;
2983 #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"]
2984 fn vcvttps2dq(a: __m256) -> i32x8;
2985 #[link_name = "llvm.x86.avx.vzeroall"]
2986 fn vzeroall();
2987 #[link_name = "llvm.x86.avx.vzeroupper"]
2988 fn vzeroupper();
2989 #[link_name = "llvm.x86.avx.vpermilvar.ps.256"]
2990 fn vpermilps256(a: __m256, b: i32x8) -> __m256;
2991 #[link_name = "llvm.x86.avx.vpermilvar.ps"]
2992 fn vpermilps(a: __m128, b: i32x4) -> __m128;
2993 #[link_name = "llvm.x86.avx.vpermilvar.pd.256"]
2994 fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
2995 #[link_name = "llvm.x86.avx.vpermilvar.pd"]
2996 fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
2997 #[link_name = "llvm.x86.avx.vperm2f128.ps.256"]
2998 fn vperm2f128ps256(a: __m256, b: __m256, imm8: i8) -> __m256;
2999 #[link_name = "llvm.x86.avx.vperm2f128.pd.256"]
3000 fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d;
3001 #[link_name = "llvm.x86.avx.vperm2f128.si.256"]
3002 fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8;
3003 #[link_name = "llvm.x86.avx.maskload.pd.256"]
3004 fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
3005 #[link_name = "llvm.x86.avx.maskstore.pd.256"]
3006 fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
3007 #[link_name = "llvm.x86.avx.maskload.pd"]
3008 fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
3009 #[link_name = "llvm.x86.avx.maskstore.pd"]
3010 fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
3011 #[link_name = "llvm.x86.avx.maskload.ps.256"]
3012 fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
3013 #[link_name = "llvm.x86.avx.maskstore.ps.256"]
3014 fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
3015 #[link_name = "llvm.x86.avx.maskload.ps"]
3016 fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
3017 #[link_name = "llvm.x86.avx.maskstore.ps"]
3018 fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
3019 #[link_name = "llvm.x86.avx.ldu.dq.256"]
3020 fn vlddqu(mem_addr: *const i8) -> i8x32;
3021 #[link_name = "llvm.x86.avx.rcp.ps.256"]
3022 fn vrcpps(a: __m256) -> __m256;
3023 #[link_name = "llvm.x86.avx.rsqrt.ps.256"]
3024 fn vrsqrtps(a: __m256) -> __m256;
3025 #[link_name = "llvm.x86.avx.ptestz.256"]
3026 fn ptestz256(a: i64x4, b: i64x4) -> i32;
3027 #[link_name = "llvm.x86.avx.ptestc.256"]
3028 fn ptestc256(a: i64x4, b: i64x4) -> i32;
3029 #[link_name = "llvm.x86.avx.ptestnzc.256"]
3030 fn ptestnzc256(a: i64x4, b: i64x4) -> i32;
3031 #[link_name = "llvm.x86.avx.vtestz.pd.256"]
3032 fn vtestzpd256(a: __m256d, b: __m256d) -> i32;
3033 #[link_name = "llvm.x86.avx.vtestc.pd.256"]
3034 fn vtestcpd256(a: __m256d, b: __m256d) -> i32;
3035 #[link_name = "llvm.x86.avx.vtestnzc.pd.256"]
3036 fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32;
3037 #[link_name = "llvm.x86.avx.vtestz.pd"]
3038 fn vtestzpd(a: __m128d, b: __m128d) -> i32;
3039 #[link_name = "llvm.x86.avx.vtestc.pd"]
3040 fn vtestcpd(a: __m128d, b: __m128d) -> i32;
3041 #[link_name = "llvm.x86.avx.vtestnzc.pd"]
3042 fn vtestnzcpd(a: __m128d, b: __m128d) -> i32;
3043 #[link_name = "llvm.x86.avx.vtestz.ps.256"]
3044 fn vtestzps256(a: __m256, b: __m256) -> i32;
3045 #[link_name = "llvm.x86.avx.vtestc.ps.256"]
3046 fn vtestcps256(a: __m256, b: __m256) -> i32;
3047 #[link_name = "llvm.x86.avx.vtestnzc.ps.256"]
3048 fn vtestnzcps256(a: __m256, b: __m256) -> i32;
3049 #[link_name = "llvm.x86.avx.vtestz.ps"]
3050 fn vtestzps(a: __m128, b: __m128) -> i32;
3051 #[link_name = "llvm.x86.avx.vtestc.ps"]
3052 fn vtestcps(a: __m128, b: __m128) -> i32;
3053 #[link_name = "llvm.x86.avx.vtestnzc.ps"]
3054 fn vtestnzcps(a: __m128, b: __m128) -> i32;
3055 #[link_name = "llvm.x86.avx.min.ps.256"]
3056 fn vminps(a: __m256, b: __m256) -> __m256;
3057 #[link_name = "llvm.x86.avx.max.ps.256"]
3058 fn vmaxps(a: __m256, b: __m256) -> __m256;
3059 #[link_name = "llvm.x86.avx.min.pd.256"]
3060 fn vminpd(a: __m256d, b: __m256d) -> __m256d;
3061 #[link_name = "llvm.x86.avx.max.pd.256"]
3062 fn vmaxpd(a: __m256d, b: __m256d) -> __m256d;
3063}
3064
3065#[cfg(test)]
3066mod tests {
3067 use crate::hint::black_box;
3068 use crate::ptr;
3069 use stdarch_test::simd_test;
3070
3071 use crate::core_arch::x86::*;
3072
3073 #[simd_test(enable = "avx")]
3074 unsafe fn test_mm256_add_pd() {
3075 let a = _mm256_setr_pd(1., 2., 3., 4.);
3076 let b = _mm256_setr_pd(5., 6., 7., 8.);
3077 let r = _mm256_add_pd(a, b);
3078 let e = _mm256_setr_pd(6., 8., 10., 12.);
3079 assert_eq_m256d(r, e);
3080 }
3081
3082 #[simd_test(enable = "avx")]
3083 unsafe fn test_mm256_add_ps() {
3084 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3085 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3086 let r = _mm256_add_ps(a, b);
3087 let e = _mm256_setr_ps(10., 12., 14., 16., 18., 20., 22., 24.);
3088 assert_eq_m256(r, e);
3089 }
3090
3091 #[simd_test(enable = "avx")]
3092 unsafe fn test_mm256_and_pd() {
3093 let a = _mm256_set1_pd(1.);
3094 let b = _mm256_set1_pd(0.6);
3095 let r = _mm256_and_pd(a, b);
3096 let e = _mm256_set1_pd(0.5);
3097 assert_eq_m256d(r, e);
3098 }
3099
3100 #[simd_test(enable = "avx")]
3101 unsafe fn test_mm256_and_ps() {
3102 let a = _mm256_set1_ps(1.);
3103 let b = _mm256_set1_ps(0.6);
3104 let r = _mm256_and_ps(a, b);
3105 let e = _mm256_set1_ps(0.5);
3106 assert_eq_m256(r, e);
3107 }
3108
3109 #[simd_test(enable = "avx")]
3110 unsafe fn test_mm256_or_pd() {
3111 let a = _mm256_set1_pd(1.);
3112 let b = _mm256_set1_pd(0.6);
3113 let r = _mm256_or_pd(a, b);
3114 let e = _mm256_set1_pd(1.2);
3115 assert_eq_m256d(r, e);
3116 }
3117
3118 #[simd_test(enable = "avx")]
3119 unsafe fn test_mm256_or_ps() {
3120 let a = _mm256_set1_ps(1.);
3121 let b = _mm256_set1_ps(0.6);
3122 let r = _mm256_or_ps(a, b);
3123 let e = _mm256_set1_ps(1.2);
3124 assert_eq_m256(r, e);
3125 }
3126
3127 #[simd_test(enable = "avx")]
3128 unsafe fn test_mm256_shuffle_pd() {
3129 let a = _mm256_setr_pd(1., 4., 5., 8.);
3130 let b = _mm256_setr_pd(2., 3., 6., 7.);
3131 let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
3132 let e = _mm256_setr_pd(4., 3., 8., 7.);
3133 assert_eq_m256d(r, e);
3134 }
3135
3136 #[simd_test(enable = "avx")]
3137 unsafe fn test_mm256_shuffle_ps() {
3138 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3139 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3140 let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
3141 let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.);
3142 assert_eq_m256(r, e);
3143 }
3144
3145 #[simd_test(enable = "avx")]
3146 unsafe fn test_mm256_andnot_pd() {
3147 let a = _mm256_set1_pd(0.);
3148 let b = _mm256_set1_pd(0.6);
3149 let r = _mm256_andnot_pd(a, b);
3150 assert_eq_m256d(r, b);
3151 }
3152
3153 #[simd_test(enable = "avx")]
3154 unsafe fn test_mm256_andnot_ps() {
3155 let a = _mm256_set1_ps(0.);
3156 let b = _mm256_set1_ps(0.6);
3157 let r = _mm256_andnot_ps(a, b);
3158 assert_eq_m256(r, b);
3159 }
3160
3161 #[simd_test(enable = "avx")]
3162 unsafe fn test_mm256_max_pd() {
3163 let a = _mm256_setr_pd(1., 4., 5., 8.);
3164 let b = _mm256_setr_pd(2., 3., 6., 7.);
3165 let r = _mm256_max_pd(a, b);
3166 let e = _mm256_setr_pd(2., 4., 6., 8.);
3167 assert_eq_m256d(r, e);
3168 // > If the values being compared are both 0.0s (of either sign), the
3169 // > value in the second operand (source operand) is returned.
3170 let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3171 let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3172 let wu: [u64; 4] = transmute(w);
3173 let xu: [u64; 4] = transmute(x);
3174 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3175 assert_eq!(xu, [0u64; 4]);
3176 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3177 // > second operand (source operand), either a NaN or a valid
3178 // > floating-point value, is written to the result.
3179 let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3180 let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3181 let yf: [f64; 4] = transmute(y);
3182 let zf: [f64; 4] = transmute(z);
3183 assert_eq!(yf, [0.0; 4]);
3184 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3185 }
3186
3187 #[simd_test(enable = "avx")]
3188 unsafe fn test_mm256_max_ps() {
3189 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3190 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3191 let r = _mm256_max_ps(a, b);
3192 let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
3193 assert_eq_m256(r, e);
3194 // > If the values being compared are both 0.0s (of either sign), the
3195 // > value in the second operand (source operand) is returned.
3196 let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3197 let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3198 let wu: [u32; 8] = transmute(w);
3199 let xu: [u32; 8] = transmute(x);
3200 assert_eq!(wu, [0x8000_0000u32; 8]);
3201 assert_eq!(xu, [0u32; 8]);
3202 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3203 // > second operand (source operand), either a NaN or a valid
3204 // > floating-point value, is written to the result.
3205 let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3206 let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3207 let yf: [f32; 8] = transmute(y);
3208 let zf: [f32; 8] = transmute(z);
3209 assert_eq!(yf, [0.0; 8]);
3210 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3211 }
3212
3213 #[simd_test(enable = "avx")]
3214 unsafe fn test_mm256_min_pd() {
3215 let a = _mm256_setr_pd(1., 4., 5., 8.);
3216 let b = _mm256_setr_pd(2., 3., 6., 7.);
3217 let r = _mm256_min_pd(a, b);
3218 let e = _mm256_setr_pd(1., 3., 5., 7.);
3219 assert_eq_m256d(r, e);
3220 // > If the values being compared are both 0.0s (of either sign), the
3221 // > value in the second operand (source operand) is returned.
3222 let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3223 let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3224 let wu: [u64; 4] = transmute(w);
3225 let xu: [u64; 4] = transmute(x);
3226 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3227 assert_eq!(xu, [0u64; 4]);
3228 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3229 // > second operand (source operand), either a NaN or a valid
3230 // > floating-point value, is written to the result.
3231 let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3232 let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3233 let yf: [f64; 4] = transmute(y);
3234 let zf: [f64; 4] = transmute(z);
3235 assert_eq!(yf, [0.0; 4]);
3236 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3237 }
3238
3239 #[simd_test(enable = "avx")]
3240 unsafe fn test_mm256_min_ps() {
3241 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3242 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3243 let r = _mm256_min_ps(a, b);
3244 let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
3245 assert_eq_m256(r, e);
3246 // > If the values being compared are both 0.0s (of either sign), the
3247 // > value in the second operand (source operand) is returned.
3248 let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3249 let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3250 let wu: [u32; 8] = transmute(w);
3251 let xu: [u32; 8] = transmute(x);
3252 assert_eq!(wu, [0x8000_0000u32; 8]);
3253 assert_eq!(xu, [0u32; 8]);
3254 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3255 // > second operand (source operand), either a NaN or a valid
3256 // > floating-point value, is written to the result.
3257 let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3258 let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3259 let yf: [f32; 8] = transmute(y);
3260 let zf: [f32; 8] = transmute(z);
3261 assert_eq!(yf, [0.0; 8]);
3262 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3263 }
3264
3265 #[simd_test(enable = "avx")]
3266 unsafe fn test_mm256_mul_pd() {
3267 let a = _mm256_setr_pd(1., 2., 3., 4.);
3268 let b = _mm256_setr_pd(5., 6., 7., 8.);
3269 let r = _mm256_mul_pd(a, b);
3270 let e = _mm256_setr_pd(5., 12., 21., 32.);
3271 assert_eq_m256d(r, e);
3272 }
3273
3274 #[simd_test(enable = "avx")]
3275 unsafe fn test_mm256_mul_ps() {
3276 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3277 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3278 let r = _mm256_mul_ps(a, b);
3279 let e = _mm256_setr_ps(9., 20., 33., 48., 65., 84., 105., 128.);
3280 assert_eq_m256(r, e);
3281 }
3282
3283 #[simd_test(enable = "avx")]
3284 unsafe fn test_mm256_addsub_pd() {
3285 let a = _mm256_setr_pd(1., 2., 3., 4.);
3286 let b = _mm256_setr_pd(5., 6., 7., 8.);
3287 let r = _mm256_addsub_pd(a, b);
3288 let e = _mm256_setr_pd(-4., 8., -4., 12.);
3289 assert_eq_m256d(r, e);
3290 }
3291
3292 #[simd_test(enable = "avx")]
3293 unsafe fn test_mm256_addsub_ps() {
3294 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3295 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3296 let r = _mm256_addsub_ps(a, b);
3297 let e = _mm256_setr_ps(-4., 8., -4., 12., -4., 8., -4., 12.);
3298 assert_eq_m256(r, e);
3299 }
3300
3301 #[simd_test(enable = "avx")]
3302 unsafe fn test_mm256_sub_pd() {
3303 let a = _mm256_setr_pd(1., 2., 3., 4.);
3304 let b = _mm256_setr_pd(5., 6., 7., 8.);
3305 let r = _mm256_sub_pd(a, b);
3306 let e = _mm256_setr_pd(-4., -4., -4., -4.);
3307 assert_eq_m256d(r, e);
3308 }
3309
3310 #[simd_test(enable = "avx")]
3311 unsafe fn test_mm256_sub_ps() {
3312 let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.);
3313 let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.);
3314 let r = _mm256_sub_ps(a, b);
3315 let e = _mm256_setr_ps(-4., -4., -4., -4., -4., -4., -4., -4.);
3316 assert_eq_m256(r, e);
3317 }
3318
3319 #[simd_test(enable = "avx")]
3320 unsafe fn test_mm256_round_pd() {
3321 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3322 let result_closest = _mm256_round_pd::<0b0000>(a);
3323 let result_down = _mm256_round_pd::<0b0001>(a);
3324 let result_up = _mm256_round_pd::<0b0010>(a);
3325 let expected_closest = _mm256_setr_pd(2., 2., 4., -1.);
3326 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3327 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3328 assert_eq_m256d(result_closest, expected_closest);
3329 assert_eq_m256d(result_down, expected_down);
3330 assert_eq_m256d(result_up, expected_up);
3331 }
3332
3333 #[simd_test(enable = "avx")]
3334 unsafe fn test_mm256_floor_pd() {
3335 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3336 let result_down = _mm256_floor_pd(a);
3337 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3338 assert_eq_m256d(result_down, expected_down);
3339 }
3340
3341 #[simd_test(enable = "avx")]
3342 unsafe fn test_mm256_ceil_pd() {
3343 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3344 let result_up = _mm256_ceil_pd(a);
3345 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3346 assert_eq_m256d(result_up, expected_up);
3347 }
3348
3349 #[simd_test(enable = "avx")]
3350 unsafe fn test_mm256_round_ps() {
3351 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3352 let result_closest = _mm256_round_ps::<0b0000>(a);
3353 let result_down = _mm256_round_ps::<0b0001>(a);
3354 let result_up = _mm256_round_ps::<0b0010>(a);
3355 let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.);
3356 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3357 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3358 assert_eq_m256(result_closest, expected_closest);
3359 assert_eq_m256(result_down, expected_down);
3360 assert_eq_m256(result_up, expected_up);
3361 }
3362
3363 #[simd_test(enable = "avx")]
3364 unsafe fn test_mm256_floor_ps() {
3365 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3366 let result_down = _mm256_floor_ps(a);
3367 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3368 assert_eq_m256(result_down, expected_down);
3369 }
3370
3371 #[simd_test(enable = "avx")]
3372 unsafe fn test_mm256_ceil_ps() {
3373 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3374 let result_up = _mm256_ceil_ps(a);
3375 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3376 assert_eq_m256(result_up, expected_up);
3377 }
3378
3379 #[simd_test(enable = "avx")]
3380 unsafe fn test_mm256_sqrt_pd() {
3381 let a = _mm256_setr_pd(4., 9., 16., 25.);
3382 let r = _mm256_sqrt_pd(a);
3383 let e = _mm256_setr_pd(2., 3., 4., 5.);
3384 assert_eq_m256d(r, e);
3385 }
3386
3387 #[simd_test(enable = "avx")]
3388 unsafe fn test_mm256_sqrt_ps() {
3389 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3390 let r = _mm256_sqrt_ps(a);
3391 let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.);
3392 assert_eq_m256(r, e);
3393 }
3394
3395 #[simd_test(enable = "avx")]
3396 unsafe fn test_mm256_div_ps() {
3397 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3398 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3399 let r = _mm256_div_ps(a, b);
3400 let e = _mm256_setr_ps(1., 3., 8., 5., 0.5, 1., 0.25, 0.5);
3401 assert_eq_m256(r, e);
3402 }
3403
3404 #[simd_test(enable = "avx")]
3405 unsafe fn test_mm256_div_pd() {
3406 let a = _mm256_setr_pd(4., 9., 16., 25.);
3407 let b = _mm256_setr_pd(4., 3., 2., 5.);
3408 let r = _mm256_div_pd(a, b);
3409 let e = _mm256_setr_pd(1., 3., 8., 5.);
3410 assert_eq_m256d(r, e);
3411 }
3412
3413 #[simd_test(enable = "avx")]
3414 unsafe fn test_mm256_blend_pd() {
3415 let a = _mm256_setr_pd(4., 9., 16., 25.);
3416 let b = _mm256_setr_pd(4., 3., 2., 5.);
3417 let r = _mm256_blend_pd::<0x0>(a, b);
3418 assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.));
3419 let r = _mm256_blend_pd::<0x3>(a, b);
3420 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.));
3421 let r = _mm256_blend_pd::<0xF>(a, b);
3422 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.));
3423 }
3424
3425 #[simd_test(enable = "avx")]
3426 unsafe fn test_mm256_blend_ps() {
3427 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3428 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3429 let r = _mm256_blend_ps::<0x0>(a, b);
3430 assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.));
3431 let r = _mm256_blend_ps::<0x3>(a, b);
3432 assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.));
3433 let r = _mm256_blend_ps::<0xF>(a, b);
3434 assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.));
3435 }
3436
3437 #[simd_test(enable = "avx")]
3438 unsafe fn test_mm256_blendv_pd() {
3439 let a = _mm256_setr_pd(4., 9., 16., 25.);
3440 let b = _mm256_setr_pd(4., 3., 2., 5.);
3441 let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64);
3442 let r = _mm256_blendv_pd(a, b, c);
3443 let e = _mm256_setr_pd(4., 9., 2., 5.);
3444 assert_eq_m256d(r, e);
3445 }
3446
3447 #[simd_test(enable = "avx")]
3448 unsafe fn test_mm256_blendv_ps() {
3449 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3450 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3451 #[rustfmt::skip]
3452 let c = _mm256_setr_ps(
3453 0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32,
3454 );
3455 let r = _mm256_blendv_ps(a, b, c);
3456 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3457 assert_eq_m256(r, e);
3458 }
3459
3460 #[simd_test(enable = "avx")]
3461 unsafe fn test_mm256_dp_ps() {
3462 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3463 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3464 let r = _mm256_dp_ps::<0xFF>(a, b);
3465 let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.);
3466 assert_eq_m256(r, e);
3467 }
3468
3469 #[simd_test(enable = "avx")]
3470 unsafe fn test_mm256_hadd_pd() {
3471 let a = _mm256_setr_pd(4., 9., 16., 25.);
3472 let b = _mm256_setr_pd(4., 3., 2., 5.);
3473 let r = _mm256_hadd_pd(a, b);
3474 let e = _mm256_setr_pd(13., 7., 41., 7.);
3475 assert_eq_m256d(r, e);
3476
3477 let a = _mm256_setr_pd(1., 2., 3., 4.);
3478 let b = _mm256_setr_pd(5., 6., 7., 8.);
3479 let r = _mm256_hadd_pd(a, b);
3480 let e = _mm256_setr_pd(3., 11., 7., 15.);
3481 assert_eq_m256d(r, e);
3482 }
3483
3484 #[simd_test(enable = "avx")]
3485 unsafe fn test_mm256_hadd_ps() {
3486 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3487 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3488 let r = _mm256_hadd_ps(a, b);
3489 let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.);
3490 assert_eq_m256(r, e);
3491
3492 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3493 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3494 let r = _mm256_hadd_ps(a, b);
3495 let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.);
3496 assert_eq_m256(r, e);
3497 }
3498
3499 #[simd_test(enable = "avx")]
3500 unsafe fn test_mm256_hsub_pd() {
3501 let a = _mm256_setr_pd(4., 9., 16., 25.);
3502 let b = _mm256_setr_pd(4., 3., 2., 5.);
3503 let r = _mm256_hsub_pd(a, b);
3504 let e = _mm256_setr_pd(-5., 1., -9., -3.);
3505 assert_eq_m256d(r, e);
3506
3507 let a = _mm256_setr_pd(1., 2., 3., 4.);
3508 let b = _mm256_setr_pd(5., 6., 7., 8.);
3509 let r = _mm256_hsub_pd(a, b);
3510 let e = _mm256_setr_pd(-1., -1., -1., -1.);
3511 assert_eq_m256d(r, e);
3512 }
3513
3514 #[simd_test(enable = "avx")]
3515 unsafe fn test_mm256_hsub_ps() {
3516 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3517 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3518 let r = _mm256_hsub_ps(a, b);
3519 let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.);
3520 assert_eq_m256(r, e);
3521
3522 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3523 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3524 let r = _mm256_hsub_ps(a, b);
3525 let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.);
3526 assert_eq_m256(r, e);
3527 }
3528
3529 #[simd_test(enable = "avx")]
3530 unsafe fn test_mm256_xor_pd() {
3531 let a = _mm256_setr_pd(4., 9., 16., 25.);
3532 let b = _mm256_set1_pd(0.);
3533 let r = _mm256_xor_pd(a, b);
3534 assert_eq_m256d(r, a);
3535 }
3536
3537 #[simd_test(enable = "avx")]
3538 unsafe fn test_mm256_xor_ps() {
3539 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3540 let b = _mm256_set1_ps(0.);
3541 let r = _mm256_xor_ps(a, b);
3542 assert_eq_m256(r, a);
3543 }
3544
3545 #[simd_test(enable = "avx")]
3546 unsafe fn test_mm_cmp_pd() {
3547 let a = _mm_setr_pd(4., 9.);
3548 let b = _mm_setr_pd(4., 3.);
3549 let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b);
3550 assert!(get_m128d(r, 0).is_nan());
3551 assert!(get_m128d(r, 1).is_nan());
3552 }
3553
3554 #[simd_test(enable = "avx")]
3555 unsafe fn test_mm256_cmp_pd() {
3556 let a = _mm256_setr_pd(1., 2., 3., 4.);
3557 let b = _mm256_setr_pd(5., 6., 7., 8.);
3558 let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b);
3559 let e = _mm256_set1_pd(0.);
3560 assert_eq_m256d(r, e);
3561 }
3562
3563 #[simd_test(enable = "avx")]
3564 unsafe fn test_mm_cmp_ps() {
3565 let a = _mm_setr_ps(4., 3., 2., 5.);
3566 let b = _mm_setr_ps(4., 9., 16., 25.);
3567 let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b);
3568 assert!(get_m128(r, 0).is_nan());
3569 assert_eq!(get_m128(r, 1), 0.);
3570 assert_eq!(get_m128(r, 2), 0.);
3571 assert_eq!(get_m128(r, 3), 0.);
3572 }
3573
3574 #[simd_test(enable = "avx")]
3575 unsafe fn test_mm256_cmp_ps() {
3576 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3577 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3578 let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b);
3579 let e = _mm256_set1_ps(0.);
3580 assert_eq_m256(r, e);
3581 }
3582
3583 #[simd_test(enable = "avx")]
3584 unsafe fn test_mm_cmp_sd() {
3585 let a = _mm_setr_pd(4., 9.);
3586 let b = _mm_setr_pd(4., 3.);
3587 let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b);
3588 assert!(get_m128d(r, 0).is_nan());
3589 assert_eq!(get_m128d(r, 1), 9.);
3590 }
3591
3592 #[simd_test(enable = "avx")]
3593 unsafe fn test_mm_cmp_ss() {
3594 let a = _mm_setr_ps(4., 3., 2., 5.);
3595 let b = _mm_setr_ps(4., 9., 16., 25.);
3596 let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b);
3597 assert!(get_m128(r, 0).is_nan());
3598 assert_eq!(get_m128(r, 1), 3.);
3599 assert_eq!(get_m128(r, 2), 2.);
3600 assert_eq!(get_m128(r, 3), 5.);
3601 }
3602
3603 #[simd_test(enable = "avx")]
3604 unsafe fn test_mm256_cvtepi32_pd() {
3605 let a = _mm_setr_epi32(4, 9, 16, 25);
3606 let r = _mm256_cvtepi32_pd(a);
3607 let e = _mm256_setr_pd(4., 9., 16., 25.);
3608 assert_eq_m256d(r, e);
3609 }
3610
3611 #[simd_test(enable = "avx")]
3612 unsafe fn test_mm256_cvtepi32_ps() {
3613 let a = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3614 let r = _mm256_cvtepi32_ps(a);
3615 let e = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3616 assert_eq_m256(r, e);
3617 }
3618
3619 #[simd_test(enable = "avx")]
3620 unsafe fn test_mm256_cvtpd_ps() {
3621 let a = _mm256_setr_pd(4., 9., 16., 25.);
3622 let r = _mm256_cvtpd_ps(a);
3623 let e = _mm_setr_ps(4., 9., 16., 25.);
3624 assert_eq_m128(r, e);
3625 }
3626
3627 #[simd_test(enable = "avx")]
3628 unsafe fn test_mm256_cvtps_epi32() {
3629 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3630 let r = _mm256_cvtps_epi32(a);
3631 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3632 assert_eq_m256i(r, e);
3633 }
3634
3635 #[simd_test(enable = "avx")]
3636 unsafe fn test_mm256_cvtps_pd() {
3637 let a = _mm_setr_ps(4., 9., 16., 25.);
3638 let r = _mm256_cvtps_pd(a);
3639 let e = _mm256_setr_pd(4., 9., 16., 25.);
3640 assert_eq_m256d(r, e);
3641 }
3642
3643 #[simd_test(enable = "avx")]
3644 unsafe fn test_mm256_cvttpd_epi32() {
3645 let a = _mm256_setr_pd(4., 9., 16., 25.);
3646 let r = _mm256_cvttpd_epi32(a);
3647 let e = _mm_setr_epi32(4, 9, 16, 25);
3648 assert_eq_m128i(r, e);
3649 }
3650
3651 #[simd_test(enable = "avx")]
3652 unsafe fn test_mm256_cvtpd_epi32() {
3653 let a = _mm256_setr_pd(4., 9., 16., 25.);
3654 let r = _mm256_cvtpd_epi32(a);
3655 let e = _mm_setr_epi32(4, 9, 16, 25);
3656 assert_eq_m128i(r, e);
3657 }
3658
3659 #[simd_test(enable = "avx")]
3660 unsafe fn test_mm256_cvttps_epi32() {
3661 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3662 let r = _mm256_cvttps_epi32(a);
3663 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3664 assert_eq_m256i(r, e);
3665 }
3666
3667 #[simd_test(enable = "avx")]
3668 unsafe fn test_mm256_extractf128_ps() {
3669 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3670 let r = _mm256_extractf128_ps::<0>(a);
3671 let e = _mm_setr_ps(4., 3., 2., 5.);
3672 assert_eq_m128(r, e);
3673 }
3674
3675 #[simd_test(enable = "avx")]
3676 unsafe fn test_mm256_extractf128_pd() {
3677 let a = _mm256_setr_pd(4., 3., 2., 5.);
3678 let r = _mm256_extractf128_pd::<0>(a);
3679 let e = _mm_setr_pd(4., 3.);
3680 assert_eq_m128d(r, e);
3681 }
3682
3683 #[simd_test(enable = "avx")]
3684 unsafe fn test_mm256_extractf128_si256() {
3685 let a = _mm256_setr_epi64x(4, 3, 2, 5);
3686 let r = _mm256_extractf128_si256::<0>(a);
3687 let e = _mm_setr_epi64x(4, 3);
3688 assert_eq_m128i(r, e);
3689 }
3690
3691 #[simd_test(enable = "avx")]
3692 unsafe fn test_mm256_zeroall() {
3693 _mm256_zeroall();
3694 }
3695
3696 #[simd_test(enable = "avx")]
3697 unsafe fn test_mm256_zeroupper() {
3698 _mm256_zeroupper();
3699 }
3700
3701 #[simd_test(enable = "avx")]
3702 unsafe fn test_mm256_permutevar_ps() {
3703 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3704 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3705 let r = _mm256_permutevar_ps(a, b);
3706 let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.);
3707 assert_eq_m256(r, e);
3708 }
3709
3710 #[simd_test(enable = "avx")]
3711 unsafe fn test_mm_permutevar_ps() {
3712 let a = _mm_setr_ps(4., 3., 2., 5.);
3713 let b = _mm_setr_epi32(1, 2, 3, 4);
3714 let r = _mm_permutevar_ps(a, b);
3715 let e = _mm_setr_ps(3., 2., 5., 4.);
3716 assert_eq_m128(r, e);
3717 }
3718
3719 #[simd_test(enable = "avx")]
3720 unsafe fn test_mm256_permute_ps() {
3721 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3722 let r = _mm256_permute_ps::<0x1b>(a);
3723 let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.);
3724 assert_eq_m256(r, e);
3725 }
3726
3727 #[simd_test(enable = "avx")]
3728 unsafe fn test_mm_permute_ps() {
3729 let a = _mm_setr_ps(4., 3., 2., 5.);
3730 let r = _mm_permute_ps::<0x1b>(a);
3731 let e = _mm_setr_ps(5., 2., 3., 4.);
3732 assert_eq_m128(r, e);
3733 }
3734
3735 #[simd_test(enable = "avx")]
3736 unsafe fn test_mm256_permutevar_pd() {
3737 let a = _mm256_setr_pd(4., 3., 2., 5.);
3738 let b = _mm256_setr_epi64x(1, 2, 3, 4);
3739 let r = _mm256_permutevar_pd(a, b);
3740 let e = _mm256_setr_pd(4., 3., 5., 2.);
3741 assert_eq_m256d(r, e);
3742 }
3743
3744 #[simd_test(enable = "avx")]
3745 unsafe fn test_mm_permutevar_pd() {
3746 let a = _mm_setr_pd(4., 3.);
3747 let b = _mm_setr_epi64x(3, 0);
3748 let r = _mm_permutevar_pd(a, b);
3749 let e = _mm_setr_pd(3., 4.);
3750 assert_eq_m128d(r, e);
3751 }
3752
3753 #[simd_test(enable = "avx")]
3754 unsafe fn test_mm256_permute_pd() {
3755 let a = _mm256_setr_pd(4., 3., 2., 5.);
3756 let r = _mm256_permute_pd::<5>(a);
3757 let e = _mm256_setr_pd(3., 4., 5., 2.);
3758 assert_eq_m256d(r, e);
3759 }
3760
3761 #[simd_test(enable = "avx")]
3762 unsafe fn test_mm_permute_pd() {
3763 let a = _mm_setr_pd(4., 3.);
3764 let r = _mm_permute_pd::<1>(a);
3765 let e = _mm_setr_pd(3., 4.);
3766 assert_eq_m128d(r, e);
3767 }
3768
3769 #[simd_test(enable = "avx")]
3770 unsafe fn test_mm256_permute2f128_ps() {
3771 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3772 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3773 let r = _mm256_permute2f128_ps::<0x13>(a, b);
3774 let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
3775 assert_eq_m256(r, e);
3776 }
3777
3778 #[simd_test(enable = "avx")]
3779 unsafe fn test_mm256_permute2f128_pd() {
3780 let a = _mm256_setr_pd(1., 2., 3., 4.);
3781 let b = _mm256_setr_pd(5., 6., 7., 8.);
3782 let r = _mm256_permute2f128_pd::<0x31>(a, b);
3783 let e = _mm256_setr_pd(3., 4., 7., 8.);
3784 assert_eq_m256d(r, e);
3785 }
3786
3787 #[simd_test(enable = "avx")]
3788 unsafe fn test_mm256_permute2f128_si256() {
3789 let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
3790 let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
3791 let r = _mm256_permute2f128_si256::<0x20>(a, b);
3792 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3793 assert_eq_m256i(r, e);
3794 }
3795
3796 #[simd_test(enable = "avx")]
3797 unsafe fn test_mm256_broadcast_ss() {
3798 let r = _mm256_broadcast_ss(&3.);
3799 let e = _mm256_set1_ps(3.);
3800 assert_eq_m256(r, e);
3801 }
3802
3803 #[simd_test(enable = "avx")]
3804 unsafe fn test_mm_broadcast_ss() {
3805 let r = _mm_broadcast_ss(&3.);
3806 let e = _mm_set1_ps(3.);
3807 assert_eq_m128(r, e);
3808 }
3809
3810 #[simd_test(enable = "avx")]
3811 unsafe fn test_mm256_broadcast_sd() {
3812 let r = _mm256_broadcast_sd(&3.);
3813 let e = _mm256_set1_pd(3.);
3814 assert_eq_m256d(r, e);
3815 }
3816
3817 #[simd_test(enable = "avx")]
3818 unsafe fn test_mm256_broadcast_ps() {
3819 let a = _mm_setr_ps(4., 3., 2., 5.);
3820 let r = _mm256_broadcast_ps(&a);
3821 let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.);
3822 assert_eq_m256(r, e);
3823 }
3824
3825 #[simd_test(enable = "avx")]
3826 unsafe fn test_mm256_broadcast_pd() {
3827 let a = _mm_setr_pd(4., 3.);
3828 let r = _mm256_broadcast_pd(&a);
3829 let e = _mm256_setr_pd(4., 3., 4., 3.);
3830 assert_eq_m256d(r, e);
3831 }
3832
3833 #[simd_test(enable = "avx")]
3834 unsafe fn test_mm256_insertf128_ps() {
3835 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3836 let b = _mm_setr_ps(4., 9., 16., 25.);
3837 let r = _mm256_insertf128_ps::<0>(a, b);
3838 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3839 assert_eq_m256(r, e);
3840 }
3841
3842 #[simd_test(enable = "avx")]
3843 unsafe fn test_mm256_insertf128_pd() {
3844 let a = _mm256_setr_pd(1., 2., 3., 4.);
3845 let b = _mm_setr_pd(5., 6.);
3846 let r = _mm256_insertf128_pd::<0>(a, b);
3847 let e = _mm256_setr_pd(5., 6., 3., 4.);
3848 assert_eq_m256d(r, e);
3849 }
3850
3851 #[simd_test(enable = "avx")]
3852 unsafe fn test_mm256_insertf128_si256() {
3853 let a = _mm256_setr_epi64x(1, 2, 3, 4);
3854 let b = _mm_setr_epi64x(5, 6);
3855 let r = _mm256_insertf128_si256::<0>(a, b);
3856 let e = _mm256_setr_epi64x(5, 6, 3, 4);
3857 assert_eq_m256i(r, e);
3858 }
3859
3860 #[simd_test(enable = "avx")]
3861 unsafe fn test_mm256_insert_epi8() {
3862 #[rustfmt::skip]
3863 let a = _mm256_setr_epi8(
3864 1, 2, 3, 4, 5, 6, 7, 8,
3865 9, 10, 11, 12, 13, 14, 15, 16,
3866 17, 18, 19, 20, 21, 22, 23, 24,
3867 25, 26, 27, 28, 29, 30, 31, 32,
3868 );
3869 let r = _mm256_insert_epi8::<31>(a, 0);
3870 #[rustfmt::skip]
3871 let e = _mm256_setr_epi8(
3872 1, 2, 3, 4, 5, 6, 7, 8,
3873 9, 10, 11, 12, 13, 14, 15, 16,
3874 17, 18, 19, 20, 21, 22, 23, 24,
3875 25, 26, 27, 28, 29, 30, 31, 0,
3876 );
3877 assert_eq_m256i(r, e);
3878 }
3879
3880 #[simd_test(enable = "avx")]
3881 unsafe fn test_mm256_insert_epi16() {
3882 #[rustfmt::skip]
3883 let a = _mm256_setr_epi16(
3884 0, 1, 2, 3, 4, 5, 6, 7,
3885 8, 9, 10, 11, 12, 13, 14, 15,
3886 );
3887 let r = _mm256_insert_epi16::<15>(a, 0);
3888 #[rustfmt::skip]
3889 let e = _mm256_setr_epi16(
3890 0, 1, 2, 3, 4, 5, 6, 7,
3891 8, 9, 10, 11, 12, 13, 14, 0,
3892 );
3893 assert_eq_m256i(r, e);
3894 }
3895
3896 #[simd_test(enable = "avx")]
3897 unsafe fn test_mm256_insert_epi32() {
3898 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3899 let r = _mm256_insert_epi32::<7>(a, 0);
3900 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
3901 assert_eq_m256i(r, e);
3902 }
3903
3904 #[simd_test(enable = "avx")]
3905 unsafe fn test_mm256_load_pd() {
3906 let a = _mm256_setr_pd(1., 2., 3., 4.);
3907 let p = ptr::addr_of!(a) as *const f64;
3908 let r = _mm256_load_pd(p);
3909 let e = _mm256_setr_pd(1., 2., 3., 4.);
3910 assert_eq_m256d(r, e);
3911 }
3912
3913 #[simd_test(enable = "avx")]
3914 unsafe fn test_mm256_store_pd() {
3915 let a = _mm256_setr_pd(1., 2., 3., 4.);
3916 let mut r = _mm256_undefined_pd();
3917 _mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
3918 assert_eq_m256d(r, a);
3919 }
3920
3921 #[simd_test(enable = "avx")]
3922 unsafe fn test_mm256_load_ps() {
3923 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3924 let p = ptr::addr_of!(a) as *const f32;
3925 let r = _mm256_load_ps(p);
3926 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3927 assert_eq_m256(r, e);
3928 }
3929
3930 #[simd_test(enable = "avx")]
3931 unsafe fn test_mm256_store_ps() {
3932 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3933 let mut r = _mm256_undefined_ps();
3934 _mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
3935 assert_eq_m256(r, a);
3936 }
3937
3938 #[simd_test(enable = "avx")]
3939 unsafe fn test_mm256_loadu_pd() {
3940 let a = &[1.0f64, 2., 3., 4.];
3941 let p = a.as_ptr();
3942 let r = _mm256_loadu_pd(black_box(p));
3943 let e = _mm256_setr_pd(1., 2., 3., 4.);
3944 assert_eq_m256d(r, e);
3945 }
3946
3947 #[simd_test(enable = "avx")]
3948 unsafe fn test_mm256_storeu_pd() {
3949 let a = _mm256_set1_pd(9.);
3950 let mut r = _mm256_undefined_pd();
3951 _mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
3952 assert_eq_m256d(r, a);
3953 }
3954
3955 #[simd_test(enable = "avx")]
3956 unsafe fn test_mm256_loadu_ps() {
3957 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
3958 let p = a.as_ptr();
3959 let r = _mm256_loadu_ps(black_box(p));
3960 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3961 assert_eq_m256(r, e);
3962 }
3963
3964 #[simd_test(enable = "avx")]
3965 unsafe fn test_mm256_storeu_ps() {
3966 let a = _mm256_set1_ps(9.);
3967 let mut r = _mm256_undefined_ps();
3968 _mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
3969 assert_eq_m256(r, a);
3970 }
3971
3972 #[simd_test(enable = "avx")]
3973 unsafe fn test_mm256_load_si256() {
3974 let a = _mm256_setr_epi64x(1, 2, 3, 4);
3975 let p = ptr::addr_of!(a);
3976 let r = _mm256_load_si256(p);
3977 let e = _mm256_setr_epi64x(1, 2, 3, 4);
3978 assert_eq_m256i(r, e);
3979 }
3980
3981 #[simd_test(enable = "avx")]
3982 unsafe fn test_mm256_store_si256() {
3983 let a = _mm256_setr_epi64x(1, 2, 3, 4);
3984 let mut r = _mm256_undefined_si256();
3985 _mm256_store_si256(ptr::addr_of_mut!(r), a);
3986 assert_eq_m256i(r, a);
3987 }
3988
3989 #[simd_test(enable = "avx")]
3990 unsafe fn test_mm256_loadu_si256() {
3991 let a = _mm256_setr_epi64x(1, 2, 3, 4);
3992 let p = ptr::addr_of!(a);
3993 let r = _mm256_loadu_si256(black_box(p));
3994 let e = _mm256_setr_epi64x(1, 2, 3, 4);
3995 assert_eq_m256i(r, e);
3996 }
3997
3998 #[simd_test(enable = "avx")]
3999 unsafe fn test_mm256_storeu_si256() {
4000 let a = _mm256_set1_epi8(9);
4001 let mut r = _mm256_undefined_si256();
4002 _mm256_storeu_si256(ptr::addr_of_mut!(r), a);
4003 assert_eq_m256i(r, a);
4004 }
4005
4006 #[simd_test(enable = "avx")]
4007 unsafe fn test_mm256_maskload_pd() {
4008 let a = &[1.0f64, 2., 3., 4.];
4009 let p = a.as_ptr();
4010 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4011 let r = _mm256_maskload_pd(black_box(p), mask);
4012 let e = _mm256_setr_pd(0., 2., 0., 4.);
4013 assert_eq_m256d(r, e);
4014 }
4015
4016 #[simd_test(enable = "avx")]
4017 unsafe fn test_mm256_maskstore_pd() {
4018 let mut r = _mm256_set1_pd(0.);
4019 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4020 let a = _mm256_setr_pd(1., 2., 3., 4.);
4021 _mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4022 let e = _mm256_setr_pd(0., 2., 0., 4.);
4023 assert_eq_m256d(r, e);
4024 }
4025
4026 #[simd_test(enable = "avx")]
4027 unsafe fn test_mm_maskload_pd() {
4028 let a = &[1.0f64, 2.];
4029 let p = a.as_ptr();
4030 let mask = _mm_setr_epi64x(0, !0);
4031 let r = _mm_maskload_pd(black_box(p), mask);
4032 let e = _mm_setr_pd(0., 2.);
4033 assert_eq_m128d(r, e);
4034 }
4035
4036 #[simd_test(enable = "avx")]
4037 unsafe fn test_mm_maskstore_pd() {
4038 let mut r = _mm_set1_pd(0.);
4039 let mask = _mm_setr_epi64x(0, !0);
4040 let a = _mm_setr_pd(1., 2.);
4041 _mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4042 let e = _mm_setr_pd(0., 2.);
4043 assert_eq_m128d(r, e);
4044 }
4045
4046 #[simd_test(enable = "avx")]
4047 unsafe fn test_mm256_maskload_ps() {
4048 let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
4049 let p = a.as_ptr();
4050 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4051 let r = _mm256_maskload_ps(black_box(p), mask);
4052 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4053 assert_eq_m256(r, e);
4054 }
4055
4056 #[simd_test(enable = "avx")]
4057 unsafe fn test_mm256_maskstore_ps() {
4058 let mut r = _mm256_set1_ps(0.);
4059 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4060 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4061 _mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4062 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4063 assert_eq_m256(r, e);
4064 }
4065
4066 #[simd_test(enable = "avx")]
4067 unsafe fn test_mm_maskload_ps() {
4068 let a = &[1.0f32, 2., 3., 4.];
4069 let p = a.as_ptr();
4070 let mask = _mm_setr_epi32(0, !0, 0, !0);
4071 let r = _mm_maskload_ps(black_box(p), mask);
4072 let e = _mm_setr_ps(0., 2., 0., 4.);
4073 assert_eq_m128(r, e);
4074 }
4075
4076 #[simd_test(enable = "avx")]
4077 unsafe fn test_mm_maskstore_ps() {
4078 let mut r = _mm_set1_ps(0.);
4079 let mask = _mm_setr_epi32(0, !0, 0, !0);
4080 let a = _mm_setr_ps(1., 2., 3., 4.);
4081 _mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4082 let e = _mm_setr_ps(0., 2., 0., 4.);
4083 assert_eq_m128(r, e);
4084 }
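
    // Illustrative round-trip sketch (ours, not part of the upstream test suite)
    // for the unaligned split loads/stores documented above: `_mm256_loadu2_m128`
    // stitches two 128-bit halves together and `_mm256_storeu2_m128` splits them
    // back out.
    #[simd_test(enable = "avx")]
    unsafe fn test_loadu2_storeu2_roundtrip_sketch() {
        let lo = [1.0f32, 2., 3., 4.];
        let hi = [5.0f32, 6., 7., 8.];
        let v = _mm256_loadu2_m128(hi.as_ptr(), lo.as_ptr());
        assert_eq_m256(v, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
        let mut lo_out = [0.0f32; 4];
        let mut hi_out = [0.0f32; 4];
        _mm256_storeu2_m128(hi_out.as_mut_ptr(), lo_out.as_mut_ptr(), v);
        assert_eq!(lo_out, lo);
        assert_eq!(hi_out, hi);
    }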
4085
4086 #[simd_test(enable = "avx")]
4087 unsafe fn test_mm256_movehdup_ps() {
4088 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4089 let r = _mm256_movehdup_ps(a);
4090 let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.);
4091 assert_eq_m256(r, e);
4092 }
4093
4094 #[simd_test(enable = "avx")]
4095 unsafe fn test_mm256_moveldup_ps() {
4096 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4097 let r = _mm256_moveldup_ps(a);
4098 let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.);
4099 assert_eq_m256(r, e);
4100 }
4101
4102 #[simd_test(enable = "avx")]
4103 unsafe fn test_mm256_movedup_pd() {
4104 let a = _mm256_setr_pd(1., 2., 3., 4.);
4105 let r = _mm256_movedup_pd(a);
4106 let e = _mm256_setr_pd(1., 1., 3., 3.);
4107 assert_eq_m256d(r, e);
4108 }
4109
4110 #[simd_test(enable = "avx")]
4111 unsafe fn test_mm256_lddqu_si256() {
4112 #[rustfmt::skip]
4113 let a = _mm256_setr_epi8(
4114 1, 2, 3, 4, 5, 6, 7, 8,
4115 9, 10, 11, 12, 13, 14, 15, 16,
4116 17, 18, 19, 20, 21, 22, 23, 24,
4117 25, 26, 27, 28, 29, 30, 31, 32,
4118 );
4119 let p = ptr::addr_of!(a);
4120 let r = _mm256_lddqu_si256(black_box(p));
4121 #[rustfmt::skip]
4122 let e = _mm256_setr_epi8(
4123 1, 2, 3, 4, 5, 6, 7, 8,
4124 9, 10, 11, 12, 13, 14, 15, 16,
4125 17, 18, 19, 20, 21, 22, 23, 24,
4126 25, 26, 27, 28, 29, 30, 31, 32,
4127 );
4128 assert_eq_m256i(r, e);
4129 }
4130
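    // The stream (non-temporal) stores below write around the cache and require
    // a 32-byte-aligned destination, hence the `#[repr(align(32))]` wrappers
    // (an `__m256i` local is already suitably aligned by its type).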
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_stream_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let mut r = _mm256_undefined_si256();
        _mm256_stream_si256(ptr::addr_of_mut!(r), a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_stream_pd() {
        #[repr(align(32))]
        struct Memory {
            pub data: [f64; 4],
        }
        let a = _mm256_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 4] };

        _mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
        for i in 0..4 {
            assert_eq!(mem.data[i], get_m256d(a, i));
        }
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_stream_ps() {
        #[repr(align(32))]
        struct Memory {
            pub data: [f32; 8],
        }
        let a = _mm256_set1_ps(7.0);
        let mut mem = Memory { data: [-1.0; 8] };

        _mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
        for i in 0..8 {
            assert_eq!(mem.data[i], get_m256(a, i));
        }
    }

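    // _mm256_rcp_ps and _mm256_rsqrt_ps return hardware approximations; per
    // Intel's documentation the relative error is at most 1.5 * 2^-12, so the
    // exact results can differ between CPUs and are only checked here to within
    // a loose tolerance.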
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_rcp_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_rcp_ps(a);
        #[rustfmt::skip]
        let e = _mm256_setr_ps(
            0.99975586, 0.49987793, 0.33325195, 0.24993896,
            0.19995117, 0.16662598, 0.14282227, 0.12496948,
        );
        let rel_err = 0.00048828125;
        for i in 0..8 {
            assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
        }
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_rsqrt_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_rsqrt_ps(a);
        #[rustfmt::skip]
        let e = _mm256_setr_ps(
            0.99975586, 0.7069092, 0.5772705, 0.49987793,
            0.44714355, 0.40820313, 0.3779297, 0.3534546,
        );
        let rel_err = 0.00048828125;
        for i in 0..8 {
            assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
        }
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpackhi_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_unpackhi_pd(a, b);
        let e = _mm256_setr_pd(2., 6., 4., 8.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpackhi_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm256_unpackhi_ps(a, b);
        let e = _mm256_setr_ps(3., 11., 4., 12., 7., 15., 8., 16.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpacklo_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_unpacklo_pd(a, b);
        let e = _mm256_setr_pd(1., 5., 3., 7.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpacklo_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm256_unpacklo_ps(a, b);
        let e = _mm256_setr_ps(1., 9., 2., 10., 5., 13., 6., 14.);
        assert_eq_m256(r, e);
    }

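    // For the test{z,c,nzc} intrinsics below: testz returns 1 iff `a AND b` is
    // all zeros, testc returns 1 iff `(NOT a) AND b` is all zeros, and testnzc
    // returns 1 iff both of those results are non-zero. The _pd/_ps variants
    // apply the same logic to the sign bits of each element only.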
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testz_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm256_setr_epi64x(5, 6, 7, 8);
        let r = _mm256_testz_si256(a, b);
        assert_eq!(r, 0);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_testz_si256(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testc_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm256_setr_epi64x(5, 6, 7, 8);
        let r = _mm256_testc_si256(a, b);
        assert_eq!(r, 0);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_testc_si256(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testnzc_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm256_setr_epi64x(5, 6, 7, 8);
        let r = _mm256_testnzc_si256(a, b);
        assert_eq!(r, 1);
        let a = _mm256_setr_epi64x(0, 0, 0, 0);
        let b = _mm256_setr_epi64x(0, 0, 0, 0);
        let r = _mm256_testnzc_si256(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testz_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_testz_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm256_set1_pd(-1.);
        let r = _mm256_testz_pd(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testc_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_testc_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm256_set1_pd(1.);
        let b = _mm256_set1_pd(-1.);
        let r = _mm256_testc_pd(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testnzc_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_testnzc_pd(a, b);
        assert_eq!(r, 0);
        let a = _mm256_setr_pd(1., -1., -1., -1.);
        let b = _mm256_setr_pd(-1., -1., 1., 1.);
        let r = _mm256_testnzc_pd(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testz_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 6.);
        let r = _mm_testz_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm_set1_pd(-1.);
        let r = _mm_testz_pd(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testc_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 6.);
        let r = _mm_testc_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(-1.);
        let r = _mm_testc_pd(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testnzc_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 6.);
        let r = _mm_testnzc_pd(a, b);
        assert_eq!(r, 0);
        let a = _mm_setr_pd(1., -1.);
        let b = _mm_setr_pd(-1., -1.);
        let r = _mm_testnzc_pd(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testz_ps() {
        let a = _mm256_set1_ps(1.);
        let r = _mm256_testz_ps(a, a);
        assert_eq!(r, 1);
        let a = _mm256_set1_ps(-1.);
        let r = _mm256_testz_ps(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testc_ps() {
        let a = _mm256_set1_ps(1.);
        let r = _mm256_testc_ps(a, a);
        assert_eq!(r, 1);
        let b = _mm256_set1_ps(-1.);
        let r = _mm256_testc_ps(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testnzc_ps() {
        let a = _mm256_set1_ps(1.);
        let r = _mm256_testnzc_ps(a, a);
        assert_eq!(r, 0);
        let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.);
        let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.);
        let r = _mm256_testnzc_ps(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testz_ps() {
        let a = _mm_set1_ps(1.);
        let r = _mm_testz_ps(a, a);
        assert_eq!(r, 1);
        let a = _mm_set1_ps(-1.);
        let r = _mm_testz_ps(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testc_ps() {
        let a = _mm_set1_ps(1.);
        let r = _mm_testc_ps(a, a);
        assert_eq!(r, 1);
        let b = _mm_set1_ps(-1.);
        let r = _mm_testc_ps(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testnzc_ps() {
        let a = _mm_set1_ps(1.);
        let r = _mm_testnzc_ps(a, a);
        assert_eq!(r, 0);
        let a = _mm_setr_ps(1., -1., -1., -1.);
        let b = _mm_setr_ps(-1., -1., 1., 1.);
        let r = _mm_testnzc_ps(a, b);
        assert_eq!(r, 1);
    }

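    // movemask gathers the sign bit of each element into the low bits of the
    // result, so (1., -2., 3., -4.) yields 0b1010.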
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_movemask_pd() {
        let a = _mm256_setr_pd(1., -2., 3., -4.);
        let r = _mm256_movemask_pd(a);
        assert_eq!(r, 0xA);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_movemask_ps() {
        let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.);
        let r = _mm256_movemask_ps(a);
        assert_eq!(r, 0xAA);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_pd() {
        let r = _mm256_setzero_pd();
        assert_eq_m256d(r, _mm256_set1_pd(0.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_ps() {
        let r = _mm256_setzero_ps();
        assert_eq_m256(r, _mm256_set1_ps(0.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_si256() {
        let r = _mm256_setzero_si256();
        assert_eq_m256i(r, _mm256_set1_epi8(0));
    }

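    // _mm256_set_* takes its arguments from the highest element down to the
    // lowest, while _mm256_setr_* takes them in memory order (element 0 first),
    // which is why the expected vectors below are the reversed argument lists.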
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_pd() {
        let r = _mm256_set_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_ps() {
        let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi8() {
        #[rustfmt::skip]
        let r = _mm256_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 31, 30, 29, 28, 27, 26, 25,
            24, 23, 22, 21, 20, 19, 18, 17,
            16, 15, 14, 13, 12, 11, 10, 9,
            8, 7, 6, 5, 4, 3, 2, 1
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi16() {
        #[rustfmt::skip]
        let r = _mm256_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            16, 15, 14, 13, 12, 11, 10, 9,
            8, 7, 6, 5, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi32() {
        let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi64x() {
        let r = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(4, 3, 2, 1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_pd() {
        let r = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_ps() {
        let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi8() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi16() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi32() {
        let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi64x() {
        let r = _mm256_setr_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(1, 2, 3, 4));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_pd() {
        let r = _mm256_set1_pd(1.);
        assert_eq_m256d(r, _mm256_set1_pd(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_ps() {
        let r = _mm256_set1_ps(1.);
        assert_eq_m256(r, _mm256_set1_ps(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi8() {
        let r = _mm256_set1_epi8(1);
        assert_eq_m256i(r, _mm256_set1_epi8(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi16() {
        let r = _mm256_set1_epi16(1);
        assert_eq_m256i(r, _mm256_set1_epi16(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi32() {
        let r = _mm256_set1_epi32(1);
        assert_eq_m256i(r, _mm256_set1_epi32(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi64x() {
        let r = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, _mm256_set1_epi64x(1));
    }

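    // The cast intrinsics reinterpret the raw bits without converting values;
    // e.g. 1.0f64 (0x3FF0_0000_0000_0000) reads back as the two f32s 0.0 and
    // 1.875 in the test below.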
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_ps() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_ps(a);
        let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_pd() {
        let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        let r = _mm256_castps_pd(a);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_si256() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps_si256(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_ps() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        let r = _mm256_castsi256_ps(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_si256() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_si256(a);
        assert_eq_m256d(transmute(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_pd() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_pd(a);
        assert_eq_m256d(r, transmute(a));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps256_ps128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps256_ps128(a);
        assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd256_pd128() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd256_pd128(a);
        assert_eq_m128d(r, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_si128() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_si128(a);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
    }

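    // The zext intrinsics place the 128-bit argument in the lower half of a
    // 256-bit vector and zero the upper half.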
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextps128_ps256() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_zextps128_ps256(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextsi128_si256() {
        let a = _mm_setr_epi64x(1, 2);
        let r = _mm256_zextsi128_si256(a);
        let e = _mm256_setr_epi64x(1, 2, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextpd128_pd256() {
        let a = _mm_setr_pd(1., 2.);
        let r = _mm256_zextpd128_pd256(a);
        let e = _mm256_setr_pd(1., 2., 0., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128() {
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_set_m128(hi, lo);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128d() {
        let hi = _mm_setr_pd(3., 4.);
        let lo = _mm_setr_pd(1., 2.);
        let r = _mm256_set_m128d(hi, lo);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20,
            21, 22, 23, 24,
            25, 26, 27, 28,
            29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4,
            5, 6, 7, 8,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        let r = _mm256_set_m128i(hi, lo);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128() {
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let r = _mm256_setr_m128(lo, hi);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128d() {
        let lo = _mm_setr_pd(1., 2.);
        let hi = _mm_setr_pd(3., 4.);
        let r = _mm256_setr_m128d(lo, hi);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128i() {
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_setr_m128i(lo, hi);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

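    // loadu2/storeu2 move the lower and upper 128-bit halves through two
    // independent, possibly unaligned pointers.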
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128() {
        let hi = &[5., 6., 7., 8.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2., 3., 4.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128(hiaddr, loaddr);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128d() {
        let hi = &[3., 4.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128d(hiaddr, loaddr);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let mut hi = _mm_undefined_ps();
        let mut lo = _mm_undefined_ps();
        _mm256_storeu2_m128(
            ptr::addr_of_mut!(hi) as *mut f32,
            ptr::addr_of_mut!(lo) as *mut f32,
            a,
        );
        assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.));
        assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128d() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let mut hi = _mm_undefined_pd();
        let mut lo = _mm_undefined_pd();
        _mm256_storeu2_m128d(
            ptr::addr_of_mut!(hi) as *mut f64,
            ptr::addr_of_mut!(lo) as *mut f64,
            a,
        );
        assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
        assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128i() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let mut hi = _mm_undefined_si128();
        let mut lo = _mm_undefined_si128();
        _mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
        #[rustfmt::skip]
        let e_hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32
        );
        #[rustfmt::skip]
        let e_lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16
        );

        assert_eq_m128i(hi, e_hi);
        assert_eq_m128i(lo, e_lo);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtss_f32() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_cvtss_f32(a);
        assert_eq!(r, 1.);
    }
}