//! Advanced Vector Extensions (AVX)
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
//!
//! [Wikipedia][wiki] provides a quick overview of the instructions available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions

use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    intrinsics,
    mem::{self, transmute},
    ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Adds packed double-precision (64-bit) floating-point elements
/// in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_add(a, b)
}

/// Adds packed single-precision (32-bit) floating-point elements in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
    simd_add(a, b)
}

/// Computes the bitwise AND of packed double-precision (64-bit)
/// floating-point elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_pd)
#[inline]
#[target_feature(enable = "avx")]
// FIXME: Should be 'vandpd' instruction.
// See https://github.com/rust-lang/stdarch/issues/71
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_and(a, b))
}

/// Computes the bitwise AND of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_and(a, b))
}

/// Computes the bitwise OR of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_pd)
#[inline]
#[target_feature(enable = "avx")]
// FIXME: should be `vorpd` instruction.
// See <https://github.com/rust-lang/stdarch/issues/71>.
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_or(a, b))
}

/// Computes the bitwise OR of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_or(a, b))
}

/// Shuffles double-precision (64-bit) floating-point elements within 128-bit
/// lanes using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        b,
        [
            MASK as u32 & 0b1,
            ((MASK as u32 >> 1) & 0b1) + 4,
            ((MASK as u32 >> 2) & 0b1) + 2,
            ((MASK as u32 >> 3) & 0b1) + 6,
        ],
    )
}
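
// A minimal usage sketch (not part of the upstream API; the helper name below
// is hypothetical): shows how the 4-bit MASK of `_mm256_shuffle_pd` picks one
// element per position, alternating between `a` and `b` within each 128-bit
// lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn shuffle_pd_mask_sketch() -> __m256d {
    let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
    let b = _mm256_setr_pd(5.0, 6.0, 7.0, 8.0);
    // MASK = 0b0101 selects (a[1], b[0], a[3], b[2]) = (2.0, 5.0, 4.0, 7.0).
    _mm256_shuffle_pd::<0b0101>(a, b)
}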

/// Shuffles single-precision (32-bit) floating-point elements in `a` within
/// 128-bit lanes using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(MASK, 8);
    simd_shuffle!(
        a,
        b,
        [
            MASK as u32 & 0b11,
            (MASK as u32 >> 2) & 0b11,
            ((MASK as u32 >> 4) & 0b11) + 8,
            ((MASK as u32 >> 6) & 0b11) + 8,
            (MASK as u32 & 0b11) + 4,
            ((MASK as u32 >> 2) & 0b11) + 4,
            ((MASK as u32 >> 4) & 0b11) + 12,
            ((MASK as u32 >> 6) & 0b11) + 12,
        ],
    )
}

/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point
/// elements in `a`, and then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_pd)
#[inline]
#[target_feature(enable = "avx")]
// FIXME: should be `vandnpd` instruction.
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b))
}

/// Computes the bitwise NOT of packed single-precision (32-bit) floating-point
/// elements in `a`
/// and then AND with `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
}

/// Compares packed double-precision (64-bit) floating-point elements
/// in `a` and `b`, and returns packed maximum values
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
    vmaxpd(a, b)
}

/// Compares packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and returns packed maximum values
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaxps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
    vmaxps(a, b)
}

/// Compares packed double-precision (64-bit) floating-point elements
/// in `a` and `b`, and returns packed minimum values
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
    vminpd(a, b)
}

/// Compares packed single-precision (32-bit) floating-point elements in `a`
/// and `b`, and returns packed minimum values
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vminps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
    vminps(a, b)
}

/// Multiplies packed double-precision (64-bit) floating-point elements
/// in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_mul(a, b)
}

/// Multiplies packed single-precision (32-bit) floating-point elements in `a` and
/// `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmulps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
    simd_mul(a, b)
}

/// Alternatively adds and subtracts packed double-precision (64-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: f64x4 = a.as_f64x4();
    let b: f64x4 = b.as_f64x4();
    let add: f64x4 = simd_add(a, b);
    let sub: f64x4 = simd_sub(a, b);
    simd_shuffle!(add, sub, [4, 1, 6, 3])
}

/// Alternatively adds and subtracts packed single-precision (32-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vaddsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
    let a: f32x8 = a.as_f32x8();
    let b: f32x8 = b.as_f32x8();
    let add: f32x8 = simd_add(a, b);
    let sub: f32x8 = simd_sub(a, b);
    simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
}
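
// A minimal usage sketch (not part of the upstream API; the helper name is
// hypothetical): `_mm256_addsub_ps` subtracts in the even-indexed lanes and
// adds in the odd-indexed lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn addsub_ps_sketch() -> __m256 {
    let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
    let b = _mm256_set1_ps(1.0);
    // -> (0.0, 3.0, 2.0, 5.0, 4.0, 7.0, 6.0, 9.0)
    _mm256_addsub_ps(a, b)
}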

/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
/// from packed elements in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_sub(a, b)
}

/// Subtracts packed single-precision (32-bit) floating-point elements in `b`
/// from packed elements in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
    simd_sub(a, b)
}

/// Computes the division of each of the 8 packed 32-bit floating-point elements
/// in `a` by the corresponding packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
    simd_div(a, b)
}

/// Computes the division of each of the 4 packed 64-bit floating-point elements
/// in `a` by the corresponding packed elements in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_div(a, b)
}

/// Rounds packed double-precision (64-bit) floating point elements in `a`
/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows:
///
/// - `0x00`: Round to the nearest whole number.
/// - `0x01`: Round down, toward negative infinity.
/// - `0x02`: Round up, toward positive infinity.
/// - `0x03`: Truncate the values.
///
/// For a complete list of options, check [the LLVM docs][llvm_docs].
///
/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd, ROUNDING = 0x3))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundpd256(a, ROUNDING)
}
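
// A minimal usage sketch (not part of the upstream API; the helper name is
// hypothetical): rounding the same input with two of the ROUNDING values
// documented above.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn round_pd_sketch() -> (__m256d, __m256d) {
    let a = _mm256_setr_pd(1.25, -1.25, 2.75, -2.75);
    let nearest = _mm256_round_pd::<0x00>(a); // (1.0, -1.0, 3.0, -3.0)
    let truncated = _mm256_round_pd::<0x03>(a); // (1.0, -1.0, 2.0, -2.0)
    (nearest, truncated)
}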

/// Rounds packed double-precision (64-bit) floating point elements in `a`
/// toward positive infinity.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d {
    simd_ceil(a)
}

/// Rounds packed double-precision (64-bit) floating point elements in `a`
/// toward negative infinity.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d {
    simd_floor(a)
}

/// Rounds packed single-precision (32-bit) floating point elements in `a`
/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows:
///
/// - `0x00`: Round to the nearest whole number.
/// - `0x01`: Round down, toward negative infinity.
/// - `0x02`: Round up, toward positive infinity.
/// - `0x03`: Truncate the values.
///
/// For a complete list of options, check [the LLVM docs][llvm_docs].
///
/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps, ROUNDING = 0x00))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(ROUNDING, 4);
    roundps256(a, ROUNDING)
}

/// Rounds packed single-precision (32-bit) floating point elements in `a`
/// toward positive infinity.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 {
    simd_ceil(a)
}

/// Rounds packed single-precision (32-bit) floating point elements in `a`
/// toward negative infinity.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vroundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 {
    simd_floor(a)
}

/// Returns the square root of packed single-precision (32-bit) floating point
/// elements in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 {
    sqrtps256(a)
}

/// Returns the square root of packed double-precision (64-bit) floating point
/// elements in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
    simd_fsqrt(a)
}

/// Blends packed double-precision (64-bit) floating-point elements from
/// `a` and `b` using control mask `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_pd)
#[inline]
#[target_feature(enable = "avx")]
// Note: LLVM7 prefers single-precision blend instructions when
// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194
// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    simd_shuffle!(
        a,
        b,
        [
            ((IMM4 as u32 >> 0) & 1) * 4 + 0,
            ((IMM4 as u32 >> 1) & 1) * 4 + 1,
            ((IMM4 as u32 >> 2) & 1) * 4 + 2,
            ((IMM4 as u32 >> 3) & 1) * 4 + 3,
        ],
    )
}

/// Blends packed single-precision (32-bit) floating-point elements from
/// `a` and `b` using control mask `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    simd_shuffle!(
        a,
        b,
        [
            ((IMM8 as u32 >> 0) & 1) * 8 + 0,
            ((IMM8 as u32 >> 1) & 1) * 8 + 1,
            ((IMM8 as u32 >> 2) & 1) * 8 + 2,
            ((IMM8 as u32 >> 3) & 1) * 8 + 3,
            ((IMM8 as u32 >> 4) & 1) * 8 + 4,
            ((IMM8 as u32 >> 5) & 1) * 8 + 5,
            ((IMM8 as u32 >> 6) & 1) * 8 + 6,
            ((IMM8 as u32 >> 7) & 1) * 8 + 7,
        ],
    )
}
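
// A minimal usage sketch (not part of the upstream API; the helper name is
// hypothetical): each bit of IMM8 selects the corresponding 32-bit lane from
// `a` (bit clear) or `b` (bit set).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn blend_ps_sketch() -> __m256 {
    let a = _mm256_set1_ps(0.0);
    let b = _mm256_set1_ps(1.0);
    // IMM8 = 0b1010_0101: lanes 0, 2, 5 and 7 come from `b`, the rest from `a`.
    _mm256_blend_ps::<0b1010_0101>(a, b)
}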

/// Blends packed double-precision (64-bit) floating-point elements from
/// `a` and `b` using `c` as a mask.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::splat(0));
    transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
}

/// Blends packed single-precision (32-bit) floating-point elements from
/// `a` and `b` using `c` as a mask.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vblendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
    let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::splat(0));
    transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
}

/// Conditionally multiplies the packed single-precision (32-bit) floating-point
/// elements in `a` and `b` using the high 4 bits in `imm8`,
/// sums the four products, and conditionally returns the sum
/// using the low 4 bits of `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dp_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vdpps, IMM8 = 0x0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    vdpps(a, b, IMM8)
}
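
// A minimal usage sketch (not part of the upstream API; the helper name is
// hypothetical): with IMM8 = 0xFF the high nibble includes all four products
// of each 128-bit half in the sum and the low nibble broadcasts that sum to
// every lane of the half.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn dp_ps_sketch() -> __m256 {
    let a = _mm256_set1_ps(1.0);
    let b = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
    // Low-half lanes all hold 1+2+3+4 = 10.0; high-half lanes hold 5+6+7+8 = 26.0.
    _mm256_dp_ps::<0xFF>(a, b)
}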

/// Horizontal addition of adjacent pairs in the two packed vectors
/// of 4 64-bit floating points `a` and `b`.
/// In the result, sums of elements from `a` are returned in even locations,
/// while sums of elements from `b` are returned in odd locations.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
    vhaddpd(a, b)
}

/// Horizontal addition of adjacent pairs in the two packed vectors
/// of 8 32-bit floating points `a` and `b`.
/// In the result, sums of elements from `a` are returned in locations of
/// indices 0, 1, 4, 5; while sums of elements from `b` are locations
/// 2, 3, 6, 7.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhaddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
    vhaddps(a, b)
}
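
// A minimal usage sketch (not part of the upstream API; the helper name is
// hypothetical): horizontal add places pair sums from `a` at indices 0, 1, 4, 5
// and pair sums from `b` at indices 2, 3, 6, 7.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn hadd_ps_sketch() -> __m256 {
    let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
    let b = _mm256_set1_ps(10.0);
    // -> (3.0, 7.0, 20.0, 20.0, 11.0, 15.0, 20.0, 20.0)
    _mm256_hadd_ps(a, b)
}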

/// Horizontal subtraction of adjacent pairs in the two packed vectors
/// of 4 64-bit floating points `a` and `b`.
/// In the result, differences of elements from `a` are returned in even
/// locations, while differences of elements from `b` are returned in odd
/// locations.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
    vhsubpd(a, b)
}

/// Horizontal subtraction of adjacent pairs in the two packed vectors
/// of 8 32-bit floating points `a` and `b`.
/// In the result, differences of elements from `a` are returned in locations
/// of indices 0, 1, 4, 5; while differences of elements from `b` are in
/// locations 2, 3, 6, 7.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vhsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
    vhsubps(a, b)
}

/// Computes the bitwise XOR of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_pd)
#[inline]
#[target_feature(enable = "avx")]
// FIXME Should be 'vxorpd' instruction.
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
    let a: u64x4 = transmute(a);
    let b: u64x4 = transmute(b);
    transmute(simd_xor(a, b))
}

/// Computes the bitwise XOR of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
    let a: u32x8 = transmute(a);
    let b: u32x8 = transmute(b);
    transmute(simd_xor(a, b))
}

/// Equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OQ: i32 = 0x00;
/// Less-than (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OS: i32 = 0x01;
/// Less-than-or-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OS: i32 = 0x02;
/// Unordered (non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_Q: i32 = 0x03;
/// Not-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_UQ: i32 = 0x04;
/// Not-less-than (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_US: i32 = 0x05;
/// Not-less-than-or-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_US: i32 = 0x06;
/// Ordered (non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_Q: i32 = 0x07;
/// Equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_UQ: i32 = 0x08;
/// Not-greater-than-or-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_US: i32 = 0x09;
/// Not-greater-than (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_US: i32 = 0x0a;
/// False (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OQ: i32 = 0x0b;
/// Not-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OQ: i32 = 0x0c;
/// Greater-than-or-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OS: i32 = 0x0d;
/// Greater-than (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OS: i32 = 0x0e;
/// True (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_UQ: i32 = 0x0f;
/// Equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_OS: i32 = 0x10;
/// Less-than (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LT_OQ: i32 = 0x11;
/// Less-than-or-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_LE_OQ: i32 = 0x12;
/// Unordered (signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_UNORD_S: i32 = 0x13;
/// Not-equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_US: i32 = 0x14;
/// Not-less-than (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLT_UQ: i32 = 0x15;
/// Not-less-than-or-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NLE_UQ: i32 = 0x16;
/// Ordered (signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_ORD_S: i32 = 0x17;
/// Equal (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_EQ_US: i32 = 0x18;
/// Not-greater-than-or-equal (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGE_UQ: i32 = 0x19;
/// Not-greater-than (unordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NGT_UQ: i32 = 0x1a;
/// False (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_FALSE_OS: i32 = 0x1b;
/// Not-equal (ordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_NEQ_OS: i32 = 0x1c;
/// Greater-than-or-equal (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GE_OQ: i32 = 0x1d;
/// Greater-than (ordered, non-signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_GT_OQ: i32 = 0x1e;
/// True (unordered, signaling)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _CMP_TRUE_US: i32 = 0x1f;

/// Compares packed double-precision (64-bit) floating-point
/// elements in `a` and `b` based on the comparison operand
/// specified by `IMM5`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd)
#[inline]
#[target_feature(enable = "avx,sse2")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmppd(a, b, const { IMM5 as i8 })
}

/// Compares packed double-precision (64-bit) floating-point
/// elements in `a` and `b` based on the comparison operand
/// specified by `IMM5`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmppd256(a, b, IMM5 as u8)
}

/// Compares packed single-precision (32-bit) floating-point
/// elements in `a` and `b` based on the comparison operand
/// specified by `IMM5`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps)
#[inline]
#[target_feature(enable = "avx,sse")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpps(a, b, const { IMM5 as i8 })
}

/// Compares packed single-precision (32-bit) floating-point
/// elements in `a` and `b` based on the comparison operand
/// specified by `IMM5`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpps256(a, b, const { IMM5 as u8 })
}
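
// A minimal usage sketch (not part of the upstream API; the helper name is
// hypothetical): a comparison with `_CMP_LT_OS` yields an all-ones mask in
// lanes where `a < b` and all-zeros elsewhere, which can then be consumed by
// `_mm256_blendv_ps`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn cmp_ps_mask_sketch() -> __m256 {
    let a = _mm256_setr_ps(1.0, 5.0, 1.0, 5.0, 1.0, 5.0, 1.0, 5.0);
    let b = _mm256_set1_ps(3.0);
    let mask = _mm256_cmp_ps::<{ _CMP_LT_OS }>(a, b);
    // Take `b` where a < b, keep `a` otherwise -> (3.0, 5.0, 3.0, 5.0, ...).
    _mm256_blendv_ps(a, b, mask)
}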

/// Compares the lower double-precision (64-bit) floating-point element in
/// `a` and `b` based on the comparison operand specified by `IMM5`,
/// stores the result in the lower element of the returned vector,
/// and copies the upper element from `a` to the upper element of the returned
/// vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd)
#[inline]
#[target_feature(enable = "avx,sse2")]
#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))] // TODO Validate vcmpsd
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpsd(a, b, IMM5 as i8)
}

/// Compares the lower single-precision (32-bit) floating-point element in
/// `a` and `b` based on the comparison operand specified by `IMM5`,
/// stores the result in the lower element of the returned vector,
/// and copies the upper 3 packed elements from `a` to the upper elements of
/// the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss)
#[inline]
#[target_feature(enable = "avx,sse")]
#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))] // TODO Validate vcmpss
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    vcmpss(a, b, IMM5 as i8)
}

/// Converts packed 32-bit integers in `a` to packed double-precision (64-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
    simd_cast(a.as_i32x4())
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
    vcvtdq2ps(a.as_i32x8())
}

/// Converts packed double-precision (64-bit) floating-point elements in `a`
/// to packed single-precision (32-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
    vcvtpd2ps(a)
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi32)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
    transmute(vcvtps2dq(a))
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d {
    simd_cast(a)
}

/// Converts packed double-precision (64-bit) floating-point elements in `a`
/// to packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi32)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
    transmute(vcvttpd2dq(a))
}

/// Converts packed double-precision (64-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi32)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
    transmute(vcvtpd2dq(a))
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi32)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
    transmute(vcvttps2dq(a))
}

/// Extracts 128 bits (composed of 4 packed single-precision (32-bit)
/// floating-point elements) from `a`, selected with `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
    static_assert_uimm_bits!(IMM1, 1);
    simd_shuffle!(
        a,
        _mm256_undefined_ps(),
        [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
    )
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a`, selected with `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
    static_assert_uimm_bits!(IMM1, 1);
    simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize])
}

/// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_si256)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    let dst: i64x2 = simd_shuffle!(
        a.as_i64x4(),
        _mm256_undefined_si256().as_i64x4(),
        [[0, 1], [2, 3]][IMM1 as usize],
    );
    transmute(dst)
}
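
// A minimal usage sketch (not part of the upstream API; the helper name is
// hypothetical): IMM1 selects which 128-bit half of the 256-bit vector is
// extracted (0 = low half, 1 = high half).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn extractf128_sketch() -> (__m128, __m128) {
    let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
    let lo = _mm256_extractf128_ps::<0>(a); // (1.0, 2.0, 3.0, 4.0)
    let hi = _mm256_extractf128_ps::<1>(a); // (5.0, 6.0, 7.0, 8.0)
    (lo, hi)
}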

/// Zeroes the contents of all XMM or YMM registers.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroall))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zeroall() {
    vzeroall()
}

/// Zeroes the upper 128 bits of all YMM registers;
/// the lower 128 bits of the registers are unmodified.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vzeroupper))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zeroupper() {
    vzeroupper()
}

/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
    vpermilps256(a, b.as_i32x8())
}

/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// using the control in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
    vpermilps(a, b.as_i32x4())
}

/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    simd_shuffle!(
        a,
        _mm256_undefined_ps(),
        [
            (IMM8 as u32 >> 0) & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
            ((IMM8 as u32 >> 0) & 0b11) + 4,
            ((IMM8 as u32 >> 2) & 0b11) + 4,
            ((IMM8 as u32 >> 4) & 0b11) + 4,
            ((IMM8 as u32 >> 6) & 0b11) + 4,
        ],
    )
}
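
// A minimal usage sketch (not part of the upstream API; the helper name is
// hypothetical): the same four 2-bit selectors in IMM8 are applied to both
// 128-bit lanes, so 0x1B (0b00_01_10_11) reverses the elements of each lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn permute_ps_sketch() -> __m256 {
    let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
    // -> (4.0, 3.0, 2.0, 1.0, 8.0, 7.0, 6.0, 5.0)
    _mm256_permute_ps::<0x1B>(a)
}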

/// Shuffles single-precision (32-bit) floating-point elements in `a`
/// using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_ps)
#[inline]
#[target_feature(enable = "avx,sse")]
#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    simd_shuffle!(
        a,
        _mm_undefined_ps(),
        [
            (IMM8 as u32 >> 0) & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
        ],
    )
}

/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
    vpermilpd256(a, b.as_i64x4())
}

/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// using the control in `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpermilpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
    vpermilpd(a, b.as_i64x2())
}

/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM4, 4);
    simd_shuffle!(
        a,
        _mm256_undefined_pd(),
        [
            ((IMM4 as u32 >> 0) & 1),
            ((IMM4 as u32 >> 1) & 1),
            ((IMM4 as u32 >> 2) & 1) + 2,
            ((IMM4 as u32 >> 3) & 1) + 2,
        ],
    )
}

/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// using the control in `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_pd)
#[inline]
#[target_feature(enable = "avx,sse2")]
#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    simd_shuffle!(
        a,
        _mm_undefined_pd(),
        [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
    )
}

/// Shuffles 256 bits (composed of 8 packed single-precision (32-bit)
/// floating-point elements) selected by `imm8` from `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    vperm2f128ps256(a, b, IMM8 as i8)
}

/// Shuffles 256 bits (composed of 4 packed double-precision (64-bit)
/// floating-point elements) selected by `imm8` from `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    vperm2f128pd256(a, b, IMM8 as i8)
}

/// Shuffles 128-bits (composed of integer data) selected by `imm8`
/// from `a` and `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_si256)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    transmute(vperm2f128si256(a.as_i32x8(), b.as_i32x8(), IMM8 as i8))
}
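
// A minimal usage sketch (not part of the upstream API; the helper name is
// hypothetical): in the `vperm2f128` control byte, bits 1:0 choose the source
// of the low 128-bit half and bits 5:4 choose the source of the high half
// (values 0..=1 pick halves of `a`, 2..=3 pick halves of `b`).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn permute2f128_sketch(a: __m256i, b: __m256i) -> (__m256i, __m256i) {
    // 0x20 -> (low half of `a`, low half of `b`)
    let lows = _mm256_permute2f128_si256::<0x20>(a, b);
    // 0x31 -> (high half of `a`, high half of `b`)
    let highs = _mm256_permute2f128_si256::<0x31>(a, b);
    (lows, highs)
}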

/// Broadcasts a single-precision (32-bit) floating-point element from memory
/// to all elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ss)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 {
    _mm256_set1_ps(*f)
}

/// Broadcasts a single-precision (32-bit) floating-point element from memory
/// to all elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_ss)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 {
    _mm_set1_ps(*f)
}

/// Broadcasts a double-precision (64-bit) floating-point element from memory
/// to all elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_sd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::trivially_copy_pass_by_ref)]
pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d {
    _mm256_set1_pd(*f)
}

/// Broadcasts 128 bits from memory (composed of 4 packed single-precision
/// (32-bit) floating-point elements) to all elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
    vbroadcastf128ps256(a)
}

/// Broadcasts 128 bits from memory (composed of 2 packed double-precision
/// (64-bit) floating-point elements) to all elements of the returned vector.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vbroadcastf128))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
    vbroadcastf128pd256(a)
}

/// Copies `a` to result, then inserts 128 bits (composed of 4 packed
/// single-precision (32-bit) floating-point elements) from `b` into result
/// at the location specified by `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
    static_assert_uimm_bits!(IMM1, 1);
    simd_shuffle!(
        a,
        _mm256_castps128_ps256(b),
        [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
    )
}

/// Copies `a` to result, then inserts 128 bits (composed of 2 packed
/// double-precision (64-bit) floating-point elements) from `b` into result
/// at the location specified by `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
    static_assert_uimm_bits!(IMM1, 1);
    simd_shuffle!(
        a,
        _mm256_castpd128_pd256(b),
        [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
    )
}

/// Copies `a` to result, then inserts 128 bits from `b` into result
/// at the location specified by `imm8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_si256)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vinsertf128, IMM1 = 1)
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    let dst: i64x4 = simd_shuffle!(
        a.as_i64x4(),
        _mm256_castsi128_si256(b).as_i64x4(),
        [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
    );
    transmute(dst)
}
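
// A minimal usage sketch (not part of the upstream API; the helper name is
// hypothetical): a common way to build a 256-bit vector from two 128-bit
// halves is to cast the low half up and insert the high half at position 1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn insertf128_sketch(lo: __m128i, hi: __m128i) -> __m256i {
    // The upper 128 bits of the cast result are undefined until overwritten.
    let wide = _mm256_castsi128_si256(lo);
    _mm256_insertf128_si256::<1>(wide, hi)
}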
1320
1321/// Copies `a` to result, and inserts the 8-bit integer `i` into result
1322/// at the location specified by `index`.
1323///
1324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi8)
1325#[inline]
1326#[target_feature(enable = "avx")]
1327// This intrinsic has no corresponding instruction.
1328#[rustc_legacy_const_generics(2)]
1329#[stable(feature = "simd_x86", since = "1.27.0")]
1330pub unsafe fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
1331 static_assert_uimm_bits!(INDEX, 5);
1332 transmute(src:simd_insert(x:a.as_i8x32(), INDEX as u32, val:i))
1333}
1334
1335/// Copies `a` to result, and inserts the 16-bit integer `i` into result
1336/// at the location specified by `index`.
1337///
1338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi16)
1339#[inline]
1340#[target_feature(enable = "avx")]
1341// This intrinsic has no corresponding instruction.
1342#[rustc_legacy_const_generics(2)]
1343#[stable(feature = "simd_x86", since = "1.27.0")]
1344pub unsafe fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
1345 static_assert_uimm_bits!(INDEX, 4);
1346 transmute(src:simd_insert(x:a.as_i16x16(), INDEX as u32, val:i))
1347}
1348
1349/// Copies `a` to result, and inserts the 32-bit integer `i` into result
1350/// at the location specified by `index`.
1351///
1352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi32)
1353#[inline]
1354#[target_feature(enable = "avx")]
1355// This intrinsic has no corresponding instruction.
1356#[rustc_legacy_const_generics(2)]
1357#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
    static_assert_uimm_bits!(INDEX, 3);
    transmute(simd_insert(a.as_i32x8(), INDEX as u32, i))
}
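
// A minimal usage sketch (illustrative only; the helper name is hypothetical).
// `_mm256_insert_epi32::<7>` overwrites only the highest-indexed 32-bit lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn docs_sketch_insert_epi32() {
    let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let r = _mm256_insert_epi32::<7>(a, 42);
    let mut out = [0i32; 8];
    _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
    assert_eq!(out, [0, 1, 2, 3, 4, 5, 6, 42]);
}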
1362
1363/// Loads 256-bits (composed of 4 packed double-precision (64-bit)
1364/// floating-point elements) from memory into result.
1365/// `mem_addr` must be aligned on a 32-byte boundary or a
1366/// general-protection exception may be generated.
1367///
1368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd)
1369#[inline]
1370#[target_feature(enable = "avx")]
1371#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1372#[stable(feature = "simd_x86", since = "1.27.0")]
1373#[allow(clippy::cast_ptr_alignment)]
1374pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
1375 *(mem_addr as *const __m256d)
1376}
1377
1378/// Stores 256-bits (composed of 4 packed double-precision (64-bit)
1379/// floating-point elements) from `a` into memory.
1380/// `mem_addr` must be aligned on a 32-byte boundary or a
1381/// general-protection exception may be generated.
1382///
1383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd)
1384#[inline]
1385#[target_feature(enable = "avx")]
1386#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1387#[stable(feature = "simd_x86", since = "1.27.0")]
1388#[allow(clippy::cast_ptr_alignment)]
1389pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
1390 *(mem_addr as *mut __m256d) = a;
1391}
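
// A minimal usage sketch of the aligned load/store pair (illustrative only;
// the wrapper type and function name are hypothetical). The `#[repr(align(32))]`
// wrapper guarantees the 32-byte alignment these intrinsics require.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn docs_sketch_aligned_pd_roundtrip() {
    #[repr(align(32))]
    struct Aligned([f64; 4]);

    let mut buf = Aligned([1.0, 2.0, 3.0, 4.0]);
    let v = _mm256_load_pd(buf.0.as_ptr());
    let doubled = _mm256_add_pd(v, v);
    _mm256_store_pd(buf.0.as_mut_ptr(), doubled);
    assert_eq!(buf.0, [2.0, 4.0, 6.0, 8.0]);
}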
1392
1393/// Loads 256-bits (composed of 8 packed single-precision (32-bit)
1394/// floating-point elements) from memory into result.
1395/// `mem_addr` must be aligned on a 32-byte boundary or a
1396/// general-protection exception may be generated.
1397///
1398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_ps)
1399#[inline]
1400#[target_feature(enable = "avx")]
1401#[cfg_attr(test, assert_instr(vmovaps))]
1402#[stable(feature = "simd_x86", since = "1.27.0")]
1403#[allow(clippy::cast_ptr_alignment)]
1404pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
1405 *(mem_addr as *const __m256)
1406}
1407
1408/// Stores 256-bits (composed of 8 packed single-precision (32-bit)
1409/// floating-point elements) from `a` into memory.
1410/// `mem_addr` must be aligned on a 32-byte boundary or a
1411/// general-protection exception may be generated.
1412///
1413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_ps)
1414#[inline]
1415#[target_feature(enable = "avx")]
1416#[cfg_attr(test, assert_instr(vmovaps))]
1417#[stable(feature = "simd_x86", since = "1.27.0")]
1418#[allow(clippy::cast_ptr_alignment)]
1419pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
1420 *(mem_addr as *mut __m256) = a;
1421}
1422
1423/// Loads 256-bits (composed of 4 packed double-precision (64-bit)
1424/// floating-point elements) from memory into result.
1425/// `mem_addr` does not need to be aligned on any particular boundary.
1426///
1427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_pd)
1428#[inline]
1429#[target_feature(enable = "avx")]
1430#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
1431#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
    let mut dst: __m256d = _mm256_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m256d as *mut u8,
        mem::size_of::<__m256d>(),
    );
    dst
}
1441
1442/// Stores 256-bits (composed of 4 packed double-precision (64-bit)
1443/// floating-point elements) from `a` into memory.
1444/// `mem_addr` does not need to be aligned on any particular boundary.
1445///
1446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_pd)
1447#[inline]
1448#[target_feature(enable = "avx")]
1449#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
1450#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
    mem_addr.cast::<__m256d>().write_unaligned(a);
}
1454
1455/// Loads 256-bits (composed of 8 packed single-precision (32-bit)
1456/// floating-point elements) from memory into result.
1457/// `mem_addr` does not need to be aligned on any particular boundary.
1458///
1459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_ps)
1460#[inline]
1461#[target_feature(enable = "avx")]
1462#[cfg_attr(test, assert_instr(vmovups))]
1463#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
    let mut dst: __m256 = _mm256_undefined_ps();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m256 as *mut u8,
        mem::size_of::<__m256>(),
    );
    dst
}
1473
1474/// Stores 256-bits (composed of 8 packed single-precision (32-bit)
1475/// floating-point elements) from `a` into memory.
1476/// `mem_addr` does not need to be aligned on any particular boundary.
1477///
1478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_ps)
1479#[inline]
1480#[target_feature(enable = "avx")]
1481#[cfg_attr(test, assert_instr(vmovups))]
1482#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
    mem_addr.cast::<__m256>().write_unaligned(a);
}
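
// A minimal usage sketch (illustrative only; the function name is hypothetical).
// The `loadu`/`storeu` variants work on arbitrarily aligned pointers, so a plain
// array is sufficient here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn docs_sketch_unaligned_ps_roundtrip() {
    let src = [1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
    let v = _mm256_loadu_ps(src.as_ptr());
    let sum = _mm256_add_ps(v, v);
    let mut out = [0.0f32; 8];
    _mm256_storeu_ps(out.as_mut_ptr(), sum);
    assert_eq!(out, [2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0]);
}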
1486
1487/// Loads 256-bits of integer data from memory into result.
1488/// `mem_addr` must be aligned on a 32-byte boundary or a
1489/// general-protection exception may be generated.
1490///
1491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_si256)
1492#[inline]
1493#[target_feature(enable = "avx")]
1494#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
1495#[stable(feature = "simd_x86", since = "1.27.0")]
1496pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
1497 *mem_addr
1498}
1499
1500/// Stores 256-bits of integer data from `a` into memory.
1501/// `mem_addr` must be aligned on a 32-byte boundary or a
1502/// general-protection exception may be generated.
1503///
1504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_si256)
1505#[inline]
1506#[target_feature(enable = "avx")]
1507#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected
1508#[stable(feature = "simd_x86", since = "1.27.0")]
1509pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
1510 *mem_addr = a;
1511}
1512
1513/// Loads 256-bits of integer data from memory into result.
1514/// `mem_addr` does not need to be aligned on any particular boundary.
1515///
1516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_si256)
1517#[inline]
1518#[target_feature(enable = "avx")]
1519#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
1520#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
    let mut dst: __m256i = _mm256_undefined_si256();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m256i as *mut u8,
        mem::size_of::<__m256i>(),
    );
    dst
}
1530
1531/// Stores 256-bits of integer data from `a` into memory.
1532/// `mem_addr` does not need to be aligned on any particular boundary.
1533///
1534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_si256)
1535#[inline]
1536#[target_feature(enable = "avx")]
1537#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
1538#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
    mem_addr.write_unaligned(a);
}
1542
1543/// Loads packed double-precision (64-bit) floating-point elements from memory
1544/// into result using `mask` (elements are zeroed out when the high bit of the
1545/// corresponding element is not set).
1546///
1547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_pd)
1548#[inline]
1549#[target_feature(enable = "avx")]
1550#[cfg_attr(test, assert_instr(vmaskmovpd))]
1551#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
    maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
}
1555
1556/// Stores packed double-precision (64-bit) floating-point elements from `a`
1557/// into memory using `mask`.
1558///
1559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_pd)
1560#[inline]
1561#[target_feature(enable = "avx")]
1562#[cfg_attr(test, assert_instr(vmaskmovpd))]
1563#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
    maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
}
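
// A minimal usage sketch (illustrative only; the function name is hypothetical).
// Only elements whose mask lane has its high bit set are loaded or stored; the
// other destination elements are zeroed (load) or left untouched (store).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn docs_sketch_maskload_maskstore_pd() {
    let src = [1.0f64, 2.0, 3.0, 4.0];
    // -1 has its high bit set (load/store this lane), 0 does not (skip it).
    let mask = _mm256_setr_epi64x(-1, 0, -1, 0);
    let v = _mm256_maskload_pd(src.as_ptr(), mask); // [1.0, 0.0, 3.0, 0.0]
    let mut dst = [9.0f64; 4];
    _mm256_maskstore_pd(dst.as_mut_ptr(), mask, v);
    assert_eq!(dst, [1.0, 9.0, 3.0, 9.0]);
}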
1567
1568/// Loads packed double-precision (64-bit) floating-point elements from memory
1569/// into result using `mask` (elements are zeroed out when the high bit of the
1570/// corresponding element is not set).
1571///
1572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_pd)
1573#[inline]
1574#[target_feature(enable = "avx")]
1575#[cfg_attr(test, assert_instr(vmaskmovpd))]
1576#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
    maskloadpd(mem_addr as *const i8, mask.as_i64x2())
}
1580
1581/// Stores packed double-precision (64-bit) floating-point elements from `a`
1582/// into memory using `mask`.
1583///
1584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_pd)
1585#[inline]
1586#[target_feature(enable = "avx")]
1587#[cfg_attr(test, assert_instr(vmaskmovpd))]
1588#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
    maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
}
1592
1593/// Loads packed single-precision (32-bit) floating-point elements from memory
1594/// into result using `mask` (elements are zeroed out when the high bit of the
1595/// corresponding element is not set).
1596///
1597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_ps)
1598#[inline]
1599#[target_feature(enable = "avx")]
1600#[cfg_attr(test, assert_instr(vmaskmovps))]
1601#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
    maskloadps256(mem_addr as *const i8, mask.as_i32x8())
}
1605
1606/// Stores packed single-precision (32-bit) floating-point elements from `a`
1607/// into memory using `mask`.
1608///
1609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_ps)
1610#[inline]
1611#[target_feature(enable = "avx")]
1612#[cfg_attr(test, assert_instr(vmaskmovps))]
1613#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
    maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
}
1617
1618/// Loads packed single-precision (32-bit) floating-point elements from memory
1619/// into result using `mask` (elements are zeroed out when the high bit of the
1620/// corresponding element is not set).
1621///
1622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_ps)
1623#[inline]
1624#[target_feature(enable = "avx")]
1625#[cfg_attr(test, assert_instr(vmaskmovps))]
1626#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
    maskloadps(mem_addr as *const i8, mask.as_i32x4())
}
1630
1631/// Stores packed single-precision (32-bit) floating-point elements from `a`
1632/// into memory using `mask`.
1633///
1634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_ps)
1635#[inline]
1636#[target_feature(enable = "avx")]
1637#[cfg_attr(test, assert_instr(vmaskmovps))]
1638#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
    maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
}
1642
/// Duplicates odd-indexed single-precision (32-bit) floating-point elements
/// from `a`, and returns the results.
1645///
1646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movehdup_ps)
1647#[inline]
1648#[target_feature(enable = "avx")]
1649#[cfg_attr(test, assert_instr(vmovshdup))]
1650#[stable(feature = "simd_x86", since = "1.27.0")]
1651pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
1652 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
1653}
1654
/// Duplicates even-indexed single-precision (32-bit) floating-point elements
/// from `a`, and returns the results.
1657///
1658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_moveldup_ps)
1659#[inline]
1660#[target_feature(enable = "avx")]
1661#[cfg_attr(test, assert_instr(vmovsldup))]
1662#[stable(feature = "simd_x86", since = "1.27.0")]
1663pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
1664 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
1665}
1666
/// Duplicates even-indexed double-precision (64-bit) floating-point elements
/// from `a`, and returns the results.
1669///
1670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movedup_pd)
1671#[inline]
1672#[target_feature(enable = "avx")]
1673#[cfg_attr(test, assert_instr(vmovddup))]
1674#[stable(feature = "simd_x86", since = "1.27.0")]
1675pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d {
1676 simd_shuffle!(a, a, [0, 0, 2, 2])
1677}
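
// A minimal usage sketch (illustrative only; the function name is hypothetical):
// `movehdup` copies each odd-indexed float over its even neighbour, `moveldup`
// does the opposite.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn docs_sketch_dup_ps() {
    let a = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let mut out = [0.0f32; 8];
    _mm256_storeu_ps(out.as_mut_ptr(), _mm256_movehdup_ps(a));
    assert_eq!(out, [1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0]);
    _mm256_storeu_ps(out.as_mut_ptr(), _mm256_moveldup_ps(a));
    assert_eq!(out, [0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0]);
}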
1678
1679/// Loads 256-bits of integer data from unaligned memory into result.
1680/// This intrinsic may perform better than `_mm256_loadu_si256` when the
1681/// data crosses a cache line boundary.
1682///
1683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lddqu_si256)
1684#[inline]
1685#[target_feature(enable = "avx")]
1686#[cfg_attr(test, assert_instr(vlddqu))]
1687#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
    transmute(vlddqu(mem_addr as *const i8))
}
1691
1692/// Moves integer data from a 256-bit integer vector to a 32-byte
1693/// aligned memory location. To minimize caching, the data is flagged as
/// non-temporal (unlikely to be used again soon).
1695///
1696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_si256)
1697#[inline]
1698#[target_feature(enable = "avx")]
1699#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
1700#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
    intrinsics::nontemporal_store(mem_addr, a);
}
1704
1705/// Moves double-precision values from a 256-bit vector of `[4 x double]`
1706/// to a 32-byte aligned memory location. To minimize caching, the data is
1707/// flagged as non-temporal (unlikely to be used again soon).
1708///
1709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_pd)
1710#[inline]
1711#[target_feature(enable = "avx")]
1712#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
1713#[stable(feature = "simd_x86", since = "1.27.0")]
1714#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
    intrinsics::nontemporal_store(mem_addr as *mut __m256d, a);
}
1718
1719/// Moves single-precision floating point values from a 256-bit vector
1720/// of `[8 x float]` to a 32-byte aligned memory location. To minimize
1721/// caching, the data is flagged as non-temporal (unlikely to be used again
1722/// soon).
1723///
1724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_ps)
1725#[inline]
1726#[target_feature(enable = "avx")]
1727#[cfg_attr(test, assert_instr(vmovntps))]
1728#[stable(feature = "simd_x86", since = "1.27.0")]
1729#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) {
    intrinsics::nontemporal_store(mem_addr as *mut __m256, a);
}
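
// A minimal usage sketch (illustrative only; the wrapper type and function name
// are hypothetical). Streaming stores require a 32-byte aligned destination and
// bypass the cache, which only pays off for large buffers that will not be read
// back soon.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn docs_sketch_stream_ps() {
    #[repr(align(32))]
    struct Aligned([f32; 8]);

    let mut dst = Aligned([0.0; 8]);
    let v = _mm256_set1_ps(1.5);
    _mm256_stream_ps(dst.0.as_mut_ptr(), v);
}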
1733
1734/// Computes the approximate reciprocal of packed single-precision (32-bit)
1735/// floating-point elements in `a`, and returns the results. The maximum
1736/// relative error for this approximation is less than 1.5*2^-12.
1737///
1738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp_ps)
1739#[inline]
1740#[target_feature(enable = "avx")]
1741#[cfg_attr(test, assert_instr(vrcpps))]
1742#[stable(feature = "simd_x86", since = "1.27.0")]
1743pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 {
1744 vrcpps(a)
1745}
1746
1747/// Computes the approximate reciprocal square root of packed single-precision
1748/// (32-bit) floating-point elements in `a`, and returns the results.
1749/// The maximum relative error for this approximation is less than 1.5*2^-12.
1750///
1751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt_ps)
1752#[inline]
1753#[target_feature(enable = "avx")]
1754#[cfg_attr(test, assert_instr(vrsqrtps))]
1755#[stable(feature = "simd_x86", since = "1.27.0")]
1756pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
1757 vrsqrtps(a)
1758}
1759
/// Unpacks and interleaves double-precision (64-bit) floating-point elements
1761/// from the high half of each 128-bit lane in `a` and `b`.
1762///
1763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_pd)
1764#[inline]
1765#[target_feature(enable = "avx")]
1766#[cfg_attr(test, assert_instr(vunpckhpd))]
1767#[stable(feature = "simd_x86", since = "1.27.0")]
1768pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
1769 simd_shuffle!(a, b, [1, 5, 3, 7])
1770}
1771
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
1773/// from the high half of each 128-bit lane in `a` and `b`.
1774///
1775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_ps)
1776#[inline]
1777#[target_feature(enable = "avx")]
1778#[cfg_attr(test, assert_instr(vunpckhps))]
1779#[stable(feature = "simd_x86", since = "1.27.0")]
1780pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
1781 simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
1782}
1783
/// Unpacks and interleaves double-precision (64-bit) floating-point elements
1785/// from the low half of each 128-bit lane in `a` and `b`.
1786///
1787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_pd)
1788#[inline]
1789#[target_feature(enable = "avx")]
1790#[cfg_attr(test, assert_instr(vunpcklpd))]
1791#[stable(feature = "simd_x86", since = "1.27.0")]
1792pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
1793 simd_shuffle!(a, b, [0, 4, 2, 6])
1794}
1795
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
1797/// from the low half of each 128-bit lane in `a` and `b`.
1798///
1799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_ps)
1800#[inline]
1801#[target_feature(enable = "avx")]
1802#[cfg_attr(test, assert_instr(vunpcklps))]
1803#[stable(feature = "simd_x86", since = "1.27.0")]
1804pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
1805 simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
1806}
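
// A minimal usage sketch (illustrative only; the function name is hypothetical).
// Note that the interleaving happens independently within each 128-bit lane,
// unlike a full 256-bit interleave.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn docs_sketch_unpacklo_ps() {
    let a = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let b = _mm256_setr_ps(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
    let mut out = [0.0f32; 8];
    _mm256_storeu_ps(out.as_mut_ptr(), _mm256_unpacklo_ps(a, b));
    // Low halves of each lane interleaved: lane 0 -> 0,10,1,11; lane 1 -> 4,14,5,15.
    assert_eq!(out, [0.0, 10.0, 1.0, 11.0, 4.0, 14.0, 5.0, 15.0]);
}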
1807
1808/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1809/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1810/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1811/// the result is zero, otherwise set `CF` to 0. Return the `ZF` value.
1812///
1813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256)
1814#[inline]
1815#[target_feature(enable = "avx")]
1816#[cfg_attr(test, assert_instr(vptest))]
1817#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
    ptestz256(a.as_i64x4(), b.as_i64x4())
}
1821
1822/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1823/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1824/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1825/// the result is zero, otherwise set `CF` to 0. Return the `CF` value.
1826///
1827/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_si256)
1828#[inline]
1829#[target_feature(enable = "avx")]
1830#[cfg_attr(test, assert_instr(vptest))]
1831#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
    ptestc256(a.as_i64x4(), b.as_i64x4())
}
1835
1836/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
1837/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
1838/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
1839/// the result is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and
1840/// `CF` values are zero, otherwise return 0.
1841///
1842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_si256)
1843#[inline]
1844#[target_feature(enable = "avx")]
1845#[cfg_attr(test, assert_instr(vptest))]
1846#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 {
    ptestnzc256(a.as_i64x4(), b.as_i64x4())
}
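
// A minimal usage sketch (illustrative only; the function name is hypothetical).
// `testz` answers "is `a & b` all zeros?", `testc` answers "is `!a & b` all
// zeros?" (i.e. does `a` cover every bit set in `b`?).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn docs_sketch_ptest() {
    let a = _mm256_set1_epi32(0b1100);
    let b = _mm256_set1_epi32(0b0011);
    assert_eq!(_mm256_testz_si256(a, b), 1); // no common bits
    assert_eq!(_mm256_testc_si256(a, b), 0); // `a` does not contain `b`'s bits
    assert_eq!(_mm256_testc_si256(a, a), 1); // every value contains itself
}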
1850
1851/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1852/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1853/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1854/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1855/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1856/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1857/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1858///
1859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_pd)
1860#[inline]
1861#[target_feature(enable = "avx")]
1862#[cfg_attr(test, assert_instr(vtestpd))]
1863#[stable(feature = "simd_x86", since = "1.27.0")]
1864pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 {
1865 vtestzpd256(a, b)
1866}
1867
1868/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1869/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1870/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1871/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1872/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1873/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1874/// is zero, otherwise set `CF` to 0. Return the `CF` value.
1875///
1876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_pd)
1877#[inline]
1878#[target_feature(enable = "avx")]
1879#[cfg_attr(test, assert_instr(vtestpd))]
1880#[stable(feature = "simd_x86", since = "1.27.0")]
1881pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 {
1882 vtestcpd256(a, b)
1883}
1884
1885/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
1886/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1887/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1888/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1889/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1890/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1891/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
1892/// are zero, otherwise return 0.
1893///
1894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_pd)
1895#[inline]
1896#[target_feature(enable = "avx")]
1897#[cfg_attr(test, assert_instr(vtestpd))]
1898#[stable(feature = "simd_x86", since = "1.27.0")]
1899pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
1900 vtestnzcpd256(a, b)
1901}
1902
1903/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1904/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1905/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1906/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1907/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1908/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1909/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1910///
1911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_pd)
1912#[inline]
1913#[target_feature(enable = "avx")]
1914#[cfg_attr(test, assert_instr(vtestpd))]
1915#[stable(feature = "simd_x86", since = "1.27.0")]
1916pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
1917 vtestzpd(a, b)
1918}
1919
1920/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1921/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1922/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1923/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1924/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1925/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1926/// is zero, otherwise set `CF` to 0. Return the `CF` value.
1927///
1928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_pd)
1929#[inline]
1930#[target_feature(enable = "avx")]
1931#[cfg_attr(test, assert_instr(vtestpd))]
1932#[stable(feature = "simd_x86", since = "1.27.0")]
1933pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
1934 vtestcpd(a, b)
1935}
1936
1937/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
1938/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
1939/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
1940/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1941/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1942/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
1943/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
1944/// are zero, otherwise return 0.
1945///
1946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_pd)
1947#[inline]
1948#[target_feature(enable = "avx")]
1949#[cfg_attr(test, assert_instr(vtestpd))]
1950#[stable(feature = "simd_x86", since = "1.27.0")]
1951pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 {
1952 vtestnzcpd(a, b)
1953}
1954
1955/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
1956/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1957/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
1958/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1959/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1960/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
1961/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
1962///
1963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_ps)
1964#[inline]
1965#[target_feature(enable = "avx")]
1966#[cfg_attr(test, assert_instr(vtestps))]
1967#[stable(feature = "simd_x86", since = "1.27.0")]
1968pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 {
1969 vtestzps256(a, b)
1970}
1971
1972/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
1973/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1974/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
1975/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1976/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1977/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
1978/// is zero, otherwise set `CF` to 0. Return the `CF` value.
1979///
1980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_ps)
1981#[inline]
1982#[target_feature(enable = "avx")]
1983#[cfg_attr(test, assert_instr(vtestps))]
1984#[stable(feature = "simd_x86", since = "1.27.0")]
1985pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 {
1986 vtestcps256(a, b)
1987}
1988
1989/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
1990/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
1991/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
1992/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
1993/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
1994/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
1995/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
1996/// are zero, otherwise return 0.
1997///
1998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_ps)
1999#[inline]
2000#[target_feature(enable = "avx")]
2001#[cfg_attr(test, assert_instr(vtestps))]
2002#[stable(feature = "simd_x86", since = "1.27.0")]
2003pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
2004 vtestnzcps256(a, b)
2005}
2006
2007/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2008/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2009/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2010/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2011/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2012/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2013/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
2014///
2015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_ps)
2016#[inline]
2017#[target_feature(enable = "avx")]
2018#[cfg_attr(test, assert_instr(vtestps))]
2019#[stable(feature = "simd_x86", since = "1.27.0")]
2020pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
2021 vtestzps(a, b)
2022}
2023
2024/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2025/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2026/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2027/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2028/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2029/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2030/// is zero, otherwise set `CF` to 0. Return the `CF` value.
2031///
2032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_ps)
2033#[inline]
2034#[target_feature(enable = "avx")]
2035#[cfg_attr(test, assert_instr(vtestps))]
2036#[stable(feature = "simd_x86", since = "1.27.0")]
2037pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
2038 vtestcps(a, b)
2039}
2040
2041/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2042/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2043/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2044/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2045/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2046/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2047/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
2048/// are zero, otherwise return 0.
2049///
2050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_ps)
2051#[inline]
2052#[target_feature(enable = "avx")]
2053#[cfg_attr(test, assert_instr(vtestps))]
2054#[stable(feature = "simd_x86", since = "1.27.0")]
2055pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
2056 vtestnzcps(a, b)
2057}
2058
2059/// Sets each bit of the returned mask based on the most significant bit of the
2060/// corresponding packed double-precision (64-bit) floating-point element in
2061/// `a`.
2062///
2063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_pd)
2064#[inline]
2065#[target_feature(enable = "avx")]
2066#[cfg_attr(test, assert_instr(vmovmskpd))]
2067#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
    // Propagate the highest bit to the rest, because simd_bitmask
    // requires all-1 or all-0.
    let mask: i64x4 = simd_lt(transmute(a), i64x4::splat(0));
    simd_bitmask::<i64x4, u8>(mask).into()
}
2074
2075/// Sets each bit of the returned mask based on the most significant bit of the
2076/// corresponding packed single-precision (32-bit) floating-point element in
2077/// `a`.
2078///
2079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_ps)
2080#[inline]
2081#[target_feature(enable = "avx")]
2082#[cfg_attr(test, assert_instr(vmovmskps))]
2083#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
    // Propagate the highest bit to the rest, because simd_bitmask
    // requires all-1 or all-0.
    let mask: i32x8 = simd_lt(transmute(a), i32x8::splat(0));
    simd_bitmask::<i32x8, u8>(mask).into()
}
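
// A minimal usage sketch (illustrative only; the function name is hypothetical).
// Each bit of the result mirrors the sign bit of the corresponding lane, which
// is handy for turning a vector comparison into a scalar bitmask.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn docs_sketch_movemask_ps() {
    let a = _mm256_setr_ps(-1.0, 1.0, -2.0, 2.0, 3.0, -3.0, 4.0, -4.0);
    // Negative lanes are 0, 2, 5 and 7 -> bits 0, 2, 5, 7.
    assert_eq!(_mm256_movemask_ps(a), 0b1010_0101);
}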
2090
2091/// Returns vector of type __m256d with all elements set to zero.
2092///
2093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_pd)
2094#[inline]
2095#[target_feature(enable = "avx")]
2096#[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected
2097#[stable(feature = "simd_x86", since = "1.27.0")]
2098pub unsafe fn _mm256_setzero_pd() -> __m256d {
2099 _mm256_set1_pd(0.0)
2100}
2101
2102/// Returns vector of type __m256 with all elements set to zero.
2103///
2104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_ps)
2105#[inline]
2106#[target_feature(enable = "avx")]
2107#[cfg_attr(test, assert_instr(vxorps))]
2108#[stable(feature = "simd_x86", since = "1.27.0")]
2109pub unsafe fn _mm256_setzero_ps() -> __m256 {
2110 _mm256_set1_ps(0.0)
2111}
2112
2113/// Returns vector of type __m256i with all elements set to zero.
2114///
2115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_si256)
2116#[inline]
2117#[target_feature(enable = "avx")]
2118#[cfg_attr(test, assert_instr(vxor))]
2119#[stable(feature = "simd_x86", since = "1.27.0")]
2120pub unsafe fn _mm256_setzero_si256() -> __m256i {
2121 _mm256_set1_epi8(0)
2122}
2123
2124/// Sets packed double-precision (64-bit) floating-point elements in returned
2125/// vector with the supplied values.
2126///
2127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_pd)
2128#[inline]
2129#[target_feature(enable = "avx")]
2130// This intrinsic has no corresponding instruction.
2131#[cfg_attr(test, assert_instr(vinsertf128))]
2132#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
    _mm256_setr_pd(d, c, b, a)
}
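
// A minimal usage sketch (illustrative only; the function name is hypothetical).
// `_mm256_set_pd` takes arguments from the highest element down to the lowest,
// so it is the mirror image of `_mm256_setr_pd`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx")]
unsafe fn docs_sketch_set_pd_order() {
    let v = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);
    let mut out = [0.0f64; 4];
    _mm256_storeu_pd(out.as_mut_ptr(), v);
    // Element 0 (lowest address) receives the *last* argument.
    assert_eq!(out, [1.0, 2.0, 3.0, 4.0]);
}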
2136
2137/// Sets packed single-precision (32-bit) floating-point elements in returned
2138/// vector with the supplied values.
2139///
2140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_ps)
2141#[inline]
2142#[target_feature(enable = "avx")]
2143// This intrinsic has no corresponding instruction.
2144#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_ps(
    a: f32,
    b: f32,
    c: f32,
    d: f32,
    e: f32,
    f: f32,
    g: f32,
    h: f32,
) -> __m256 {
    _mm256_setr_ps(h, g, f, e, d, c, b, a)
}
2157
2158/// Sets packed 8-bit integers in returned vector with the supplied values.
2159///
2160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi8)
2161#[inline]
2162#[target_feature(enable = "avx")]
2163// This intrinsic has no corresponding instruction.
2164#[stable(feature = "simd_x86", since = "1.27.0")]
2165pub unsafe fn _mm256_set_epi8(
2166 e00: i8,
2167 e01: i8,
2168 e02: i8,
2169 e03: i8,
2170 e04: i8,
2171 e05: i8,
2172 e06: i8,
2173 e07: i8,
2174 e08: i8,
2175 e09: i8,
2176 e10: i8,
2177 e11: i8,
2178 e12: i8,
2179 e13: i8,
2180 e14: i8,
2181 e15: i8,
2182 e16: i8,
2183 e17: i8,
2184 e18: i8,
2185 e19: i8,
2186 e20: i8,
2187 e21: i8,
2188 e22: i8,
2189 e23: i8,
2190 e24: i8,
2191 e25: i8,
2192 e26: i8,
2193 e27: i8,
2194 e28: i8,
2195 e29: i8,
2196 e30: i8,
2197 e31: i8,
2198) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi8(
        e31, e30, e29, e28, e27, e26, e25, e24,
        e23, e22, e21, e20, e19, e18, e17, e16,
        e15, e14, e13, e12, e11, e10, e09, e08,
        e07, e06, e05, e04, e03, e02, e01, e00,
    )
}
2207
2208/// Sets packed 16-bit integers in returned vector with the supplied values.
2209///
2210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi16)
2211#[inline]
2212#[target_feature(enable = "avx")]
2213// This intrinsic has no corresponding instruction.
2214#[stable(feature = "simd_x86", since = "1.27.0")]
2215pub unsafe fn _mm256_set_epi16(
2216 e00: i16,
2217 e01: i16,
2218 e02: i16,
2219 e03: i16,
2220 e04: i16,
2221 e05: i16,
2222 e06: i16,
2223 e07: i16,
2224 e08: i16,
2225 e09: i16,
2226 e10: i16,
2227 e11: i16,
2228 e12: i16,
2229 e13: i16,
2230 e14: i16,
2231 e15: i16,
2232) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi16(
        e15, e14, e13, e12,
        e11, e10, e09, e08,
        e07, e06, e05, e04,
        e03, e02, e01, e00,
    )
}
2241
2242/// Sets packed 32-bit integers in returned vector with the supplied values.
2243///
2244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi32)
2245#[inline]
2246#[target_feature(enable = "avx")]
2247// This intrinsic has no corresponding instruction.
2248#[stable(feature = "simd_x86", since = "1.27.0")]
2249pub unsafe fn _mm256_set_epi32(
2250 e0: i32,
2251 e1: i32,
2252 e2: i32,
2253 e3: i32,
2254 e4: i32,
2255 e5: i32,
2256 e6: i32,
2257 e7: i32,
2258) -> __m256i {
    _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
}
2261
2262/// Sets packed 64-bit integers in returned vector with the supplied values.
2263///
2264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi64x)
2265#[inline]
2266#[target_feature(enable = "avx")]
2267// This intrinsic has no corresponding instruction.
2268#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    _mm256_setr_epi64x(d, c, b, a)
}
2272
2273/// Sets packed double-precision (64-bit) floating-point elements in returned
2274/// vector with the supplied values in reverse order.
2275///
2276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_pd)
2277#[inline]
2278#[target_feature(enable = "avx")]
2279// This intrinsic has no corresponding instruction.
2280#[stable(feature = "simd_x86", since = "1.27.0")]
2281pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
2282 __m256d(a, b, c, d)
2283}
2284
2285/// Sets packed single-precision (32-bit) floating-point elements in returned
2286/// vector with the supplied values in reverse order.
2287///
2288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_ps)
2289#[inline]
2290#[target_feature(enable = "avx")]
2291// This intrinsic has no corresponding instruction.
2292#[stable(feature = "simd_x86", since = "1.27.0")]
2293pub unsafe fn _mm256_setr_ps(
2294 a: f32,
2295 b: f32,
2296 c: f32,
2297 d: f32,
2298 e: f32,
2299 f: f32,
2300 g: f32,
2301 h: f32,
2302) -> __m256 {
2303 __m256(a, b, c, d, e, f, g, h)
2304}
2305
2306/// Sets packed 8-bit integers in returned vector with the supplied values in
2307/// reverse order.
2308///
2309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi8)
2310#[inline]
2311#[target_feature(enable = "avx")]
2312// This intrinsic has no corresponding instruction.
2313#[stable(feature = "simd_x86", since = "1.27.0")]
2314pub unsafe fn _mm256_setr_epi8(
2315 e00: i8,
2316 e01: i8,
2317 e02: i8,
2318 e03: i8,
2319 e04: i8,
2320 e05: i8,
2321 e06: i8,
2322 e07: i8,
2323 e08: i8,
2324 e09: i8,
2325 e10: i8,
2326 e11: i8,
2327 e12: i8,
2328 e13: i8,
2329 e14: i8,
2330 e15: i8,
2331 e16: i8,
2332 e17: i8,
2333 e18: i8,
2334 e19: i8,
2335 e20: i8,
2336 e21: i8,
2337 e22: i8,
2338 e23: i8,
2339 e24: i8,
2340 e25: i8,
2341 e26: i8,
2342 e27: i8,
2343 e28: i8,
2344 e29: i8,
2345 e30: i8,
2346 e31: i8,
2347) -> __m256i {
    #[rustfmt::skip]
    transmute(i8x32::new(
        e00, e01, e02, e03, e04, e05, e06, e07,
        e08, e09, e10, e11, e12, e13, e14, e15,
        e16, e17, e18, e19, e20, e21, e22, e23,
        e24, e25, e26, e27, e28, e29, e30, e31,
    ))
}
2356
2357/// Sets packed 16-bit integers in returned vector with the supplied values in
2358/// reverse order.
2359///
2360/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi16)
2361#[inline]
2362#[target_feature(enable = "avx")]
2363// This intrinsic has no corresponding instruction.
2364#[stable(feature = "simd_x86", since = "1.27.0")]
2365pub unsafe fn _mm256_setr_epi16(
2366 e00: i16,
2367 e01: i16,
2368 e02: i16,
2369 e03: i16,
2370 e04: i16,
2371 e05: i16,
2372 e06: i16,
2373 e07: i16,
2374 e08: i16,
2375 e09: i16,
2376 e10: i16,
2377 e11: i16,
2378 e12: i16,
2379 e13: i16,
2380 e14: i16,
2381 e15: i16,
2382) -> __m256i {
    #[rustfmt::skip]
    transmute(i16x16::new(
        e00, e01, e02, e03,
        e04, e05, e06, e07,
        e08, e09, e10, e11,
        e12, e13, e14, e15,
    ))
}
2391
2392/// Sets packed 32-bit integers in returned vector with the supplied values in
2393/// reverse order.
2394///
2395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi32)
2396#[inline]
2397#[target_feature(enable = "avx")]
2398// This intrinsic has no corresponding instruction.
2399#[stable(feature = "simd_x86", since = "1.27.0")]
2400pub unsafe fn _mm256_setr_epi32(
2401 e0: i32,
2402 e1: i32,
2403 e2: i32,
2404 e3: i32,
2405 e4: i32,
2406 e5: i32,
2407 e6: i32,
2408 e7: i32,
2409) -> __m256i {
    transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}
2412
2413/// Sets packed 64-bit integers in returned vector with the supplied values in
2414/// reverse order.
2415///
2416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi64x)
2417#[inline]
2418#[target_feature(enable = "avx")]
2419// This intrinsic has no corresponding instruction.
2420#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    transmute(i64x4::new(a, b, c, d))
}
2424
2425/// Broadcasts double-precision (64-bit) floating-point value `a` to all
2426/// elements of returned vector.
2427///
2428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_pd)
2429#[inline]
2430#[target_feature(enable = "avx")]
2431// This intrinsic has no corresponding instruction.
2432#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d {
    _mm256_setr_pd(a, a, a, a)
}
2436
2437/// Broadcasts single-precision (32-bit) floating-point value `a` to all
2438/// elements of returned vector.
2439///
2440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_ps)
2441#[inline]
2442#[target_feature(enable = "avx")]
2443// This intrinsic has no corresponding instruction.
2444#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 {
    _mm256_setr_ps(a, a, a, a, a, a, a, a)
}
2448
2449/// Broadcasts 8-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastb` instruction.
2451///
2452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi8)
2453#[inline]
2454#[target_feature(enable = "avx")]
2455// This intrinsic has no corresponding instruction.
2456#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi8(
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
    )
}
2466
2467/// Broadcasts 16-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastw` instruction.
2469///
2470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi16)
2471#[inline]
2472#[target_feature(enable = "avx")]
2473//#[cfg_attr(test, assert_instr(vpshufb))]
2474#[cfg_attr(test, assert_instr(vinsertf128))]
2475// This intrinsic has no corresponding instruction.
2476#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i {
    _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}
2480
2481/// Broadcasts 32-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastd` instruction.
2483///
2484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi32)
2485#[inline]
2486#[target_feature(enable = "avx")]
2487// This intrinsic has no corresponding instruction.
2488#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i {
    _mm256_setr_epi32(a, a, a, a, a, a, a, a)
}
2492
2493/// Broadcasts 64-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastq` instruction.
2495///
2496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi64x)
2497#[inline]
2498#[target_feature(enable = "avx")]
2499#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))]
2500#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
2501// This intrinsic has no corresponding instruction.
2502#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i {
    _mm256_setr_epi64x(a, a, a, a)
}
2506
2507/// Cast vector of type __m256d to type __m256.
2508///
2509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_ps)
2510#[inline]
2511#[target_feature(enable = "avx")]
2512// This intrinsic is only used for compilation and does not generate any
2513// instructions, thus it has zero latency.
2514#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 {
    transmute(a)
}
2518
2519/// Cast vector of type __m256 to type __m256d.
2520///
2521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_pd)
2522#[inline]
2523#[target_feature(enable = "avx")]
2524// This intrinsic is only used for compilation and does not generate any
2525// instructions, thus it has zero latency.
2526#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d {
    transmute(a)
}
2530
2531/// Casts vector of type __m256 to type __m256i.
2532///
2533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_si256)
2534#[inline]
2535#[target_feature(enable = "avx")]
2536// This intrinsic is only used for compilation and does not generate any
2537// instructions, thus it has zero latency.
2538#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i {
    transmute(a)
}
2542
2543/// Casts vector of type __m256i to type __m256.
2544///
2545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_ps)
2546#[inline]
2547#[target_feature(enable = "avx")]
2548// This intrinsic is only used for compilation and does not generate any
2549// instructions, thus it has zero latency.
2550#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
    transmute(a)
}
2554
2555/// Casts vector of type __m256d to type __m256i.
2556///
2557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_si256)
2558#[inline]
2559#[target_feature(enable = "avx")]
2560// This intrinsic is only used for compilation and does not generate any
2561// instructions, thus it has zero latency.
2562#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i {
    transmute(a)
}
2566
2567/// Casts vector of type __m256i to type __m256d.
2568///
2569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_pd)
2570#[inline]
2571#[target_feature(enable = "avx")]
2572// This intrinsic is only used for compilation and does not generate any
2573// instructions, thus it has zero latency.
2574#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
    transmute(a)
}
2578
2579/// Casts vector of type __m256 to type __m128.
2580///
2581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps256_ps128)
2582#[inline]
2583#[target_feature(enable = "avx")]
2584// This intrinsic is only used for compilation and does not generate any
2585// instructions, thus it has zero latency.
2586#[stable(feature = "simd_x86", since = "1.27.0")]
2587pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 {
2588 simd_shuffle!(a, a, [0, 1, 2, 3])
2589}
2590
2591/// Casts vector of type __m256d to type __m128d.
2592///
2593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd256_pd128)
2594#[inline]
2595#[target_feature(enable = "avx")]
2596// This intrinsic is only used for compilation and does not generate any
2597// instructions, thus it has zero latency.
2598#[stable(feature = "simd_x86", since = "1.27.0")]
2599pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
2600 simd_shuffle!(a, a, [0, 1])
2601}
2602
2603/// Casts vector of type __m256i to type __m128i.
2604///
2605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_si128)
2606#[inline]
2607#[target_feature(enable = "avx")]
2608// This intrinsic is only used for compilation and does not generate any
2609// instructions, thus it has zero latency.
2610#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
    let a: i64x4 = a.as_i64x4();
    let dst: i64x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(dst)
}
2616
2617/// Casts vector of type __m128 to type __m256;
2618/// the upper 128 bits of the result are undefined.
2619///
2620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps128_ps256)
2621#[inline]
2622#[target_feature(enable = "avx")]
2623// This intrinsic is only used for compilation and does not generate any
2624// instructions, thus it has zero latency.
2625#[stable(feature = "simd_x86", since = "1.27.0")]
2626pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
2627 // FIXME simd_shuffle!(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
2628 simd_shuffle!(a, a, [0, 1, 2, 3, 0, 0, 0, 0])
2629}
2630
2631/// Casts vector of type __m128d to type __m256d;
2632/// the upper 128 bits of the result are undefined.
2633///
2634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd128_pd256)
2635#[inline]
2636#[target_feature(enable = "avx")]
2637// This intrinsic is only used for compilation and does not generate any
2638// instructions, thus it has zero latency.
2639#[stable(feature = "simd_x86", since = "1.27.0")]
2640pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
2641 // FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2642 simd_shuffle!(a, a, [0, 1, 0, 0])
2643}
2644
2645/// Casts vector of type __m128i to type __m256i;
2646/// the upper 128 bits of the result are undefined.
2647///
2648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi128_si256)
2649#[inline]
2650#[target_feature(enable = "avx")]
2651// This intrinsic is only used for compilation and does not generate any
2652// instructions, thus it has zero latency.
2653#[stable(feature = "simd_x86", since = "1.27.0")]
2654pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
2655 let a: i64x2 = a.as_i64x2();
2656 // FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2657 let dst: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 0]);
    transmute(dst)
2659}
2660
2661/// Constructs a 256-bit floating-point vector of `[8 x float]` from a
2662/// 128-bit floating-point vector of `[4 x float]`. The lower 128 bits contain
2663/// the value of the source vector. The upper 128 bits are set to zero.
2664///
2665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextps128_ps256)
2666#[inline]
2667#[target_feature(enable = "avx,sse")]
2668// This intrinsic is only used for compilation and does not generate any
2669// instructions, thus it has zero latency.
2670#[stable(feature = "simd_x86", since = "1.27.0")]
2671pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
2672 simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
2673}
2674
2675/// Constructs a 256-bit integer vector from a 128-bit integer vector.
2676/// The lower 128 bits contain the value of the source vector. The upper
2677/// 128 bits are set to zero.
2678///
2679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextsi128_si256)
2680#[inline]
2681#[target_feature(enable = "avx,sse2")]
2682// This intrinsic is only used for compilation and does not generate any
2683// instructions, thus it has zero latency.
2684#[stable(feature = "simd_x86", since = "1.27.0")]
2685pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
2686 let b: i64x2 = _mm_setzero_si128().as_i64x2();
2687 let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
    transmute(dst)
2689}
2690
2691/// Constructs a 256-bit floating-point vector of `[4 x double]` from a
2692/// 128-bit floating-point vector of `[2 x double]`. The lower 128 bits
2693/// contain the value of the source vector. The upper 128 bits are set
2694/// to zero.
2695///
2696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextpd128_pd256)
2697#[inline]
2698#[target_feature(enable = "avx,sse2")]
2699// This intrinsic is only used for compilation and does not generate any
2700// instructions, thus it has zero latency.
2701#[stable(feature = "simd_x86", since = "1.27.0")]
2702pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
2703 simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3])
2704}
2705
2706/// Returns vector of type `__m256` with indeterminate elements.
2707/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2708/// In practice, this is equivalent to [`mem::zeroed`].
2709///
2710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_ps)
2711#[inline]
2712#[target_feature(enable = "avx")]
2713// This intrinsic has no corresponding instruction.
2714#[stable(feature = "simd_x86", since = "1.27.0")]
2715pub unsafe fn _mm256_undefined_ps() -> __m256 {
2716 _mm256_set1_ps(0.0)
2717}
2718
2719/// Returns vector of type `__m256d` with indeterminate elements.
2720/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2721/// In practice, this is equivalent to [`mem::zeroed`].
2722///
2723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_pd)
2724#[inline]
2725#[target_feature(enable = "avx")]
2726// This intrinsic has no corresponding instruction.
2727#[stable(feature = "simd_x86", since = "1.27.0")]
2728pub unsafe fn _mm256_undefined_pd() -> __m256d {
2729 _mm256_set1_pd(0.0)
2730}
2731
/// Returns vector of type `__m256i` with indeterminate elements.
2733/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
2734/// In practice, this is equivalent to [`mem::zeroed`].
2735///
2736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_si256)
2737#[inline]
2738#[target_feature(enable = "avx")]
2739// This intrinsic has no corresponding instruction.
2740#[stable(feature = "simd_x86", since = "1.27.0")]
2741pub unsafe fn _mm256_undefined_si256() -> __m256i {
2742 __m256i(0, 0, 0, 0)
2743}
2744
/// Sets packed `__m256` returned vector with the supplied values; `hi` provides
/// the upper 128 bits and `lo` the lower 128 bits of the result.
2746///
2747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128)
2748#[inline]
2749#[target_feature(enable = "avx")]
2750#[cfg_attr(test, assert_instr(vinsertf128))]
2751#[stable(feature = "simd_x86", since = "1.27.0")]
2752pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
2753 simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
2754}
2755
/// Sets packed `__m256d` returned vector with the supplied values; `hi` provides
/// the upper 128 bits and `lo` the lower 128 bits of the result.
2757///
2758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128d)
2759#[inline]
2760#[target_feature(enable = "avx")]
2761#[cfg_attr(test, assert_instr(vinsertf128))]
2762#[stable(feature = "simd_x86", since = "1.27.0")]
2763pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
    let hi: __m128 = transmute(hi);
    let lo: __m128 = transmute(lo);
    transmute(_mm256_set_m128(hi, lo))
2767}
2768
/// Sets packed `__m256i` returned vector with the supplied values; `hi` provides
/// the upper 128 bits and `lo` the lower 128 bits of the result.
2770///
2771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128i)
2772#[inline]
2773#[target_feature(enable = "avx")]
2774#[cfg_attr(test, assert_instr(vinsertf128))]
2775#[stable(feature = "simd_x86", since = "1.27.0")]
2776pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
    let hi: __m128 = transmute(hi);
    let lo: __m128 = transmute(lo);
    transmute(_mm256_set_m128(hi, lo))
2780}
2781
/// Sets packed `__m256` returned vector with the supplied values; `lo` provides
/// the lower 128 bits and `hi` the upper 128 bits of the result.
2783///
2784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128)
2785#[inline]
2786#[target_feature(enable = "avx")]
2787#[cfg_attr(test, assert_instr(vinsertf128))]
2788#[stable(feature = "simd_x86", since = "1.27.0")]
2789pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
2790 _mm256_set_m128(hi, lo)
2791}
2792
/// Sets packed `__m256d` returned vector with the supplied values; `lo` provides
/// the lower 128 bits and `hi` the upper 128 bits of the result.
2794///
2795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128d)
2796#[inline]
2797#[target_feature(enable = "avx")]
2798#[cfg_attr(test, assert_instr(vinsertf128))]
2799#[stable(feature = "simd_x86", since = "1.27.0")]
2800pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
2801 _mm256_set_m128d(hi, lo)
2802}
2803
/// Sets packed `__m256i` returned vector with the supplied values; `lo` provides
/// the lower 128 bits and `hi` the upper 128 bits of the result.
2805///
2806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128i)
2807#[inline]
2808#[target_feature(enable = "avx")]
2809#[cfg_attr(test, assert_instr(vinsertf128))]
2810#[stable(feature = "simd_x86", since = "1.27.0")]
2811pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
2812 _mm256_set_m128i(hi, lo)
2813}
2814
/// Loads two 128-bit values (composed of 4 packed single-precision (32-bit)
/// floating-point elements) from memory, and combines them into a 256-bit
/// value.
2818/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2819///
2820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128)
2821#[inline]
2822#[target_feature(enable = "avx,sse")]
2823// This intrinsic has no corresponding instruction.
2824#[stable(feature = "simd_x86", since = "1.27.0")]
2825pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
2826 let a: __m256 = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
    _mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr))
2828}
2829
/// Loads two 128-bit values (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory, and combines them into a 256-bit
/// value.
2833/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2834///
2835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128d)
2836#[inline]
2837#[target_feature(enable = "avx,sse2")]
2838// This intrinsic has no corresponding instruction.
2839#[stable(feature = "simd_x86", since = "1.27.0")]
2840pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
    let a: __m256d = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
    _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr))
2843}
2844
/// Loads two 128-bit values (composed of integer data) from memory, and combines
/// them into a 256-bit value.
2847/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2848///
2849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128i)
2850#[inline]
2851#[target_feature(enable = "avx,sse2")]
2852// This intrinsic has no corresponding instruction.
2853#[stable(feature = "simd_x86", since = "1.27.0")]
2854pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
    let a: __m256i = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
    _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr))
2857}
2858
/// Stores the high and low 128-bit halves (each composed of 4 packed
/// single-precision (32-bit) floating-point elements) from `a` into two
/// different 128-bit memory locations.
2862/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2863///
2864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128)
2865#[inline]
2866#[target_feature(enable = "avx,sse")]
2867// This intrinsic has no corresponding instruction.
2868#[stable(feature = "simd_x86", since = "1.27.0")]
2869pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
    let lo: __m128 = _mm256_castps256_ps128(a);
    _mm_storeu_ps(loaddr, lo);
    let hi: __m128 = _mm256_extractf128_ps::<1>(a);
    _mm_storeu_ps(hiaddr, hi);
2874}
2875
/// Stores the high and low 128-bit halves (each composed of 2 packed
/// double-precision (64-bit) floating-point elements) from `a` into two
/// different 128-bit memory locations.
2879/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2880///
2881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128d)
2882#[inline]
2883#[target_feature(enable = "avx,sse2")]
2884// This intrinsic has no corresponding instruction.
2885#[stable(feature = "simd_x86", since = "1.27.0")]
2886pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
    let lo: __m128d = _mm256_castpd256_pd128(a);
    _mm_storeu_pd(loaddr, lo);
    let hi: __m128d = _mm256_extractf128_pd::<1>(a);
    _mm_storeu_pd(hiaddr, hi);
2891}
2892
/// Stores the high and low 128-bit halves (each composed of integer data) from
/// `a` into two different 128-bit memory locations.
2895/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
2896///
2897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128i)
2898#[inline]
2899#[target_feature(enable = "avx,sse2")]
2900// This intrinsic has no corresponding instruction.
2901#[stable(feature = "simd_x86", since = "1.27.0")]
2902pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
    let lo: __m128i = _mm256_castsi256_si128(a);
    _mm_storeu_si128(loaddr, lo);
    let hi: __m128i = _mm256_extractf128_si256::<1>(a);
    _mm_storeu_si128(hiaddr, hi);
2907}
2908
2909/// Returns the first element of the input vector of `[8 x float]`.
2910///
2911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtss_f32)
2912#[inline]
2913#[target_feature(enable = "avx")]
2914//#[cfg_attr(test, assert_instr(movss))] FIXME
2915#[stable(feature = "simd_x86", since = "1.27.0")]
2916pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 {
    simd_extract(a, 0)
2918}
2919
2920// LLVM intrinsics used in the above functions
2921#[allow(improper_ctypes)]
2922extern "C" {
2923 #[link_name = "llvm.x86.avx.round.pd.256"]
2924 fn roundpd256(a: __m256d, b: i32) -> __m256d;
2925 #[link_name = "llvm.x86.avx.round.ps.256"]
2926 fn roundps256(a: __m256, b: i32) -> __m256;
2927 #[link_name = "llvm.x86.avx.sqrt.ps.256"]
2928 fn sqrtps256(a: __m256) -> __m256;
2929 #[link_name = "llvm.x86.avx.dp.ps.256"]
2930 fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
2931 #[link_name = "llvm.x86.avx.hadd.pd.256"]
2932 fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
2933 #[link_name = "llvm.x86.avx.hadd.ps.256"]
2934 fn vhaddps(a: __m256, b: __m256) -> __m256;
2935 #[link_name = "llvm.x86.avx.hsub.pd.256"]
2936 fn vhsubpd(a: __m256d, b: __m256d) -> __m256d;
2937 #[link_name = "llvm.x86.avx.hsub.ps.256"]
2938 fn vhsubps(a: __m256, b: __m256) -> __m256;
2939 #[link_name = "llvm.x86.sse2.cmp.pd"]
2940 fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2941 #[link_name = "llvm.x86.avx.cmp.pd.256"]
2942 fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d;
2943 #[link_name = "llvm.x86.sse.cmp.ps"]
2944 fn vcmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
2945 #[link_name = "llvm.x86.avx.cmp.ps.256"]
2946 fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256;
2947 #[link_name = "llvm.x86.sse2.cmp.sd"]
2948 fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2949 #[link_name = "llvm.x86.sse.cmp.ss"]
2950 fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
2951 #[link_name = "llvm.x86.avx.cvtdq2.ps.256"]
2952 fn vcvtdq2ps(a: i32x8) -> __m256;
2953 #[link_name = "llvm.x86.avx.cvt.pd2.ps.256"]
2954 fn vcvtpd2ps(a: __m256d) -> __m128;
2955 #[link_name = "llvm.x86.avx.cvt.ps2dq.256"]
2956 fn vcvtps2dq(a: __m256) -> i32x8;
2957 #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"]
2958 fn vcvttpd2dq(a: __m256d) -> i32x4;
2959 #[link_name = "llvm.x86.avx.cvt.pd2dq.256"]
2960 fn vcvtpd2dq(a: __m256d) -> i32x4;
2961 #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"]
2962 fn vcvttps2dq(a: __m256) -> i32x8;
2963 #[link_name = "llvm.x86.avx.vzeroall"]
2964 fn vzeroall();
2965 #[link_name = "llvm.x86.avx.vzeroupper"]
2966 fn vzeroupper();
2967 #[link_name = "llvm.x86.avx.vpermilvar.ps.256"]
2968 fn vpermilps256(a: __m256, b: i32x8) -> __m256;
2969 #[link_name = "llvm.x86.avx.vpermilvar.ps"]
2970 fn vpermilps(a: __m128, b: i32x4) -> __m128;
2971 #[link_name = "llvm.x86.avx.vpermilvar.pd.256"]
2972 fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
2973 #[link_name = "llvm.x86.avx.vpermilvar.pd"]
2974 fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
2975 #[link_name = "llvm.x86.avx.vperm2f128.ps.256"]
2976 fn vperm2f128ps256(a: __m256, b: __m256, imm8: i8) -> __m256;
2977 #[link_name = "llvm.x86.avx.vperm2f128.pd.256"]
2978 fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d;
2979 #[link_name = "llvm.x86.avx.vperm2f128.si.256"]
2980 fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8;
2981 #[link_name = "llvm.x86.avx.vbroadcastf128.ps.256"]
2982 fn vbroadcastf128ps256(a: &__m128) -> __m256;
2983 #[link_name = "llvm.x86.avx.vbroadcastf128.pd.256"]
2984 fn vbroadcastf128pd256(a: &__m128d) -> __m256d;
2985 #[link_name = "llvm.x86.avx.maskload.pd.256"]
2986 fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
2987 #[link_name = "llvm.x86.avx.maskstore.pd.256"]
2988 fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
2989 #[link_name = "llvm.x86.avx.maskload.pd"]
2990 fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
2991 #[link_name = "llvm.x86.avx.maskstore.pd"]
2992 fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
2993 #[link_name = "llvm.x86.avx.maskload.ps.256"]
2994 fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
2995 #[link_name = "llvm.x86.avx.maskstore.ps.256"]
2996 fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
2997 #[link_name = "llvm.x86.avx.maskload.ps"]
2998 fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
2999 #[link_name = "llvm.x86.avx.maskstore.ps"]
3000 fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
3001 #[link_name = "llvm.x86.avx.ldu.dq.256"]
3002 fn vlddqu(mem_addr: *const i8) -> i8x32;
3003 #[link_name = "llvm.x86.avx.rcp.ps.256"]
3004 fn vrcpps(a: __m256) -> __m256;
3005 #[link_name = "llvm.x86.avx.rsqrt.ps.256"]
3006 fn vrsqrtps(a: __m256) -> __m256;
3007 #[link_name = "llvm.x86.avx.ptestz.256"]
3008 fn ptestz256(a: i64x4, b: i64x4) -> i32;
3009 #[link_name = "llvm.x86.avx.ptestc.256"]
3010 fn ptestc256(a: i64x4, b: i64x4) -> i32;
3011 #[link_name = "llvm.x86.avx.ptestnzc.256"]
3012 fn ptestnzc256(a: i64x4, b: i64x4) -> i32;
3013 #[link_name = "llvm.x86.avx.vtestz.pd.256"]
3014 fn vtestzpd256(a: __m256d, b: __m256d) -> i32;
3015 #[link_name = "llvm.x86.avx.vtestc.pd.256"]
3016 fn vtestcpd256(a: __m256d, b: __m256d) -> i32;
3017 #[link_name = "llvm.x86.avx.vtestnzc.pd.256"]
3018 fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32;
3019 #[link_name = "llvm.x86.avx.vtestz.pd"]
3020 fn vtestzpd(a: __m128d, b: __m128d) -> i32;
3021 #[link_name = "llvm.x86.avx.vtestc.pd"]
3022 fn vtestcpd(a: __m128d, b: __m128d) -> i32;
3023 #[link_name = "llvm.x86.avx.vtestnzc.pd"]
3024 fn vtestnzcpd(a: __m128d, b: __m128d) -> i32;
3025 #[link_name = "llvm.x86.avx.vtestz.ps.256"]
3026 fn vtestzps256(a: __m256, b: __m256) -> i32;
3027 #[link_name = "llvm.x86.avx.vtestc.ps.256"]
3028 fn vtestcps256(a: __m256, b: __m256) -> i32;
3029 #[link_name = "llvm.x86.avx.vtestnzc.ps.256"]
3030 fn vtestnzcps256(a: __m256, b: __m256) -> i32;
3031 #[link_name = "llvm.x86.avx.vtestz.ps"]
3032 fn vtestzps(a: __m128, b: __m128) -> i32;
3033 #[link_name = "llvm.x86.avx.vtestc.ps"]
3034 fn vtestcps(a: __m128, b: __m128) -> i32;
3035 #[link_name = "llvm.x86.avx.vtestnzc.ps"]
3036 fn vtestnzcps(a: __m128, b: __m128) -> i32;
3037 #[link_name = "llvm.x86.avx.min.ps.256"]
3038 fn vminps(a: __m256, b: __m256) -> __m256;
3039 #[link_name = "llvm.x86.avx.max.ps.256"]
3040 fn vmaxps(a: __m256, b: __m256) -> __m256;
3041 #[link_name = "llvm.x86.avx.min.pd.256"]
3042 fn vminpd(a: __m256d, b: __m256d) -> __m256d;
3043 #[link_name = "llvm.x86.avx.max.pd.256"]
3044 fn vmaxpd(a: __m256d, b: __m256d) -> __m256d;
3045}
3046
3047#[cfg(test)]
3048mod tests {
3049 use crate::hint::black_box;
3050 use stdarch_test::simd_test;
3051
3052 use crate::core_arch::x86::*;
3053
3054 #[simd_test(enable = "avx")]
3055 unsafe fn test_mm256_add_pd() {
3056 let a = _mm256_setr_pd(1., 2., 3., 4.);
3057 let b = _mm256_setr_pd(5., 6., 7., 8.);
3058 let r = _mm256_add_pd(a, b);
3059 let e = _mm256_setr_pd(6., 8., 10., 12.);
3060 assert_eq_m256d(r, e);
3061 }
3062
3063 #[simd_test(enable = "avx")]
3064 unsafe fn test_mm256_add_ps() {
3065 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3066 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3067 let r = _mm256_add_ps(a, b);
3068 let e = _mm256_setr_ps(10., 12., 14., 16., 18., 20., 22., 24.);
3069 assert_eq_m256(r, e);
3070 }
3071
3072 #[simd_test(enable = "avx")]
3073 unsafe fn test_mm256_and_pd() {
3074 let a = _mm256_set1_pd(1.);
3075 let b = _mm256_set1_pd(0.6);
3076 let r = _mm256_and_pd(a, b);
3077 let e = _mm256_set1_pd(0.5);
3078 assert_eq_m256d(r, e);
3079 }
3080
3081 #[simd_test(enable = "avx")]
3082 unsafe fn test_mm256_and_ps() {
3083 let a = _mm256_set1_ps(1.);
3084 let b = _mm256_set1_ps(0.6);
3085 let r = _mm256_and_ps(a, b);
3086 let e = _mm256_set1_ps(0.5);
3087 assert_eq_m256(r, e);
3088 }
3089
3090 #[simd_test(enable = "avx")]
3091 unsafe fn test_mm256_or_pd() {
3092 let a = _mm256_set1_pd(1.);
3093 let b = _mm256_set1_pd(0.6);
3094 let r = _mm256_or_pd(a, b);
3095 let e = _mm256_set1_pd(1.2);
3096 assert_eq_m256d(r, e);
3097 }
3098
3099 #[simd_test(enable = "avx")]
3100 unsafe fn test_mm256_or_ps() {
3101 let a = _mm256_set1_ps(1.);
3102 let b = _mm256_set1_ps(0.6);
3103 let r = _mm256_or_ps(a, b);
3104 let e = _mm256_set1_ps(1.2);
3105 assert_eq_m256(r, e);
3106 }
3107
3108 #[simd_test(enable = "avx")]
3109 unsafe fn test_mm256_shuffle_pd() {
3110 let a = _mm256_setr_pd(1., 4., 5., 8.);
3111 let b = _mm256_setr_pd(2., 3., 6., 7.);
3112 let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
3113 let e = _mm256_setr_pd(4., 3., 8., 7.);
3114 assert_eq_m256d(r, e);
3115 }
3116
3117 #[simd_test(enable = "avx")]
3118 unsafe fn test_mm256_shuffle_ps() {
3119 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3120 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3121 let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
3122 let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.);
3123 assert_eq_m256(r, e);
3124 }
3125
3126 #[simd_test(enable = "avx")]
3127 unsafe fn test_mm256_andnot_pd() {
3128 let a = _mm256_set1_pd(0.);
3129 let b = _mm256_set1_pd(0.6);
3130 let r = _mm256_andnot_pd(a, b);
3131 assert_eq_m256d(r, b);
3132 }
3133
3134 #[simd_test(enable = "avx")]
3135 unsafe fn test_mm256_andnot_ps() {
3136 let a = _mm256_set1_ps(0.);
3137 let b = _mm256_set1_ps(0.6);
3138 let r = _mm256_andnot_ps(a, b);
3139 assert_eq_m256(r, b);
3140 }
3141
3142 #[simd_test(enable = "avx")]
3143 unsafe fn test_mm256_max_pd() {
3144 let a = _mm256_setr_pd(1., 4., 5., 8.);
3145 let b = _mm256_setr_pd(2., 3., 6., 7.);
3146 let r = _mm256_max_pd(a, b);
3147 let e = _mm256_setr_pd(2., 4., 6., 8.);
3148 assert_eq_m256d(r, e);
3149 // > If the values being compared are both 0.0s (of either sign), the
3150 // > value in the second operand (source operand) is returned.
3151 let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3152 let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3153 let wu: [u64; 4] = transmute(w);
3154 let xu: [u64; 4] = transmute(x);
3155 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3156 assert_eq!(xu, [0u64; 4]);
3157 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3158 // > second operand (source operand), either a NaN or a valid
3159 // > floating-point value, is written to the result.
3160 let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3161 let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3162 let yf: [f64; 4] = transmute(y);
3163 let zf: [f64; 4] = transmute(z);
3164 assert_eq!(yf, [0.0; 4]);
3165 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3166 }
3167
3168 #[simd_test(enable = "avx")]
3169 unsafe fn test_mm256_max_ps() {
3170 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3171 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3172 let r = _mm256_max_ps(a, b);
3173 let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
3174 assert_eq_m256(r, e);
3175 // > If the values being compared are both 0.0s (of either sign), the
3176 // > value in the second operand (source operand) is returned.
3177 let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3178 let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3179 let wu: [u32; 8] = transmute(w);
3180 let xu: [u32; 8] = transmute(x);
3181 assert_eq!(wu, [0x8000_0000u32; 8]);
3182 assert_eq!(xu, [0u32; 8]);
3183 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3184 // > second operand (source operand), either a NaN or a valid
3185 // > floating-point value, is written to the result.
3186 let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3187 let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3188 let yf: [f32; 8] = transmute(y);
3189 let zf: [f32; 8] = transmute(z);
3190 assert_eq!(yf, [0.0; 8]);
3191 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3192 }
3193
3194 #[simd_test(enable = "avx")]
3195 unsafe fn test_mm256_min_pd() {
3196 let a = _mm256_setr_pd(1., 4., 5., 8.);
3197 let b = _mm256_setr_pd(2., 3., 6., 7.);
3198 let r = _mm256_min_pd(a, b);
3199 let e = _mm256_setr_pd(1., 3., 5., 7.);
3200 assert_eq_m256d(r, e);
3201 // > If the values being compared are both 0.0s (of either sign), the
3202 // > value in the second operand (source operand) is returned.
3203 let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3204 let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3205 let wu: [u64; 4] = transmute(w);
3206 let xu: [u64; 4] = transmute(x);
3207 assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
3208 assert_eq!(xu, [0u64; 4]);
3209 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3210 // > second operand (source operand), either a NaN or a valid
3211 // > floating-point value, is written to the result.
3212 let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3213 let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3214 let yf: [f64; 4] = transmute(y);
3215 let zf: [f64; 4] = transmute(z);
3216 assert_eq!(yf, [0.0; 4]);
3217 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3218 }
3219
3220 #[simd_test(enable = "avx")]
3221 unsafe fn test_mm256_min_ps() {
3222 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3223 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3224 let r = _mm256_min_ps(a, b);
3225 let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
3226 assert_eq_m256(r, e);
3227 // > If the values being compared are both 0.0s (of either sign), the
3228 // > value in the second operand (source operand) is returned.
3229 let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3230 let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3231 let wu: [u32; 8] = transmute(w);
3232 let xu: [u32; 8] = transmute(x);
3233 assert_eq!(wu, [0x8000_0000u32; 8]);
3234 assert_eq!(xu, [0u32; 8]);
3235 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3236 // > second operand (source operand), either a NaN or a valid
3237 // > floating-point value, is written to the result.
3238 let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3239 let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3240 let yf: [f32; 8] = transmute(y);
3241 let zf: [f32; 8] = transmute(z);
3242 assert_eq!(yf, [0.0; 8]);
3243 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3244 }
3245
3246 #[simd_test(enable = "avx")]
3247 unsafe fn test_mm256_mul_pd() {
3248 let a = _mm256_setr_pd(1., 2., 3., 4.);
3249 let b = _mm256_setr_pd(5., 6., 7., 8.);
3250 let r = _mm256_mul_pd(a, b);
3251 let e = _mm256_setr_pd(5., 12., 21., 32.);
3252 assert_eq_m256d(r, e);
3253 }
3254
3255 #[simd_test(enable = "avx")]
3256 unsafe fn test_mm256_mul_ps() {
3257 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3258 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3259 let r = _mm256_mul_ps(a, b);
3260 let e = _mm256_setr_ps(9., 20., 33., 48., 65., 84., 105., 128.);
3261 assert_eq_m256(r, e);
3262 }
3263
3264 #[simd_test(enable = "avx")]
3265 unsafe fn test_mm256_addsub_pd() {
3266 let a = _mm256_setr_pd(1., 2., 3., 4.);
3267 let b = _mm256_setr_pd(5., 6., 7., 8.);
3268 let r = _mm256_addsub_pd(a, b);
3269 let e = _mm256_setr_pd(-4., 8., -4., 12.);
3270 assert_eq_m256d(r, e);
3271 }
3272
3273 #[simd_test(enable = "avx")]
3274 unsafe fn test_mm256_addsub_ps() {
3275 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3276 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3277 let r = _mm256_addsub_ps(a, b);
3278 let e = _mm256_setr_ps(-4., 8., -4., 12., -4., 8., -4., 12.);
3279 assert_eq_m256(r, e);
3280 }
3281
3282 #[simd_test(enable = "avx")]
3283 unsafe fn test_mm256_sub_pd() {
3284 let a = _mm256_setr_pd(1., 2., 3., 4.);
3285 let b = _mm256_setr_pd(5., 6., 7., 8.);
3286 let r = _mm256_sub_pd(a, b);
3287 let e = _mm256_setr_pd(-4., -4., -4., -4.);
3288 assert_eq_m256d(r, e);
3289 }
3290
3291 #[simd_test(enable = "avx")]
3292 unsafe fn test_mm256_sub_ps() {
3293 let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.);
3294 let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.);
3295 let r = _mm256_sub_ps(a, b);
3296 let e = _mm256_setr_ps(-4., -4., -4., -4., -4., -4., -4., -4.);
3297 assert_eq_m256(r, e);
3298 }
3299
3300 #[simd_test(enable = "avx")]
3301 unsafe fn test_mm256_round_pd() {
3302 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3303 let result_closest = _mm256_round_pd::<0b0000>(a);
3304 let result_down = _mm256_round_pd::<0b0001>(a);
3305 let result_up = _mm256_round_pd::<0b0010>(a);
3306 let expected_closest = _mm256_setr_pd(2., 2., 4., -1.);
3307 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3308 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3309 assert_eq_m256d(result_closest, expected_closest);
3310 assert_eq_m256d(result_down, expected_down);
3311 assert_eq_m256d(result_up, expected_up);
3312 }
3313
3314 #[simd_test(enable = "avx")]
3315 unsafe fn test_mm256_floor_pd() {
3316 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3317 let result_down = _mm256_floor_pd(a);
3318 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3319 assert_eq_m256d(result_down, expected_down);
3320 }
3321
3322 #[simd_test(enable = "avx")]
3323 unsafe fn test_mm256_ceil_pd() {
3324 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3325 let result_up = _mm256_ceil_pd(a);
3326 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3327 assert_eq_m256d(result_up, expected_up);
3328 }
3329
3330 #[simd_test(enable = "avx")]
3331 unsafe fn test_mm256_round_ps() {
3332 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3333 let result_closest = _mm256_round_ps::<0b0000>(a);
3334 let result_down = _mm256_round_ps::<0b0001>(a);
3335 let result_up = _mm256_round_ps::<0b0010>(a);
3336 let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.);
3337 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3338 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3339 assert_eq_m256(result_closest, expected_closest);
3340 assert_eq_m256(result_down, expected_down);
3341 assert_eq_m256(result_up, expected_up);
3342 }
3343
3344 #[simd_test(enable = "avx")]
3345 unsafe fn test_mm256_floor_ps() {
3346 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3347 let result_down = _mm256_floor_ps(a);
3348 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3349 assert_eq_m256(result_down, expected_down);
3350 }
3351
3352 #[simd_test(enable = "avx")]
3353 unsafe fn test_mm256_ceil_ps() {
3354 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3355 let result_up = _mm256_ceil_ps(a);
3356 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3357 assert_eq_m256(result_up, expected_up);
3358 }
3359
3360 #[simd_test(enable = "avx")]
3361 unsafe fn test_mm256_sqrt_pd() {
3362 let a = _mm256_setr_pd(4., 9., 16., 25.);
3363 let r = _mm256_sqrt_pd(a);
3364 let e = _mm256_setr_pd(2., 3., 4., 5.);
3365 assert_eq_m256d(r, e);
3366 }
3367
3368 #[simd_test(enable = "avx")]
3369 unsafe fn test_mm256_sqrt_ps() {
3370 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3371 let r = _mm256_sqrt_ps(a);
3372 let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.);
3373 assert_eq_m256(r, e);
3374 }
3375
3376 #[simd_test(enable = "avx")]
3377 unsafe fn test_mm256_div_ps() {
3378 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3379 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3380 let r = _mm256_div_ps(a, b);
3381 let e = _mm256_setr_ps(1., 3., 8., 5., 0.5, 1., 0.25, 0.5);
3382 assert_eq_m256(r, e);
3383 }
3384
3385 #[simd_test(enable = "avx")]
3386 unsafe fn test_mm256_div_pd() {
3387 let a = _mm256_setr_pd(4., 9., 16., 25.);
3388 let b = _mm256_setr_pd(4., 3., 2., 5.);
3389 let r = _mm256_div_pd(a, b);
3390 let e = _mm256_setr_pd(1., 3., 8., 5.);
3391 assert_eq_m256d(r, e);
3392 }
3393
3394 #[simd_test(enable = "avx")]
3395 unsafe fn test_mm256_blend_pd() {
3396 let a = _mm256_setr_pd(4., 9., 16., 25.);
3397 let b = _mm256_setr_pd(4., 3., 2., 5.);
3398 let r = _mm256_blend_pd::<0x0>(a, b);
3399 assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.));
3400 let r = _mm256_blend_pd::<0x3>(a, b);
3401 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.));
3402 let r = _mm256_blend_pd::<0xF>(a, b);
3403 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.));
3404 }
3405
3406 #[simd_test(enable = "avx")]
3407 unsafe fn test_mm256_blend_ps() {
3408 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3409 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3410 let r = _mm256_blend_ps::<0x0>(a, b);
3411 assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.));
3412 let r = _mm256_blend_ps::<0x3>(a, b);
3413 assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.));
3414 let r = _mm256_blend_ps::<0xF>(a, b);
3415 assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.));
3416 }
3417
3418 #[simd_test(enable = "avx")]
3419 unsafe fn test_mm256_blendv_pd() {
3420 let a = _mm256_setr_pd(4., 9., 16., 25.);
3421 let b = _mm256_setr_pd(4., 3., 2., 5.);
3422 let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64);
3423 let r = _mm256_blendv_pd(a, b, c);
3424 let e = _mm256_setr_pd(4., 9., 2., 5.);
3425 assert_eq_m256d(r, e);
3426 }
3427
3428 #[simd_test(enable = "avx")]
3429 unsafe fn test_mm256_blendv_ps() {
3430 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3431 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3432 #[rustfmt::skip]
3433 let c = _mm256_setr_ps(
3434 0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32,
3435 );
3436 let r = _mm256_blendv_ps(a, b, c);
3437 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3438 assert_eq_m256(r, e);
3439 }
3440
3441 #[simd_test(enable = "avx")]
3442 unsafe fn test_mm256_dp_ps() {
3443 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3444 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3445 let r = _mm256_dp_ps::<0xFF>(a, b);
3446 let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.);
3447 assert_eq_m256(r, e);
3448 }
3449
3450 #[simd_test(enable = "avx")]
3451 unsafe fn test_mm256_hadd_pd() {
3452 let a = _mm256_setr_pd(4., 9., 16., 25.);
3453 let b = _mm256_setr_pd(4., 3., 2., 5.);
3454 let r = _mm256_hadd_pd(a, b);
3455 let e = _mm256_setr_pd(13., 7., 41., 7.);
3456 assert_eq_m256d(r, e);
3457
3458 let a = _mm256_setr_pd(1., 2., 3., 4.);
3459 let b = _mm256_setr_pd(5., 6., 7., 8.);
3460 let r = _mm256_hadd_pd(a, b);
3461 let e = _mm256_setr_pd(3., 11., 7., 15.);
3462 assert_eq_m256d(r, e);
3463 }
3464
3465 #[simd_test(enable = "avx")]
3466 unsafe fn test_mm256_hadd_ps() {
3467 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3468 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3469 let r = _mm256_hadd_ps(a, b);
3470 let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.);
3471 assert_eq_m256(r, e);
3472
3473 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3474 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3475 let r = _mm256_hadd_ps(a, b);
3476 let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.);
3477 assert_eq_m256(r, e);
3478 }
3479
3480 #[simd_test(enable = "avx")]
3481 unsafe fn test_mm256_hsub_pd() {
3482 let a = _mm256_setr_pd(4., 9., 16., 25.);
3483 let b = _mm256_setr_pd(4., 3., 2., 5.);
3484 let r = _mm256_hsub_pd(a, b);
3485 let e = _mm256_setr_pd(-5., 1., -9., -3.);
3486 assert_eq_m256d(r, e);
3487
3488 let a = _mm256_setr_pd(1., 2., 3., 4.);
3489 let b = _mm256_setr_pd(5., 6., 7., 8.);
3490 let r = _mm256_hsub_pd(a, b);
3491 let e = _mm256_setr_pd(-1., -1., -1., -1.);
3492 assert_eq_m256d(r, e);
3493 }
3494
3495 #[simd_test(enable = "avx")]
3496 unsafe fn test_mm256_hsub_ps() {
3497 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3498 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3499 let r = _mm256_hsub_ps(a, b);
3500 let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.);
3501 assert_eq_m256(r, e);
3502
3503 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3504 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3505 let r = _mm256_hsub_ps(a, b);
3506 let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.);
3507 assert_eq_m256(r, e);
3508 }
3509
3510 #[simd_test(enable = "avx")]
3511 unsafe fn test_mm256_xor_pd() {
3512 let a = _mm256_setr_pd(4., 9., 16., 25.);
3513 let b = _mm256_set1_pd(0.);
3514 let r = _mm256_xor_pd(a, b);
3515 assert_eq_m256d(r, a);
3516 }
3517
3518 #[simd_test(enable = "avx")]
3519 unsafe fn test_mm256_xor_ps() {
3520 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3521 let b = _mm256_set1_ps(0.);
3522 let r = _mm256_xor_ps(a, b);
3523 assert_eq_m256(r, a);
3524 }
3525
3526 #[simd_test(enable = "avx")]
3527 unsafe fn test_mm_cmp_pd() {
3528 let a = _mm_setr_pd(4., 9.);
3529 let b = _mm_setr_pd(4., 3.);
3530 let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b);
3531 assert!(get_m128d(r, 0).is_nan());
3532 assert!(get_m128d(r, 1).is_nan());
3533 }
3534
3535 #[simd_test(enable = "avx")]
3536 unsafe fn test_mm256_cmp_pd() {
3537 let a = _mm256_setr_pd(1., 2., 3., 4.);
3538 let b = _mm256_setr_pd(5., 6., 7., 8.);
3539 let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b);
3540 let e = _mm256_set1_pd(0.);
3541 assert_eq_m256d(r, e);
3542 }
3543
3544 #[simd_test(enable = "avx")]
3545 unsafe fn test_mm_cmp_ps() {
3546 let a = _mm_setr_ps(4., 3., 2., 5.);
3547 let b = _mm_setr_ps(4., 9., 16., 25.);
3548 let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b);
3549 assert!(get_m128(r, 0).is_nan());
3550 assert_eq!(get_m128(r, 1), 0.);
3551 assert_eq!(get_m128(r, 2), 0.);
3552 assert_eq!(get_m128(r, 3), 0.);
3553 }
3554
3555 #[simd_test(enable = "avx")]
3556 unsafe fn test_mm256_cmp_ps() {
3557 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3558 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3559 let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b);
3560 let e = _mm256_set1_ps(0.);
3561 assert_eq_m256(r, e);
3562 }
3563
3564 #[simd_test(enable = "avx")]
3565 unsafe fn test_mm_cmp_sd() {
3566 let a = _mm_setr_pd(4., 9.);
3567 let b = _mm_setr_pd(4., 3.);
3568 let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b);
3569 assert!(get_m128d(r, 0).is_nan());
3570 assert_eq!(get_m128d(r, 1), 9.);
3571 }
3572
3573 #[simd_test(enable = "avx")]
3574 unsafe fn test_mm_cmp_ss() {
3575 let a = _mm_setr_ps(4., 3., 2., 5.);
3576 let b = _mm_setr_ps(4., 9., 16., 25.);
3577 let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b);
3578 assert!(get_m128(r, 0).is_nan());
3579 assert_eq!(get_m128(r, 1), 3.);
3580 assert_eq!(get_m128(r, 2), 2.);
3581 assert_eq!(get_m128(r, 3), 5.);
3582 }
3583
3584 #[simd_test(enable = "avx")]
3585 unsafe fn test_mm256_cvtepi32_pd() {
3586 let a = _mm_setr_epi32(4, 9, 16, 25);
3587 let r = _mm256_cvtepi32_pd(a);
3588 let e = _mm256_setr_pd(4., 9., 16., 25.);
3589 assert_eq_m256d(r, e);
3590 }
3591
3592 #[simd_test(enable = "avx")]
3593 unsafe fn test_mm256_cvtepi32_ps() {
3594 let a = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3595 let r = _mm256_cvtepi32_ps(a);
3596 let e = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3597 assert_eq_m256(r, e);
3598 }
3599
3600 #[simd_test(enable = "avx")]
3601 unsafe fn test_mm256_cvtpd_ps() {
3602 let a = _mm256_setr_pd(4., 9., 16., 25.);
3603 let r = _mm256_cvtpd_ps(a);
3604 let e = _mm_setr_ps(4., 9., 16., 25.);
3605 assert_eq_m128(r, e);
3606 }
3607
3608 #[simd_test(enable = "avx")]
3609 unsafe fn test_mm256_cvtps_epi32() {
3610 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3611 let r = _mm256_cvtps_epi32(a);
3612 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3613 assert_eq_m256i(r, e);
3614 }
3615
3616 #[simd_test(enable = "avx")]
3617 unsafe fn test_mm256_cvtps_pd() {
3618 let a = _mm_setr_ps(4., 9., 16., 25.);
3619 let r = _mm256_cvtps_pd(a);
3620 let e = _mm256_setr_pd(4., 9., 16., 25.);
3621 assert_eq_m256d(r, e);
3622 }
3623
3624 #[simd_test(enable = "avx")]
3625 unsafe fn test_mm256_cvttpd_epi32() {
3626 let a = _mm256_setr_pd(4., 9., 16., 25.);
3627 let r = _mm256_cvttpd_epi32(a);
3628 let e = _mm_setr_epi32(4, 9, 16, 25);
3629 assert_eq_m128i(r, e);
3630 }
3631
3632 #[simd_test(enable = "avx")]
3633 unsafe fn test_mm256_cvtpd_epi32() {
3634 let a = _mm256_setr_pd(4., 9., 16., 25.);
3635 let r = _mm256_cvtpd_epi32(a);
3636 let e = _mm_setr_epi32(4, 9, 16, 25);
3637 assert_eq_m128i(r, e);
3638 }
3639
3640 #[simd_test(enable = "avx")]
3641 unsafe fn test_mm256_cvttps_epi32() {
3642 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3643 let r = _mm256_cvttps_epi32(a);
3644 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3645 assert_eq_m256i(r, e);
3646 }
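
    // A minimal usage sketch (this test name is new, not part of the original
    // suite): `_mm256_cvtss_f32` simply returns the lowest single-precision
    // lane of the vector.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtss_f32_lowest_lane_sketch() {
        let a = _mm256_setr_ps(42., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq!(_mm256_cvtss_f32(a), 42.);
    }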
3647
3648 #[simd_test(enable = "avx")]
3649 unsafe fn test_mm256_extractf128_ps() {
3650 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3651 let r = _mm256_extractf128_ps::<0>(a);
3652 let e = _mm_setr_ps(4., 3., 2., 5.);
3653 assert_eq_m128(r, e);
3654 }
3655
3656 #[simd_test(enable = "avx")]
3657 unsafe fn test_mm256_extractf128_pd() {
3658 let a = _mm256_setr_pd(4., 3., 2., 5.);
3659 let r = _mm256_extractf128_pd::<0>(a);
3660 let e = _mm_setr_pd(4., 3.);
3661 assert_eq_m128d(r, e);
3662 }
3663
3664 #[simd_test(enable = "avx")]
3665 unsafe fn test_mm256_extractf128_si256() {
3666 let a = _mm256_setr_epi64x(4, 3, 2, 5);
3667 let r = _mm256_extractf128_si256::<0>(a);
3668 let e = _mm_setr_epi64x(4, 3);
3669 assert_eq_m128i(r, e);
3670 }
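
    // A minimal usage sketch (this test name is new, not part of the original
    // suite): casting a `__m256` down keeps the lower 128 bits, and casting
    // back up leaves the upper 128 bits unspecified, so only the low half is
    // checked after the round trip.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_roundtrip_sketch() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let lo = _mm256_castps256_ps128(a);
        assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
        let widened = _mm256_castps128_ps256(lo);
        assert_eq_m128(_mm256_castps256_ps128(widened), _mm_setr_ps(1., 2., 3., 4.));
    }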
3671
3672 #[simd_test(enable = "avx")]
3673 unsafe fn test_mm256_zeroall() {
3674 _mm256_zeroall();
3675 }
3676
3677 #[simd_test(enable = "avx")]
3678 unsafe fn test_mm256_zeroupper() {
3679 _mm256_zeroupper();
3680 }
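
    // A minimal usage sketch (this test name is new, not part of the original
    // suite): unlike `_mm256_castps128_ps256`, the `zext` form guarantees that
    // the upper 128 bits of the result are zeroed.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextps128_ps256_upper_zero_sketch() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_zextps128_ps256(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.);
        assert_eq_m256(r, e);
    }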
3681
3682 #[simd_test(enable = "avx")]
3683 unsafe fn test_mm256_permutevar_ps() {
3684 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3685 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3686 let r = _mm256_permutevar_ps(a, b);
3687 let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.);
3688 assert_eq_m256(r, e);
3689 }
3690
3691 #[simd_test(enable = "avx")]
3692 unsafe fn test_mm_permutevar_ps() {
3693 let a = _mm_setr_ps(4., 3., 2., 5.);
3694 let b = _mm_setr_epi32(1, 2, 3, 4);
3695 let r = _mm_permutevar_ps(a, b);
3696 let e = _mm_setr_ps(3., 2., 5., 4.);
3697 assert_eq_m128(r, e);
3698 }
3699
3700 #[simd_test(enable = "avx")]
3701 unsafe fn test_mm256_permute_ps() {
3702 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3703 let r = _mm256_permute_ps::<0x1b>(a);
3704 let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.);
3705 assert_eq_m256(r, e);
3706 }
3707
3708 #[simd_test(enable = "avx")]
3709 unsafe fn test_mm_permute_ps() {
3710 let a = _mm_setr_ps(4., 3., 2., 5.);
3711 let r = _mm_permute_ps::<0x1b>(a);
3712 let e = _mm_setr_ps(5., 2., 3., 4.);
3713 assert_eq_m128(r, e);
3714 }
3715
3716 #[simd_test(enable = "avx")]
3717 unsafe fn test_mm256_permutevar_pd() {
3718 let a = _mm256_setr_pd(4., 3., 2., 5.);
3719 let b = _mm256_setr_epi64x(1, 2, 3, 4);
3720 let r = _mm256_permutevar_pd(a, b);
3721 let e = _mm256_setr_pd(4., 3., 5., 2.);
3722 assert_eq_m256d(r, e);
3723 }
3724
3725 #[simd_test(enable = "avx")]
3726 unsafe fn test_mm_permutevar_pd() {
3727 let a = _mm_setr_pd(4., 3.);
3728 let b = _mm_setr_epi64x(3, 0);
3729 let r = _mm_permutevar_pd(a, b);
3730 let e = _mm_setr_pd(3., 4.);
3731 assert_eq_m128d(r, e);
3732 }
3733
3734 #[simd_test(enable = "avx")]
3735 unsafe fn test_mm256_permute_pd() {
3736 let a = _mm256_setr_pd(4., 3., 2., 5.);
3737 let r = _mm256_permute_pd::<5>(a);
3738 let e = _mm256_setr_pd(3., 4., 5., 2.);
3739 assert_eq_m256d(r, e);
3740 }
3741
3742 #[simd_test(enable = "avx")]
3743 unsafe fn test_mm_permute_pd() {
3744 let a = _mm_setr_pd(4., 3.);
3745 let r = _mm_permute_pd::<1>(a);
3746 let e = _mm_setr_pd(3., 4.);
3747 assert_eq_m128d(r, e);
3748 }
3749
3750 #[simd_test(enable = "avx")]
3751 unsafe fn test_mm256_permute2f128_ps() {
3752 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3753 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3754 let r = _mm256_permute2f128_ps::<0x13>(a, b);
3755 let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
3756 assert_eq_m256(r, e);
3757 }
3758
3759 #[simd_test(enable = "avx")]
3760 unsafe fn test_mm256_permute2f128_pd() {
3761 let a = _mm256_setr_pd(1., 2., 3., 4.);
3762 let b = _mm256_setr_pd(5., 6., 7., 8.);
3763 let r = _mm256_permute2f128_pd::<0x31>(a, b);
3764 let e = _mm256_setr_pd(3., 4., 7., 8.);
3765 assert_eq_m256d(r, e);
3766 }
3767
3768 #[simd_test(enable = "avx")]
3769 unsafe fn test_mm256_permute2f128_si256() {
3770 let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
3771 let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
3772 let r = _mm256_permute2f128_si256::<0x20>(a, b);
3773 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3774 assert_eq_m256i(r, e);
3775 }
3776
3777 #[simd_test(enable = "avx")]
3778 unsafe fn test_mm256_broadcast_ss() {
3779 let r = _mm256_broadcast_ss(&3.);
3780 let e = _mm256_set1_ps(3.);
3781 assert_eq_m256(r, e);
3782 }
3783
3784 #[simd_test(enable = "avx")]
3785 unsafe fn test_mm_broadcast_ss() {
3786 let r = _mm_broadcast_ss(&3.);
3787 let e = _mm_set1_ps(3.);
3788 assert_eq_m128(r, e);
3789 }
3790
3791 #[simd_test(enable = "avx")]
3792 unsafe fn test_mm256_broadcast_sd() {
3793 let r = _mm256_broadcast_sd(&3.);
3794 let e = _mm256_set1_pd(3.);
3795 assert_eq_m256d(r, e);
3796 }
3797
3798 #[simd_test(enable = "avx")]
3799 unsafe fn test_mm256_broadcast_ps() {
3800 let a = _mm_setr_ps(4., 3., 2., 5.);
3801 let r = _mm256_broadcast_ps(&a);
3802 let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.);
3803 assert_eq_m256(r, e);
3804 }
3805
3806 #[simd_test(enable = "avx")]
3807 unsafe fn test_mm256_broadcast_pd() {
3808 let a = _mm_setr_pd(4., 3.);
3809 let r = _mm256_broadcast_pd(&a);
3810 let e = _mm256_setr_pd(4., 3., 4., 3.);
3811 assert_eq_m256d(r, e);
3812 }
3813
3814 #[simd_test(enable = "avx")]
3815 unsafe fn test_mm256_insertf128_ps() {
3816 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3817 let b = _mm_setr_ps(4., 9., 16., 25.);
3818 let r = _mm256_insertf128_ps::<0>(a, b);
3819 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3820 assert_eq_m256(r, e);
3821 }
3822
3823 #[simd_test(enable = "avx")]
3824 unsafe fn test_mm256_insertf128_pd() {
3825 let a = _mm256_setr_pd(1., 2., 3., 4.);
3826 let b = _mm_setr_pd(5., 6.);
3827 let r = _mm256_insertf128_pd::<0>(a, b);
3828 let e = _mm256_setr_pd(5., 6., 3., 4.);
3829 assert_eq_m256d(r, e);
3830 }
3831
3832 #[simd_test(enable = "avx")]
3833 unsafe fn test_mm256_insertf128_si256() {
3834 let a = _mm256_setr_epi64x(1, 2, 3, 4);
3835 let b = _mm_setr_epi64x(5, 6);
3836 let r = _mm256_insertf128_si256::<0>(a, b);
3837 let e = _mm256_setr_epi64x(5, 6, 3, 4);
3838 assert_eq_m256i(r, e);
3839 }
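
    // A minimal usage sketch (this test name is new, not part of the original
    // suite): `_mm256_set_m128` takes the upper half first, while
    // `_mm256_setr_m128` takes the halves in memory (lower-first) order.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128_half_order_sketch() {
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(_mm256_set_m128(hi, lo), e);
        assert_eq_m256(_mm256_setr_m128(lo, hi), e);
    }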
3840
3841 #[simd_test(enable = "avx")]
3842 unsafe fn test_mm256_insert_epi8() {
3843 #[rustfmt::skip]
3844 let a = _mm256_setr_epi8(
3845 1, 2, 3, 4, 5, 6, 7, 8,
3846 9, 10, 11, 12, 13, 14, 15, 16,
3847 17, 18, 19, 20, 21, 22, 23, 24,
3848 25, 26, 27, 28, 29, 30, 31, 32,
3849 );
3850 let r = _mm256_insert_epi8::<31>(a, 0);
3851 #[rustfmt::skip]
3852 let e = _mm256_setr_epi8(
3853 1, 2, 3, 4, 5, 6, 7, 8,
3854 9, 10, 11, 12, 13, 14, 15, 16,
3855 17, 18, 19, 20, 21, 22, 23, 24,
3856 25, 26, 27, 28, 29, 30, 31, 0,
3857 );
3858 assert_eq_m256i(r, e);
3859 }
3860
3861 #[simd_test(enable = "avx")]
3862 unsafe fn test_mm256_insert_epi16() {
3863 #[rustfmt::skip]
3864 let a = _mm256_setr_epi16(
3865 0, 1, 2, 3, 4, 5, 6, 7,
3866 8, 9, 10, 11, 12, 13, 14, 15,
3867 );
3868 let r = _mm256_insert_epi16::<15>(a, 0);
3869 #[rustfmt::skip]
3870 let e = _mm256_setr_epi16(
3871 0, 1, 2, 3, 4, 5, 6, 7,
3872 8, 9, 10, 11, 12, 13, 14, 0,
3873 );
3874 assert_eq_m256i(r, e);
3875 }
3876
3877 #[simd_test(enable = "avx")]
3878 unsafe fn test_mm256_insert_epi32() {
3879 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3880 let r = _mm256_insert_epi32::<7>(a, 0);
3881 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
3882 assert_eq_m256i(r, e);
3883 }
3884
3885 #[simd_test(enable = "avx")]
3886 unsafe fn test_mm256_load_pd() {
3887 let a = _mm256_setr_pd(1., 2., 3., 4.);
3888 let p = &a as *const _ as *const f64;
3889 let r = _mm256_load_pd(p);
3890 let e = _mm256_setr_pd(1., 2., 3., 4.);
3891 assert_eq_m256d(r, e);
3892 }
3893
3894 #[simd_test(enable = "avx")]
3895 unsafe fn test_mm256_store_pd() {
3896 let a = _mm256_setr_pd(1., 2., 3., 4.);
3897 let mut r = _mm256_undefined_pd();
3898 _mm256_store_pd(&mut r as *mut _ as *mut f64, a);
3899 assert_eq_m256d(r, a);
3900 }
3901
3902 #[simd_test(enable = "avx")]
3903 unsafe fn test_mm256_load_ps() {
3904 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3905 let p = &a as *const _ as *const f32;
3906 let r = _mm256_load_ps(p);
3907 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3908 assert_eq_m256(r, e);
3909 }
3910
3911 #[simd_test(enable = "avx")]
3912 unsafe fn test_mm256_store_ps() {
3913 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3914 let mut r = _mm256_undefined_ps();
3915 _mm256_store_ps(&mut r as *mut _ as *mut f32, a);
3916 assert_eq_m256(r, a);
3917 }
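
    // A minimal usage sketch (this test name is new, not part of the original
    // suite): the "undefined" constructors yield an ordinary, valid vector, so
    // the result may be fed into further operations without initialization.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_undefined_ps_is_valid_sketch() {
        let u = _mm256_undefined_ps();
        let r = _mm256_and_ps(u, _mm256_setzero_ps());
        assert_eq_m256(r, _mm256_setzero_ps());
    }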
3918
3919 #[simd_test(enable = "avx")]
3920 unsafe fn test_mm256_loadu_pd() {
3921 let a = &[1.0f64, 2., 3., 4.];
3922 let p = a.as_ptr();
3923 let r = _mm256_loadu_pd(black_box(p));
3924 let e = _mm256_setr_pd(1., 2., 3., 4.);
3925 assert_eq_m256d(r, e);
3926 }
3927
3928 #[simd_test(enable = "avx")]
3929 unsafe fn test_mm256_storeu_pd() {
3930 let a = _mm256_set1_pd(9.);
3931 let mut r = _mm256_undefined_pd();
3932 _mm256_storeu_pd(&mut r as *mut _ as *mut f64, a);
3933 assert_eq_m256d(r, a);
3934 }
3935
3936 #[simd_test(enable = "avx")]
3937 unsafe fn test_mm256_loadu_ps() {
3938 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
3939 let p = a.as_ptr();
3940 let r = _mm256_loadu_ps(black_box(p));
3941 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3942 assert_eq_m256(r, e);
3943 }
3944
3945 #[simd_test(enable = "avx")]
3946 unsafe fn test_mm256_storeu_ps() {
3947 let a = _mm256_set1_ps(9.);
3948 let mut r = _mm256_undefined_ps();
3949 _mm256_storeu_ps(&mut r as *mut _ as *mut f32, a);
3950 assert_eq_m256(r, a);
3951 }
3952
3953 #[simd_test(enable = "avx")]
3954 unsafe fn test_mm256_load_si256() {
3955 let a = _mm256_setr_epi64x(1, 2, 3, 4);
3956 let p = &a as *const _;
3957 let r = _mm256_load_si256(p);
3958 let e = _mm256_setr_epi64x(1, 2, 3, 4);
3959 assert_eq_m256i(r, e);
3960 }
3961
3962 #[simd_test(enable = "avx")]
3963 unsafe fn test_mm256_store_si256() {
3964 let a = _mm256_setr_epi64x(1, 2, 3, 4);
3965 let mut r = _mm256_undefined_si256();
3966 _mm256_store_si256(&mut r as *mut _, a);
3967 assert_eq_m256i(r, a);
3968 }
3969
3970 #[simd_test(enable = "avx")]
3971 unsafe fn test_mm256_loadu_si256() {
3972 let a = _mm256_setr_epi64x(1, 2, 3, 4);
3973 let p = &a as *const _;
3974 let r = _mm256_loadu_si256(black_box(p));
3975 let e = _mm256_setr_epi64x(1, 2, 3, 4);
3976 assert_eq_m256i(r, e);
3977 }
3978
3979 #[simd_test(enable = "avx")]
3980 unsafe fn test_mm256_storeu_si256() {
3981 let a = _mm256_set1_epi8(9);
3982 let mut r = _mm256_undefined_si256();
3983 _mm256_storeu_si256(&mut r as *mut _, a);
3984 assert_eq_m256i(r, a);
3985 }
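
    // A minimal usage sketch (this test name is new, not part of the original
    // suite): `_mm256_storeu2_m128` scatters the two unaligned 128-bit halves,
    // and `_mm256_loadu2_m128` gathers them back into one 256-bit value.
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_storeu2_m128_roundtrip_sketch() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let mut lo = [0.0f32; 4];
        let mut hi = [0.0f32; 4];
        _mm256_storeu2_m128(hi.as_mut_ptr(), lo.as_mut_ptr(), a);
        assert_eq!(lo, [1., 2., 3., 4.]);
        assert_eq!(hi, [5., 6., 7., 8.]);
        let r = _mm256_loadu2_m128(hi.as_ptr(), lo.as_ptr());
        assert_eq_m256(r, a);
    }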
3986
3987 #[simd_test(enable = "avx")]
3988 unsafe fn test_mm256_maskload_pd() {
3989 let a = &[1.0f64, 2., 3., 4.];
3990 let p = a.as_ptr();
3991 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
3992 let r = _mm256_maskload_pd(black_box(p), mask);
3993 let e = _mm256_setr_pd(0., 2., 0., 4.);
3994 assert_eq_m256d(r, e);
3995 }
3996
3997 #[simd_test(enable = "avx")]
3998 unsafe fn test_mm256_maskstore_pd() {
3999 let mut r = _mm256_set1_pd(0.);
4000 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4001 let a = _mm256_setr_pd(1., 2., 3., 4.);
4002 _mm256_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a);
4003 let e = _mm256_setr_pd(0., 2., 0., 4.);
4004 assert_eq_m256d(r, e);
4005 }
4006
4007 #[simd_test(enable = "avx")]
4008 unsafe fn test_mm_maskload_pd() {
4009 let a = &[1.0f64, 2.];
4010 let p = a.as_ptr();
4011 let mask = _mm_setr_epi64x(0, !0);
4012 let r = _mm_maskload_pd(black_box(p), mask);
4013 let e = _mm_setr_pd(0., 2.);
4014 assert_eq_m128d(r, e);
4015 }
4016
4017 #[simd_test(enable = "avx")]
4018 unsafe fn test_mm_maskstore_pd() {
4019 let mut r = _mm_set1_pd(0.);
4020 let mask = _mm_setr_epi64x(0, !0);
4021 let a = _mm_setr_pd(1., 2.);
4022 _mm_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a);
4023 let e = _mm_setr_pd(0., 2.);
4024 assert_eq_m128d(r, e);
4025 }
4026
4027 #[simd_test(enable = "avx")]
4028 unsafe fn test_mm256_maskload_ps() {
4029 let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
4030 let p = a.as_ptr();
4031 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4032 let r = _mm256_maskload_ps(black_box(p), mask);
4033 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4034 assert_eq_m256(r, e);
4035 }
4036
4037 #[simd_test(enable = "avx")]
4038 unsafe fn test_mm256_maskstore_ps() {
4039 let mut r = _mm256_set1_ps(0.);
4040 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4041 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4042 _mm256_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a);
4043 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4044 assert_eq_m256(r, e);
4045 }
4046
4047 #[simd_test(enable = "avx")]
4048 unsafe fn test_mm_maskload_ps() {
4049 let a = &[1.0f32, 2., 3., 4.];
4050 let p = a.as_ptr();
4051 let mask = _mm_setr_epi32(0, !0, 0, !0);
4052 let r = _mm_maskload_ps(black_box(p), mask);
4053 let e = _mm_setr_ps(0., 2., 0., 4.);
4054 assert_eq_m128(r, e);
4055 }
4056
4057 #[simd_test(enable = "avx")]
4058 unsafe fn test_mm_maskstore_ps() {
4059 let mut r = _mm_set1_ps(0.);
4060 let mask = _mm_setr_epi32(0, !0, 0, !0);
4061 let a = _mm_setr_ps(1., 2., 3., 4.);
4062 _mm_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a);
4063 let e = _mm_setr_ps(0., 2., 0., 4.);
4064 assert_eq_m128(r, e);
4065 }
4066
4067 #[simd_test(enable = "avx")]
4068 unsafe fn test_mm256_movehdup_ps() {
4069 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4070 let r = _mm256_movehdup_ps(a);
4071 let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.);
4072 assert_eq_m256(r, e);
4073 }
4074
4075 #[simd_test(enable = "avx")]
4076 unsafe fn test_mm256_moveldup_ps() {
4077 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4078 let r = _mm256_moveldup_ps(a);
4079 let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.);
4080 assert_eq_m256(r, e);
4081 }
4082
4083 #[simd_test(enable = "avx")]
4084 unsafe fn test_mm256_movedup_pd() {
4085 let a = _mm256_setr_pd(1., 2., 3., 4.);
4086 let r = _mm256_movedup_pd(a);
4087 let e = _mm256_setr_pd(1., 1., 3., 3.);
4088 assert_eq_m256d(r, e);
4089 }
4090
4091 #[simd_test(enable = "avx")]
4092 unsafe fn test_mm256_lddqu_si256() {
4093 #[rustfmt::skip]
4094 let a = _mm256_setr_epi8(
4095 1, 2, 3, 4, 5, 6, 7, 8,
4096 9, 10, 11, 12, 13, 14, 15, 16,
4097 17, 18, 19, 20, 21, 22, 23, 24,
4098 25, 26, 27, 28, 29, 30, 31, 32,
4099 );
4100 let p = &a as *const _;
4101 let r = _mm256_lddqu_si256(black_box(p));
4102 #[rustfmt::skip]
4103 let e = _mm256_setr_epi8(
4104 1, 2, 3, 4, 5, 6, 7, 8,
4105 9, 10, 11, 12, 13, 14, 15, 16,
4106 17, 18, 19, 20, 21, 22, 23, 24,
4107 25, 26, 27, 28, 29, 30, 31, 32,
4108 );
4109 assert_eq_m256i(r, e);
4110 }
4111
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_stream_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let mut r = _mm256_undefined_si256();
        _mm256_stream_si256(&mut r as *mut _, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_stream_pd() {
        #[repr(align(32))]
        struct Memory {
            pub data: [f64; 4],
        }
        let a = _mm256_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 4] };

        _mm256_stream_pd(&mut mem.data[0] as *mut f64, a);
        for i in 0..4 {
            assert_eq!(mem.data[i], get_m256d(a, i));
        }
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_stream_ps() {
        #[repr(align(32))]
        struct Memory {
            pub data: [f32; 8],
        }
        let a = _mm256_set1_ps(7.0);
        let mut mem = Memory { data: [-1.0; 8] };

        _mm256_stream_ps(&mut mem.data[0] as *mut f32, a);
        for i in 0..8 {
            assert_eq!(mem.data[i], get_m256(a, i));
        }
    }

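    // `vrcpps`/`vrsqrtps` are hardware approximations (Intel documents a maximum
    // relative error of roughly 1.5 * 2^-12), so the tests below compare against
    // reference values with a small tolerance instead of exact equality.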
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_rcp_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_rcp_ps(a);
        #[rustfmt::skip]
        let e = _mm256_setr_ps(
            0.99975586, 0.49987793, 0.33325195, 0.24993896,
            0.19995117, 0.16662598, 0.14282227, 0.12496948,
        );
        let rel_err = 0.00048828125;
        for i in 0..8 {
            assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
        }
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_rsqrt_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_rsqrt_ps(a);
        #[rustfmt::skip]
        let e = _mm256_setr_ps(
            0.99975586, 0.7069092, 0.5772705, 0.49987793,
            0.44714355, 0.40820313, 0.3779297, 0.3534546,
        );
        let rel_err = 0.00048828125;
        for i in 0..8 {
            assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
        }
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpackhi_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_unpackhi_pd(a, b);
        let e = _mm256_setr_pd(2., 6., 4., 8.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpackhi_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm256_unpackhi_ps(a, b);
        let e = _mm256_setr_ps(3., 11., 4., 12., 7., 15., 8., 16.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpacklo_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_unpacklo_pd(a, b);
        let e = _mm256_setr_pd(1., 5., 3., 7.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_unpacklo_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
        let r = _mm256_unpacklo_ps(a, b);
        let e = _mm256_setr_ps(1., 9., 2., 10., 5., 13., 6., 14.);
        assert_eq_m256(r, e);
    }

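    // For the integer `vptest` family: `testz` returns 1 iff `a AND b` is all zeros,
    // `testc` returns 1 iff `(NOT a) AND b` is all zeros, and `testnzc` returns 1 iff
    // neither of those conditions holds.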
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testz_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm256_setr_epi64x(5, 6, 7, 8);
        let r = _mm256_testz_si256(a, b);
        assert_eq!(r, 0);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_testz_si256(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testc_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm256_setr_epi64x(5, 6, 7, 8);
        let r = _mm256_testc_si256(a, b);
        assert_eq!(r, 0);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_testc_si256(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testnzc_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm256_setr_epi64x(5, 6, 7, 8);
        let r = _mm256_testnzc_si256(a, b);
        assert_eq!(r, 1);
        let a = _mm256_setr_epi64x(0, 0, 0, 0);
        let b = _mm256_setr_epi64x(0, 0, 0, 0);
        let r = _mm256_testnzc_si256(a, b);
        assert_eq!(r, 0);
    }

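    // The `_pd`/`_ps` variants of these predicates only examine the sign bit of each
    // lane, so vectors of positive values behave like an all-zero mask.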
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testz_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_testz_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm256_set1_pd(-1.);
        let r = _mm256_testz_pd(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testc_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_testc_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm256_set1_pd(1.);
        let b = _mm256_set1_pd(-1.);
        let r = _mm256_testc_pd(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testnzc_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let b = _mm256_setr_pd(5., 6., 7., 8.);
        let r = _mm256_testnzc_pd(a, b);
        assert_eq!(r, 0);
        let a = _mm256_setr_pd(1., -1., -1., -1.);
        let b = _mm256_setr_pd(-1., -1., 1., 1.);
        let r = _mm256_testnzc_pd(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testz_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 6.);
        let r = _mm_testz_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm_set1_pd(-1.);
        let r = _mm_testz_pd(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testc_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 6.);
        let r = _mm_testc_pd(a, b);
        assert_eq!(r, 1);
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(-1.);
        let r = _mm_testc_pd(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testnzc_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(5., 6.);
        let r = _mm_testnzc_pd(a, b);
        assert_eq!(r, 0);
        let a = _mm_setr_pd(1., -1.);
        let b = _mm_setr_pd(-1., -1.);
        let r = _mm_testnzc_pd(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testz_ps() {
        let a = _mm256_set1_ps(1.);
        let r = _mm256_testz_ps(a, a);
        assert_eq!(r, 1);
        let a = _mm256_set1_ps(-1.);
        let r = _mm256_testz_ps(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testc_ps() {
        let a = _mm256_set1_ps(1.);
        let r = _mm256_testc_ps(a, a);
        assert_eq!(r, 1);
        let b = _mm256_set1_ps(-1.);
        let r = _mm256_testc_ps(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_testnzc_ps() {
        let a = _mm256_set1_ps(1.);
        let r = _mm256_testnzc_ps(a, a);
        assert_eq!(r, 0);
        let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.);
        let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.);
        let r = _mm256_testnzc_ps(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testz_ps() {
        let a = _mm_set1_ps(1.);
        let r = _mm_testz_ps(a, a);
        assert_eq!(r, 1);
        let a = _mm_set1_ps(-1.);
        let r = _mm_testz_ps(a, a);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testc_ps() {
        let a = _mm_set1_ps(1.);
        let r = _mm_testc_ps(a, a);
        assert_eq!(r, 1);
        let b = _mm_set1_ps(-1.);
        let r = _mm_testc_ps(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm_testnzc_ps() {
        let a = _mm_set1_ps(1.);
        let r = _mm_testnzc_ps(a, a);
        assert_eq!(r, 0);
        let a = _mm_setr_ps(1., -1., -1., -1.);
        let b = _mm_setr_ps(-1., -1., 1., 1.);
        let r = _mm_testnzc_ps(a, b);
        assert_eq!(r, 1);
    }

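    // `movemask` packs the sign bit of each lane into the low bits of the result;
    // with negative values in the odd lanes the expected masks are 0b1010 (0xA) and
    // 0b10101010 (0xAA).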
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_movemask_pd() {
        let a = _mm256_setr_pd(1., -2., 3., -4.);
        let r = _mm256_movemask_pd(a);
        assert_eq!(r, 0xA);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_movemask_ps() {
        let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.);
        let r = _mm256_movemask_ps(a);
        assert_eq!(r, 0xAA);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_pd() {
        let r = _mm256_setzero_pd();
        assert_eq_m256d(r, _mm256_set1_pd(0.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_ps() {
        let r = _mm256_setzero_ps();
        assert_eq_m256(r, _mm256_set1_ps(0.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setzero_si256() {
        let r = _mm256_setzero_si256();
        assert_eq_m256i(r, _mm256_set1_epi8(0));
    }

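    // `_mm256_set_*` takes arguments with the highest-indexed element first, while
    // `_mm256_setr_*` takes them in memory (lowest-lane-first) order, so the two
    // argument lists below are reverses of each other.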
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_pd() {
        let r = _mm256_set_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_ps() {
        let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi8() {
        #[rustfmt::skip]
        let r = _mm256_set_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            32, 31, 30, 29, 28, 27, 26, 25,
            24, 23, 22, 21, 20, 19, 18, 17,
            16, 15, 14, 13, 12, 11, 10, 9,
            8, 7, 6, 5, 4, 3, 2, 1
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi16() {
        #[rustfmt::skip]
        let r = _mm256_set_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            16, 15, 14, 13, 12, 11, 10, 9,
            8, 7, 6, 5, 4, 3, 2, 1,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi32() {
        let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_epi64x() {
        let r = _mm256_set_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(4, 3, 2, 1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_pd() {
        let r = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_ps() {
        let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi8() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32
        );

        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi16() {
        #[rustfmt::skip]
        let r = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi32() {
        let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m256i(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_epi64x() {
        let r = _mm256_setr_epi64x(1, 2, 3, 4);
        assert_eq_m256i(r, _mm256_setr_epi64x(1, 2, 3, 4));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_pd() {
        let r = _mm256_set1_pd(1.);
        assert_eq_m256d(r, _mm256_set1_pd(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_ps() {
        let r = _mm256_set1_ps(1.);
        assert_eq_m256(r, _mm256_set1_ps(1.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi8() {
        let r = _mm256_set1_epi8(1);
        assert_eq_m256i(r, _mm256_set1_epi8(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi16() {
        let r = _mm256_set1_epi16(1);
        assert_eq_m256i(r, _mm256_set1_epi16(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi32() {
        let r = _mm256_set1_epi32(1);
        assert_eq_m256i(r, _mm256_set1_epi32(1));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set1_epi64x() {
        let r = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, _mm256_set1_epi64x(1));
    }

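    // The cast intrinsics are pure bit-pattern reinterpretations (no conversion and
    // no instructions generated); for example the f64 value 1.0 reinterprets as the
    // f32 pair (0.0, 1.875).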
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_ps() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_ps(a);
        let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_pd() {
        let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
        let r = _mm256_castps_pd(a);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps_si256() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps_si256(a);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_ps() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            0, 0, -128, 63, 0, 0, 0, 64,
            0, 0, 64, 64, 0, 0, -128, 64,
            0, 0, -96, 64, 0, 0, -64, 64,
            0, 0, -32, 64, 0, 0, 0, 65,
        );
        let r = _mm256_castsi256_ps(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd_si256() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd_si256(a);
        assert_eq_m256d(transmute(r), a);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_pd() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_pd(a);
        assert_eq_m256d(r, transmute(a));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castps256_ps128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_castps256_ps128(a);
        assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castpd256_pd128() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castpd256_pd128(a);
        assert_eq_m128d(r, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_castsi256_si128() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_castsi256_si128(a);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextps128_ps256() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_zextps128_ps256(a);
        let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextsi128_si256() {
        let a = _mm_setr_epi64x(1, 2);
        let r = _mm256_zextsi128_si256(a);
        let e = _mm256_setr_epi64x(1, 2, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_zextpd128_pd256() {
        let a = _mm_setr_pd(1., 2.);
        let r = _mm256_zextpd128_pd256(a);
        let e = _mm256_setr_pd(1., 2., 0., 0.);
        assert_eq_m256d(r, e);
    }

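    // `_mm256_set_m128*` places its first argument in the upper 128 bits and the
    // second in the lower 128 bits; `_mm256_setr_m128*` takes (lo, hi) instead.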
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128() {
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_set_m128(hi, lo);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128d() {
        let hi = _mm_setr_pd(3., 4.);
        let lo = _mm_setr_pd(1., 2.);
        let r = _mm256_set_m128d(hi, lo);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_set_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20,
            21, 22, 23, 24,
            25, 26, 27, 28,
            29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4,
            5, 6, 7, 8,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        let r = _mm256_set_m128i(hi, lo);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128() {
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let r = _mm256_setr_m128(lo, hi);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128d() {
        let lo = _mm_setr_pd(1., 2.);
        let hi = _mm_setr_pd(3., 4.);
        let r = _mm256_setr_m128d(lo, hi);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_setr_m128i() {
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4,
            5, 6, 7, 8,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_setr_m128i(lo, hi);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

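    // `loadu2`/`storeu2` access two independent 128-bit halves through separate
    // pointers; neither pointer needs to be aligned.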
    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128() {
        let hi = &[5., 6., 7., 8.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2., 3., 4.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128(hiaddr, loaddr);
        let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128d() {
        let hi = &[3., 4.];
        let hiaddr = hi.as_ptr();
        let lo = &[1., 2.];
        let loaddr = lo.as_ptr();
        let r = _mm256_loadu2_m128d(hiaddr, loaddr);
        let e = _mm256_setr_pd(1., 2., 3., 4.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_loadu2_m128i() {
        #[rustfmt::skip]
        let hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm256_loadu2_m128i(&hi as *const _ as *const _, &lo as *const _ as *const _);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let mut hi = _mm_undefined_ps();
        let mut lo = _mm_undefined_ps();
        _mm256_storeu2_m128(
            &mut hi as *mut _ as *mut f32,
            &mut lo as *mut _ as *mut f32,
            a,
        );
        assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.));
        assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128d() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let mut hi = _mm_undefined_pd();
        let mut lo = _mm_undefined_pd();
        _mm256_storeu2_m128d(
            &mut hi as *mut _ as *mut f64,
            &mut lo as *mut _ as *mut f64,
            a,
        );
        assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
        assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_storeu2_m128i() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let mut hi = _mm_undefined_si128();
        let mut lo = _mm_undefined_si128();
        _mm256_storeu2_m128i(&mut hi as *mut _, &mut lo as *mut _, a);
        #[rustfmt::skip]
        let e_hi = _mm_setr_epi8(
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32
        );
        #[rustfmt::skip]
        let e_lo = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16
        );

        assert_eq_m128i(hi, e_hi);
        assert_eq_m128i(lo, e_lo);
    }

    #[simd_test(enable = "avx")]
    unsafe fn test_mm256_cvtss_f32() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_cvtss_f32(a);
        assert_eq!(r, 1.);
    }
}