//! Advanced Vector Extensions (AVX)
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
//!
//! [Wikipedia][wiki] provides a quick overview of the instructions available.
//!
//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: https://docs.amd.com/v/u/en-US/24594_3.37
//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions

use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem, ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;
24
25/// Adds packed double-precision (64-bit) floating-point elements
26/// in `a` and `b`.
27///
28/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_pd)
29#[inline]
30#[target_feature(enable = "avx")]
31#[cfg_attr(test, assert_instr(vaddpd))]
32#[stable(feature = "simd_x86", since = "1.27.0")]
33#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_add(a, b) }
}
37
38/// Adds packed single-precision (32-bit) floating-point elements in `a` and
39/// `b`.
40///
41/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_add_ps)
42#[inline]
43#[target_feature(enable = "avx")]
44#[cfg_attr(test, assert_instr(vaddps))]
45#[stable(feature = "simd_x86", since = "1.27.0")]
46#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_add(a, b) }
}
50
/// Computes the bitwise AND of packed double-precision (64-bit)
/// floating-point elements in `a` and `b`.
53///
54/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_pd)
55#[inline]
56#[target_feature(enable = "avx")]
57// See https://github.com/rust-lang/stdarch/issues/71
58#[cfg_attr(test, assert_instr(vandp))]
59#[stable(feature = "simd_x86", since = "1.27.0")]
60#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_and(a, b))
    }
}
68
69/// Computes the bitwise AND of packed single-precision (32-bit) floating-point
70/// elements in `a` and `b`.
71///
72/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_ps)
73#[inline]
74#[target_feature(enable = "avx")]
75#[cfg_attr(test, assert_instr(vandps))]
76#[stable(feature = "simd_x86", since = "1.27.0")]
77#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_and(a, b))
    }
}
85
/// Computes the bitwise OR of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
88///
89/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_pd)
90#[inline]
91#[target_feature(enable = "avx")]
92// See <https://github.com/rust-lang/stdarch/issues/71>.
93#[cfg_attr(test, assert_instr(vorp))]
94#[stable(feature = "simd_x86", since = "1.27.0")]
95#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_or(a, b))
    }
}
103
/// Computes the bitwise OR of packed single-precision (32-bit) floating-point
/// elements in `a` and `b`.
106///
107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_ps)
108#[inline]
109#[target_feature(enable = "avx")]
110#[cfg_attr(test, assert_instr(vorps))]
111#[stable(feature = "simd_x86", since = "1.27.0")]
112#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_or(a, b))
    }
}
120
121/// Shuffles double-precision (64-bit) floating-point elements within 128-bit
122/// lanes using the control in `imm8`.
123///
124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_pd)
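///
/// A minimal usage sketch (illustrative, not from Intel's documentation; the
/// hypothetical `demo` wrapper stands in for any caller compiled with the
/// `avx` feature enabled):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///     let b = _mm256_setr_pd(10.0, 20.0, 30.0, 40.0);
///     // Per 128-bit half: bits 0 and 2 of the mask pick the `a` element,
///     // bits 1 and 3 pick the `b` element, so MASK = 0b0101 yields
///     // [2.0, 10.0, 4.0, 30.0].
///     let _r = _mm256_shuffle_pd::<0b0101>(a, b);
/// }
/// ```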
125#[inline]
126#[target_feature(enable = "avx")]
127#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
128#[rustc_legacy_const_generics(2)]
129#[stable(feature = "simd_x86", since = "1.27.0")]
130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
131pub const fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
132 static_assert_uimm_bits!(MASK, 8);
133 unsafe {
134 simd_shuffle!(
135 a,
136 b,
137 [
138 MASK as u32 & 0b1,
139 ((MASK as u32 >> 1) & 0b1) + 4,
140 ((MASK as u32 >> 2) & 0b1) + 2,
141 ((MASK as u32 >> 3) & 0b1) + 6,
142 ],
143 )
144 }
145}
146
147/// Shuffles single-precision (32-bit) floating-point elements in `a` within
148/// 128-bit lanes using the control in `imm8`.
149///
150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_ps)
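///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical caller
/// compiled with the `avx` feature):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///     let b = _mm256_setr_ps(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0);
///     // Within each 128-bit lane the two low 2-bit fields of MASK index into
///     // `a` and the two high fields index into `b`. MASK = 0x1B (0b00_01_10_11)
///     // yields [4.0, 3.0, 20.0, 10.0, 8.0, 7.0, 60.0, 50.0].
///     let _r = _mm256_shuffle_ps::<0x1B>(a, b);
/// }
/// ```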
151#[inline]
152#[target_feature(enable = "avx")]
153#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
154#[rustc_legacy_const_generics(2)]
155#[stable(feature = "simd_x86", since = "1.27.0")]
156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
157pub const fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
158 static_assert_uimm_bits!(MASK, 8);
159 unsafe {
160 simd_shuffle!(
161 a,
162 b,
163 [
164 MASK as u32 & 0b11,
165 (MASK as u32 >> 2) & 0b11,
166 ((MASK as u32 >> 4) & 0b11) + 8,
167 ((MASK as u32 >> 6) & 0b11) + 8,
168 (MASK as u32 & 0b11) + 4,
169 ((MASK as u32 >> 2) & 0b11) + 4,
170 ((MASK as u32 >> 4) & 0b11) + 12,
171 ((MASK as u32 >> 6) & 0b11) + 12,
172 ],
173 )
174 }
175}
176
177/// Computes the bitwise NOT of packed double-precision (64-bit) floating-point
178/// elements in `a`, and then AND with `b`.
179///
180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_pd)
181#[inline]
182#[target_feature(enable = "avx")]
183#[cfg_attr(test, assert_instr(vandnp))]
184#[stable(feature = "simd_x86", since = "1.27.0")]
185#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b))
    }
}
193
/// Computes the bitwise NOT of packed single-precision (32-bit) floating-point
/// elements in `a`, and then AND with `b`.
197///
198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_ps)
199#[inline]
200#[target_feature(enable = "avx")]
201#[cfg_attr(test, assert_instr(vandnps))]
202#[stable(feature = "simd_x86", since = "1.27.0")]
203#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b))
    }
}
211
212/// Compares packed double-precision (64-bit) floating-point elements
213/// in `a` and `b`, and returns packed maximum values
214///
215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_pd)
216#[inline]
217#[target_feature(enable = "avx")]
218#[cfg_attr(test, assert_instr(vmaxpd))]
219#[stable(feature = "simd_x86", since = "1.27.0")]
220pub fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d {
221 unsafe { vmaxpd(a, b) }
222}
223
224/// Compares packed single-precision (32-bit) floating-point elements in `a`
225/// and `b`, and returns packed maximum values
226///
227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_ps)
228#[inline]
229#[target_feature(enable = "avx")]
230#[cfg_attr(test, assert_instr(vmaxps))]
231#[stable(feature = "simd_x86", since = "1.27.0")]
232pub fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 {
233 unsafe { vmaxps(a, b) }
234}
235
236/// Compares packed double-precision (64-bit) floating-point elements
237/// in `a` and `b`, and returns packed minimum values
238///
239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_pd)
240#[inline]
241#[target_feature(enable = "avx")]
242#[cfg_attr(test, assert_instr(vminpd))]
243#[stable(feature = "simd_x86", since = "1.27.0")]
244pub fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d {
245 unsafe { vminpd(a, b) }
246}
247
248/// Compares packed single-precision (32-bit) floating-point elements in `a`
249/// and `b`, and returns packed minimum values
250///
251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_ps)
252#[inline]
253#[target_feature(enable = "avx")]
254#[cfg_attr(test, assert_instr(vminps))]
255#[stable(feature = "simd_x86", since = "1.27.0")]
256pub fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
257 unsafe { vminps(a, b) }
258}
259
260/// Multiplies packed double-precision (64-bit) floating-point elements
261/// in `a` and `b`.
262///
263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_pd)
264#[inline]
265#[target_feature(enable = "avx")]
266#[cfg_attr(test, assert_instr(vmulpd))]
267#[stable(feature = "simd_x86", since = "1.27.0")]
268#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_mul(a, b) }
}
272
273/// Multiplies packed single-precision (32-bit) floating-point elements in `a` and
274/// `b`.
275///
276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mul_ps)
277#[inline]
278#[target_feature(enable = "avx")]
279#[cfg_attr(test, assert_instr(vmulps))]
280#[stable(feature = "simd_x86", since = "1.27.0")]
281#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_mul(a, b) }
}
285
286/// Alternatively adds and subtracts packed double-precision (64-bit)
287/// floating-point elements in `a` to/from packed elements in `b`.
288///
289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_pd)
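///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical caller
/// compiled with the `avx` feature):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///     let b = _mm256_setr_pd(10.0, 20.0, 30.0, 40.0);
///     // Even-indexed elements are subtracted, odd-indexed elements are added:
///     // the result is [-9.0, 22.0, -27.0, 44.0].
///     let _r = _mm256_addsub_pd(a, b);
/// }
/// ```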
290#[inline]
291#[target_feature(enable = "avx")]
292#[cfg_attr(test, assert_instr(vaddsubpd))]
293#[stable(feature = "simd_x86", since = "1.27.0")]
294#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        let add = simd_add(a, b);
        let sub = simd_sub(a, b);
        simd_shuffle!(add, sub, [4, 1, 6, 3])
    }
}
304
305/// Alternatively adds and subtracts packed single-precision (32-bit)
306/// floating-point elements in `a` to/from packed elements in `b`.
307///
308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_addsub_ps)
309#[inline]
310#[target_feature(enable = "avx")]
311#[cfg_attr(test, assert_instr(vaddsubps))]
312#[stable(feature = "simd_x86", since = "1.27.0")]
313#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let add = simd_add(a, b);
        let sub = simd_sub(a, b);
        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
    }
}
323
324/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
325/// from packed elements in `a`.
326///
327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_pd)
328#[inline]
329#[target_feature(enable = "avx")]
330#[cfg_attr(test, assert_instr(vsubpd))]
331#[stable(feature = "simd_x86", since = "1.27.0")]
332#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_sub(a, b) }
}
336
337/// Subtracts packed single-precision (32-bit) floating-point elements in `b`
338/// from packed elements in `a`.
339///
340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_ps)
341#[inline]
342#[target_feature(enable = "avx")]
343#[cfg_attr(test, assert_instr(vsubps))]
344#[stable(feature = "simd_x86", since = "1.27.0")]
345#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_sub(a, b) }
}
349
350/// Computes the division of each of the 8 packed 32-bit floating-point elements
351/// in `a` by the corresponding packed elements in `b`.
352///
353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_ps)
354#[inline]
355#[target_feature(enable = "avx")]
356#[cfg_attr(test, assert_instr(vdivps))]
357#[stable(feature = "simd_x86", since = "1.27.0")]
358#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
    unsafe { simd_div(a, b) }
}
362
363/// Computes the division of each of the 4 packed 64-bit floating-point elements
364/// in `a` by the corresponding packed elements in `b`.
365///
366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_div_pd)
367#[inline]
368#[target_feature(enable = "avx")]
369#[cfg_attr(test, assert_instr(vdivpd))]
370#[stable(feature = "simd_x86", since = "1.27.0")]
371#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe { simd_div(a, b) }
}
375
376/// Rounds packed double-precision (64-bit) floating point elements in `a`
377/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows:
378///
379/// - `0x00`: Round to the nearest whole number.
380/// - `0x01`: Round down, toward negative infinity.
381/// - `0x02`: Round up, toward positive infinity.
382/// - `0x03`: Truncate the values.
383///
384/// For a complete list of options, check [the LLVM docs][llvm_docs].
385///
386/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
387///
388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_pd)
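///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical caller
/// compiled with the `avx` feature):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_pd(1.4, 2.7, -1.2, 3.6);
///     // 0x00 rounds to the nearest whole number: [1.0, 3.0, -1.0, 4.0].
///     let _nearest = _mm256_round_pd::<0x00>(a);
///     // 0x03 truncates toward zero: [1.0, 2.0, -1.0, 3.0].
///     let _truncated = _mm256_round_pd::<0x03>(a);
/// }
/// ```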
389#[inline]
390#[target_feature(enable = "avx")]
391#[cfg_attr(test, assert_instr(vroundpd, ROUNDING = 0x3))]
392#[rustc_legacy_const_generics(1)]
393#[stable(feature = "simd_x86", since = "1.27.0")]
394pub fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
395 static_assert_uimm_bits!(ROUNDING, 4);
396 unsafe { roundpd256(a, ROUNDING) }
397}
398
399/// Rounds packed double-precision (64-bit) floating point elements in `a`
400/// toward positive infinity.
401///
402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_pd)
403#[inline]
404#[target_feature(enable = "avx")]
405#[cfg_attr(test, assert_instr(vroundpd))]
406#[stable(feature = "simd_x86", since = "1.27.0")]
407#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
408pub const fn _mm256_ceil_pd(a: __m256d) -> __m256d {
409 unsafe { simd_ceil(a) }
410}
411
412/// Rounds packed double-precision (64-bit) floating point elements in `a`
413/// toward negative infinity.
414///
415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_pd)
416#[inline]
417#[target_feature(enable = "avx")]
418#[cfg_attr(test, assert_instr(vroundpd))]
419#[stable(feature = "simd_x86", since = "1.27.0")]
420#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
421pub const fn _mm256_floor_pd(a: __m256d) -> __m256d {
422 unsafe { simd_floor(a) }
423}
424
425/// Rounds packed single-precision (32-bit) floating point elements in `a`
426/// according to the flag `ROUNDING`. The value of `ROUNDING` may be as follows:
427///
428/// - `0x00`: Round to the nearest whole number.
429/// - `0x01`: Round down, toward negative infinity.
430/// - `0x02`: Round up, toward positive infinity.
431/// - `0x03`: Truncate the values.
432///
433/// For a complete list of options, check [the LLVM docs][llvm_docs].
434///
435/// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
436///
437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_round_ps)
438#[inline]
439#[target_feature(enable = "avx")]
440#[cfg_attr(test, assert_instr(vroundps, ROUNDING = 0x00))]
441#[rustc_legacy_const_generics(1)]
442#[stable(feature = "simd_x86", since = "1.27.0")]
443pub fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
444 static_assert_uimm_bits!(ROUNDING, 4);
445 unsafe { roundps256(a, ROUNDING) }
446}
447
448/// Rounds packed single-precision (32-bit) floating point elements in `a`
449/// toward positive infinity.
450///
451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ceil_ps)
452#[inline]
453#[target_feature(enable = "avx")]
454#[cfg_attr(test, assert_instr(vroundps))]
455#[stable(feature = "simd_x86", since = "1.27.0")]
456#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
457pub const fn _mm256_ceil_ps(a: __m256) -> __m256 {
458 unsafe { simd_ceil(a) }
459}
460
461/// Rounds packed single-precision (32-bit) floating point elements in `a`
462/// toward negative infinity.
463///
464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_floor_ps)
465#[inline]
466#[target_feature(enable = "avx")]
467#[cfg_attr(test, assert_instr(vroundps))]
468#[stable(feature = "simd_x86", since = "1.27.0")]
469#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
470pub const fn _mm256_floor_ps(a: __m256) -> __m256 {
471 unsafe { simd_floor(a) }
472}
473
474/// Returns the square root of packed single-precision (32-bit) floating point
475/// elements in `a`.
476///
477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_ps)
478#[inline]
479#[target_feature(enable = "avx")]
480#[cfg_attr(test, assert_instr(vsqrtps))]
481#[stable(feature = "simd_x86", since = "1.27.0")]
482pub fn _mm256_sqrt_ps(a: __m256) -> __m256 {
483 unsafe { simd_fsqrt(a) }
484}
485
486/// Returns the square root of packed double-precision (64-bit) floating point
487/// elements in `a`.
488///
489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sqrt_pd)
490#[inline]
491#[target_feature(enable = "avx")]
492#[cfg_attr(test, assert_instr(vsqrtpd))]
493#[stable(feature = "simd_x86", since = "1.27.0")]
494pub fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
495 unsafe { simd_fsqrt(a) }
496}
497
498/// Blends packed double-precision (64-bit) floating-point elements from
499/// `a` and `b` using control mask `imm8`.
500///
501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_pd)
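///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical caller
/// compiled with the `avx` feature):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///     let b = _mm256_setr_pd(10.0, 20.0, 30.0, 40.0);
///     // Bit i of the mask selects element i from `b` (1) or `a` (0):
///     // 0b0101 yields [10.0, 2.0, 30.0, 4.0].
///     let _r = _mm256_blend_pd::<0b0101>(a, b);
/// }
/// ```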
502#[inline]
503#[target_feature(enable = "avx")]
504// Note: LLVM7 prefers single-precision blend instructions when
505// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194
506// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
507#[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
508#[rustc_legacy_const_generics(2)]
509#[stable(feature = "simd_x86", since = "1.27.0")]
510#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
511pub const fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
512 static_assert_uimm_bits!(IMM4, 4);
513 unsafe {
514 simd_shuffle!(
515 a,
516 b,
517 [
518 ((IMM4 as u32 >> 0) & 1) * 4 + 0,
519 ((IMM4 as u32 >> 1) & 1) * 4 + 1,
520 ((IMM4 as u32 >> 2) & 1) * 4 + 2,
521 ((IMM4 as u32 >> 3) & 1) * 4 + 3,
522 ],
523 )
524 }
525}
526
527/// Blends packed single-precision (32-bit) floating-point elements from
528/// `a` and `b` using control mask `imm8`.
529///
530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blend_ps)
531#[inline]
532#[target_feature(enable = "avx")]
533#[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
534#[rustc_legacy_const_generics(2)]
535#[stable(feature = "simd_x86", since = "1.27.0")]
536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
537pub const fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
538 static_assert_uimm_bits!(IMM8, 8);
539 unsafe {
540 simd_shuffle!(
541 a,
542 b,
543 [
544 ((IMM8 as u32 >> 0) & 1) * 8 + 0,
545 ((IMM8 as u32 >> 1) & 1) * 8 + 1,
546 ((IMM8 as u32 >> 2) & 1) * 8 + 2,
547 ((IMM8 as u32 >> 3) & 1) * 8 + 3,
548 ((IMM8 as u32 >> 4) & 1) * 8 + 4,
549 ((IMM8 as u32 >> 5) & 1) * 8 + 5,
550 ((IMM8 as u32 >> 6) & 1) * 8 + 6,
551 ((IMM8 as u32 >> 7) & 1) * 8 + 7,
552 ],
553 )
554 }
555}
556
557/// Blends packed double-precision (64-bit) floating-point elements from
558/// `a` and `b` using `c` as a mask.
559///
560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_pd)
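///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical caller
/// compiled with the `avx` feature). Each element comes from `b` where the
/// sign bit of the corresponding element of `c` is set, and from `a` otherwise:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///     let b = _mm256_setr_pd(10.0, 20.0, 30.0, 40.0);
///     let c = _mm256_setr_pd(-1.0, 1.0, -1.0, 1.0);
///     // Negative mask elements select from `b`: [10.0, 2.0, 30.0, 4.0].
///     let _r = _mm256_blendv_pd(a, b, c);
/// }
/// ```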
561#[inline]
562#[target_feature(enable = "avx")]
563#[cfg_attr(test, assert_instr(vblendvpd))]
564#[stable(feature = "simd_x86", since = "1.27.0")]
565#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe {
        let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
        transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
    }
}
572
573/// Blends packed single-precision (32-bit) floating-point elements from
574/// `a` and `b` using `c` as a mask.
575///
576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_blendv_ps)
577#[inline]
578#[target_feature(enable = "avx")]
579#[cfg_attr(test, assert_instr(vblendvps))]
580#[stable(feature = "simd_x86", since = "1.27.0")]
581#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe {
        let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
        transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
    }
}
588
/// Conditionally multiplies the packed single-precision (32-bit) floating-point
/// elements in `a` and `b` using the high 4 bits in `imm8`,
/// sums the four products, and conditionally returns the sum
/// using the low 4 bits of `imm8`.
593///
594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_dp_ps)
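///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical caller
/// compiled with the `avx` feature). The operation is performed independently
/// in each 128-bit lane:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///     let b = _mm256_set1_ps(1.0);
///     // IMM8 = 0x71: the high nibble (0b0111) multiplies elements 0..=2 of
///     // each lane, the low nibble (0b0001) stores the sum in element 0 and
///     // zeroes the rest, giving [6.0, 0.0, 0.0, 0.0, 18.0, 0.0, 0.0, 0.0].
///     let _r = _mm256_dp_ps::<0x71>(a, b);
/// }
/// ```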
595#[inline]
596#[target_feature(enable = "avx")]
597#[cfg_attr(test, assert_instr(vdpps, IMM8 = 0x0))]
598#[rustc_legacy_const_generics(2)]
599#[stable(feature = "simd_x86", since = "1.27.0")]
600pub fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
601 static_assert_uimm_bits!(IMM8, 8);
602 unsafe { vdpps(a, b, IMM8 as i8) }
603}
604
605/// Horizontal addition of adjacent pairs in the two packed vectors
606/// of 4 64-bit floating points `a` and `b`.
607/// In the result, sums of elements from `a` are returned in even locations,
608/// while sums of elements from `b` are returned in odd locations.
609///
610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_pd)
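///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical caller
/// compiled with the `avx` feature):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///     let b = _mm256_setr_pd(10.0, 20.0, 30.0, 40.0);
///     // Pair sums of `a` land in even slots, pair sums of `b` in odd slots:
///     // [1.0 + 2.0, 10.0 + 20.0, 3.0 + 4.0, 30.0 + 40.0] = [3.0, 30.0, 7.0, 70.0].
///     let _r = _mm256_hadd_pd(a, b);
/// }
/// ```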
611#[inline]
612#[target_feature(enable = "avx")]
613#[cfg_attr(test, assert_instr(vhaddpd))]
614#[stable(feature = "simd_x86", since = "1.27.0")]
615#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let even: __m256d = simd_shuffle!(a, b, [0, 4, 2, 6]);
        let odd: __m256d = simd_shuffle!(a, b, [1, 5, 3, 7]);
        simd_add(even, odd)
    }
}
623
624/// Horizontal addition of adjacent pairs in the two packed vectors
625/// of 8 32-bit floating points `a` and `b`.
/// In the result, sums of elements from `a` are returned in locations of
/// indices 0, 1, 4, 5; while sums of elements from `b` are returned in
/// locations 2, 3, 6, 7.
629///
630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hadd_ps)
631#[inline]
632#[target_feature(enable = "avx")]
633#[cfg_attr(test, assert_instr(vhaddps))]
634#[stable(feature = "simd_x86", since = "1.27.0")]
635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let even: __m256 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: __m256 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_add(even, odd)
    }
}
643
/// Horizontal subtraction of adjacent pairs in the two packed vectors
/// of 4 64-bit floating points `a` and `b`.
/// In the result, differences of elements from `a` are returned in even
/// locations, while differences of elements from `b` are returned in odd
/// locations.
648///
649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_pd)
650#[inline]
651#[target_feature(enable = "avx")]
652#[cfg_attr(test, assert_instr(vhsubpd))]
653#[stable(feature = "simd_x86", since = "1.27.0")]
654#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let even: __m256d = simd_shuffle!(a, b, [0, 4, 2, 6]);
        let odd: __m256d = simd_shuffle!(a, b, [1, 5, 3, 7]);
        simd_sub(even, odd)
    }
}
662
/// Horizontal subtraction of adjacent pairs in the two packed vectors
/// of 8 32-bit floating points `a` and `b`.
/// In the result, differences of elements from `a` are returned in locations
/// of indices 0, 1, 4, 5; while differences of elements from `b` are returned
/// in locations 2, 3, 6, 7.
668///
669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_hsub_ps)
670#[inline]
671#[target_feature(enable = "avx")]
672#[cfg_attr(test, assert_instr(vhsubps))]
673#[stable(feature = "simd_x86", since = "1.27.0")]
674#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let even: __m256 = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
        let odd: __m256 = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
        simd_sub(even, odd)
    }
}
682
683/// Computes the bitwise XOR of packed double-precision (64-bit) floating-point
684/// elements in `a` and `b`.
685///
686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_pd)
687#[inline]
688#[target_feature(enable = "avx")]
689#[cfg_attr(test, assert_instr(vxorp))]
690#[stable(feature = "simd_x86", since = "1.27.0")]
691#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let a: u64x4 = transmute(a);
        let b: u64x4 = transmute(b);
        transmute(simd_xor(a, b))
    }
}
699
700/// Computes the bitwise XOR of packed single-precision (32-bit) floating-point
701/// elements in `a` and `b`.
702///
703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_ps)
704#[inline]
705#[target_feature(enable = "avx")]
706#[cfg_attr(test, assert_instr(vxorps))]
707#[stable(feature = "simd_x86", since = "1.27.0")]
708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        let a: u32x8 = transmute(a);
        let b: u32x8 = transmute(b);
        transmute(simd_xor(a, b))
    }
}
716
717/// Equal (ordered, non-signaling)
718#[stable(feature = "simd_x86", since = "1.27.0")]
719pub const _CMP_EQ_OQ: i32 = 0x00;
720/// Less-than (ordered, signaling)
721#[stable(feature = "simd_x86", since = "1.27.0")]
722pub const _CMP_LT_OS: i32 = 0x01;
723/// Less-than-or-equal (ordered, signaling)
724#[stable(feature = "simd_x86", since = "1.27.0")]
725pub const _CMP_LE_OS: i32 = 0x02;
726/// Unordered (non-signaling)
727#[stable(feature = "simd_x86", since = "1.27.0")]
728pub const _CMP_UNORD_Q: i32 = 0x03;
729/// Not-equal (unordered, non-signaling)
730#[stable(feature = "simd_x86", since = "1.27.0")]
731pub const _CMP_NEQ_UQ: i32 = 0x04;
732/// Not-less-than (unordered, signaling)
733#[stable(feature = "simd_x86", since = "1.27.0")]
734pub const _CMP_NLT_US: i32 = 0x05;
735/// Not-less-than-or-equal (unordered, signaling)
736#[stable(feature = "simd_x86", since = "1.27.0")]
737pub const _CMP_NLE_US: i32 = 0x06;
738/// Ordered (non-signaling)
739#[stable(feature = "simd_x86", since = "1.27.0")]
740pub const _CMP_ORD_Q: i32 = 0x07;
741/// Equal (unordered, non-signaling)
742#[stable(feature = "simd_x86", since = "1.27.0")]
743pub const _CMP_EQ_UQ: i32 = 0x08;
744/// Not-greater-than-or-equal (unordered, signaling)
745#[stable(feature = "simd_x86", since = "1.27.0")]
746pub const _CMP_NGE_US: i32 = 0x09;
747/// Not-greater-than (unordered, signaling)
748#[stable(feature = "simd_x86", since = "1.27.0")]
749pub const _CMP_NGT_US: i32 = 0x0a;
750/// False (ordered, non-signaling)
751#[stable(feature = "simd_x86", since = "1.27.0")]
752pub const _CMP_FALSE_OQ: i32 = 0x0b;
753/// Not-equal (ordered, non-signaling)
754#[stable(feature = "simd_x86", since = "1.27.0")]
755pub const _CMP_NEQ_OQ: i32 = 0x0c;
756/// Greater-than-or-equal (ordered, signaling)
757#[stable(feature = "simd_x86", since = "1.27.0")]
758pub const _CMP_GE_OS: i32 = 0x0d;
759/// Greater-than (ordered, signaling)
760#[stable(feature = "simd_x86", since = "1.27.0")]
761pub const _CMP_GT_OS: i32 = 0x0e;
762/// True (unordered, non-signaling)
763#[stable(feature = "simd_x86", since = "1.27.0")]
764pub const _CMP_TRUE_UQ: i32 = 0x0f;
765/// Equal (ordered, signaling)
766#[stable(feature = "simd_x86", since = "1.27.0")]
767pub const _CMP_EQ_OS: i32 = 0x10;
768/// Less-than (ordered, non-signaling)
769#[stable(feature = "simd_x86", since = "1.27.0")]
770pub const _CMP_LT_OQ: i32 = 0x11;
771/// Less-than-or-equal (ordered, non-signaling)
772#[stable(feature = "simd_x86", since = "1.27.0")]
773pub const _CMP_LE_OQ: i32 = 0x12;
774/// Unordered (signaling)
775#[stable(feature = "simd_x86", since = "1.27.0")]
776pub const _CMP_UNORD_S: i32 = 0x13;
777/// Not-equal (unordered, signaling)
778#[stable(feature = "simd_x86", since = "1.27.0")]
779pub const _CMP_NEQ_US: i32 = 0x14;
780/// Not-less-than (unordered, non-signaling)
781#[stable(feature = "simd_x86", since = "1.27.0")]
782pub const _CMP_NLT_UQ: i32 = 0x15;
783/// Not-less-than-or-equal (unordered, non-signaling)
784#[stable(feature = "simd_x86", since = "1.27.0")]
785pub const _CMP_NLE_UQ: i32 = 0x16;
786/// Ordered (signaling)
787#[stable(feature = "simd_x86", since = "1.27.0")]
788pub const _CMP_ORD_S: i32 = 0x17;
789/// Equal (unordered, signaling)
790#[stable(feature = "simd_x86", since = "1.27.0")]
791pub const _CMP_EQ_US: i32 = 0x18;
792/// Not-greater-than-or-equal (unordered, non-signaling)
793#[stable(feature = "simd_x86", since = "1.27.0")]
794pub const _CMP_NGE_UQ: i32 = 0x19;
795/// Not-greater-than (unordered, non-signaling)
796#[stable(feature = "simd_x86", since = "1.27.0")]
797pub const _CMP_NGT_UQ: i32 = 0x1a;
798/// False (ordered, signaling)
799#[stable(feature = "simd_x86", since = "1.27.0")]
800pub const _CMP_FALSE_OS: i32 = 0x1b;
801/// Not-equal (ordered, signaling)
802#[stable(feature = "simd_x86", since = "1.27.0")]
803pub const _CMP_NEQ_OS: i32 = 0x1c;
804/// Greater-than-or-equal (ordered, non-signaling)
805#[stable(feature = "simd_x86", since = "1.27.0")]
806pub const _CMP_GE_OQ: i32 = 0x1d;
807/// Greater-than (ordered, non-signaling)
808#[stable(feature = "simd_x86", since = "1.27.0")]
809pub const _CMP_GT_OQ: i32 = 0x1e;
810/// True (unordered, signaling)
811#[stable(feature = "simd_x86", since = "1.27.0")]
812pub const _CMP_TRUE_US: i32 = 0x1f;
813
814/// Compares packed double-precision (64-bit) floating-point
815/// elements in `a` and `b` based on the comparison operand
816/// specified by `IMM5`.
817///
818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd)
819#[inline]
820#[target_feature(enable = "avx")]
821#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd
822#[rustc_legacy_const_generics(2)]
823#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmppd(a, b, const { IMM5 as i8 }) }
}
828
829/// Compares packed double-precision (64-bit) floating-point
830/// elements in `a` and `b` based on the comparison operand
831/// specified by `IMM5`.
832///
833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd)
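///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical caller
/// compiled with the `avx` feature). Each output element is all-ones where the
/// predicate holds and all-zeros otherwise, so the result works directly as a
/// blend or movemask input:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///     let b = _mm256_set1_pd(2.0);
///     let lt = _mm256_cmp_pd::<_CMP_LT_OQ>(a, b);
///     // Only element 0 satisfies `a < b`, so the mask is 0b0001.
///     let _mask = _mm256_movemask_pd(lt);
/// }
/// ```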
834#[inline]
835#[target_feature(enable = "avx")]
836#[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd
837#[rustc_legacy_const_generics(2)]
838#[stable(feature = "simd_x86", since = "1.27.0")]
839pub fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d {
840 static_assert_uimm_bits!(IMM5, 5);
841 unsafe { vcmppd256(a, b, IMM5 as u8) }
842}
843
844/// Compares packed single-precision (32-bit) floating-point
845/// elements in `a` and `b` based on the comparison operand
846/// specified by `IMM5`.
847///
848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps)
849#[inline]
850#[target_feature(enable = "avx")]
851#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps
852#[rustc_legacy_const_generics(2)]
853#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpps(a, b, const { IMM5 as i8 }) }
}
858
859/// Compares packed single-precision (32-bit) floating-point
860/// elements in `a` and `b` based on the comparison operand
861/// specified by `IMM5`.
862///
863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps)
864#[inline]
865#[target_feature(enable = "avx")]
866#[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps
867#[rustc_legacy_const_generics(2)]
868#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM5, 5);
    unsafe { vcmpps256(a, b, const { IMM5 as u8 }) }
}
873
/// Compares the lower double-precision (64-bit) floating-point element in
/// `a` and `b` based on the comparison operand specified by `IMM5`,
/// stores the result in the lower element of the returned vector,
/// and copies the upper element from `a` to the upper element of the returned
/// vector.
879///
880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd)
881#[inline]
882#[target_feature(enable = "avx")]
883#[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))] // TODO Validate vcmpsd
884#[rustc_legacy_const_generics(2)]
885#[stable(feature = "simd_x86", since = "1.27.0")]
886pub fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
887 static_assert_uimm_bits!(IMM5, 5);
888 unsafe { vcmpsd(a, b, IMM5 as i8) }
889}
890
/// Compares the lower single-precision (32-bit) floating-point element in
/// `a` and `b` based on the comparison operand specified by `IMM5`,
/// stores the result in the lower element of the returned vector,
/// and copies the upper 3 packed elements from `a` to the upper elements of
/// the returned vector.
896///
897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss)
898#[inline]
899#[target_feature(enable = "avx")]
900#[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))] // TODO Validate vcmpss
901#[rustc_legacy_const_generics(2)]
902#[stable(feature = "simd_x86", since = "1.27.0")]
903pub fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
904 static_assert_uimm_bits!(IMM5, 5);
905 unsafe { vcmpss(a, b, IMM5 as i8) }
906}
907
908/// Converts packed 32-bit integers in `a` to packed double-precision (64-bit)
909/// floating-point elements.
910///
911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_pd)
912#[inline]
913#[target_feature(enable = "avx")]
914#[cfg_attr(test, assert_instr(vcvtdq2pd))]
915#[stable(feature = "simd_x86", since = "1.27.0")]
916#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
917pub const fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
918 unsafe { simd_cast(a.as_i32x4()) }
919}
920
921/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
922/// floating-point elements.
923///
924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_ps)
925#[inline]
926#[target_feature(enable = "avx")]
927#[cfg_attr(test, assert_instr(vcvtdq2ps))]
928#[stable(feature = "simd_x86", since = "1.27.0")]
929#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
930pub const fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
931 unsafe { simd_cast(a.as_i32x8()) }
932}
933
934/// Converts packed double-precision (64-bit) floating-point elements in `a`
935/// to packed single-precision (32-bit) floating-point elements.
936///
937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_ps)
938#[inline]
939#[target_feature(enable = "avx")]
940#[cfg_attr(test, assert_instr(vcvtpd2ps))]
941#[stable(feature = "simd_x86", since = "1.27.0")]
942#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
943pub const fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
944 unsafe { simd_cast(a) }
945}
946
947/// Converts packed single-precision (32-bit) floating-point elements in `a`
948/// to packed 32-bit integers.
949///
950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi32)
951#[inline]
952#[target_feature(enable = "avx")]
953#[cfg_attr(test, assert_instr(vcvtps2dq))]
954#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2dq(a)) }
}
958
959/// Converts packed single-precision (32-bit) floating-point elements in `a`
960/// to packed double-precision (64-bit) floating-point elements.
961///
962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_pd)
963#[inline]
964#[target_feature(enable = "avx")]
965#[cfg_attr(test, assert_instr(vcvtps2pd))]
966#[stable(feature = "simd_x86", since = "1.27.0")]
967#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
968pub const fn _mm256_cvtps_pd(a: __m128) -> __m256d {
969 unsafe { simd_cast(a) }
970}
971
972/// Returns the first element of the input vector of `[4 x double]`.
973///
974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsd_f64)
975#[inline]
976#[target_feature(enable = "avx")]
977//#[cfg_attr(test, assert_instr(movsd))] FIXME
978#[stable(feature = "simd_x86", since = "1.27.0")]
979#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
980pub const fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
981 unsafe { simd_extract!(a, 0) }
982}
983
984/// Converts packed double-precision (64-bit) floating-point elements in `a`
985/// to packed 32-bit integers with truncation.
986///
987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi32)
988#[inline]
989#[target_feature(enable = "avx")]
990#[cfg_attr(test, assert_instr(vcvttpd2dq))]
991#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq(a)) }
}
995
996/// Converts packed double-precision (64-bit) floating-point elements in `a`
997/// to packed 32-bit integers.
998///
999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi32)
1000#[inline]
1001#[target_feature(enable = "avx")]
1002#[cfg_attr(test, assert_instr(vcvtpd2dq))]
1003#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2dq(a)) }
}
1007
1008/// Converts packed single-precision (32-bit) floating-point elements in `a`
1009/// to packed 32-bit integers with truncation.
1010///
1011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi32)
1012#[inline]
1013#[target_feature(enable = "avx")]
1014#[cfg_attr(test, assert_instr(vcvttps2dq))]
1015#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq(a)) }
}
1019
1020/// Extracts 128 bits (composed of 4 packed single-precision (32-bit)
1021/// floating-point elements) from `a`, selected with `imm8`.
1022///
1023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_ps)
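///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical caller
/// compiled with the `avx` feature):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///     // 0 selects the low 128 bits, 1 the high 128 bits:
///     // here the result is the __m128 [5.0, 6.0, 7.0, 8.0].
///     let _hi = _mm256_extractf128_ps::<1>(a);
/// }
/// ```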
1024#[inline]
1025#[target_feature(enable = "avx")]
1026#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
1027#[rustc_legacy_const_generics(1)]
1028#[stable(feature = "simd_x86", since = "1.27.0")]
1029#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1030pub const fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
1031 static_assert_uimm_bits!(IMM1, 1);
1032 unsafe {
1033 simd_shuffle!(
1034 a,
1035 _mm256_undefined_ps(),
1036 [[0, 1, 2, 3], [4, 5, 6, 7]][IMM1 as usize],
1037 )
1038 }
1039}
1040
1041/// Extracts 128 bits (composed of 2 packed double-precision (64-bit)
1042/// floating-point elements) from `a`, selected with `imm8`.
1043///
1044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_pd)
1045#[inline]
1046#[target_feature(enable = "avx")]
1047#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
1048#[rustc_legacy_const_generics(1)]
1049#[stable(feature = "simd_x86", since = "1.27.0")]
1050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1051pub const fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
1052 static_assert_uimm_bits!(IMM1, 1);
1053 unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) }
1054}
1055
1056/// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`.
1057///
1058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf128_si256)
1059#[inline]
1060#[target_feature(enable = "avx")]
1061#[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
1062#[rustc_legacy_const_generics(1)]
1063#[stable(feature = "simd_x86", since = "1.27.0")]
1064#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize]);
        transmute(dst)
    }
}
1072
1073/// Extracts a 32-bit integer from `a`, selected with `INDEX`.
1074///
1075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32)
1076#[inline]
1077#[target_feature(enable = "avx")]
1078// This intrinsic has no corresponding instruction.
1079#[rustc_legacy_const_generics(1)]
1080#[stable(feature = "simd_x86", since = "1.27.0")]
1081#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1082pub const fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
1083 static_assert_uimm_bits!(INDEX, 3);
1084 unsafe { simd_extract!(a.as_i32x8(), INDEX as u32) }
1085}
1086
1087/// Returns the first element of the input vector of `[8 x i32]`.
1088///
1089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32)
1090#[inline]
1091#[target_feature(enable = "avx")]
1092#[stable(feature = "simd_x86", since = "1.27.0")]
1093#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1094pub const fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
1095 unsafe { simd_extract!(a.as_i32x8(), 0) }
1096}
1097
1098/// Zeroes the contents of all XMM or YMM registers.
1099///
1100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)
1101#[inline]
1102#[target_feature(enable = "avx")]
1103#[cfg_attr(test, assert_instr(vzeroall))]
1104#[stable(feature = "simd_x86", since = "1.27.0")]
1105pub fn _mm256_zeroall() {
1106 unsafe { vzeroall() }
1107}
1108
1109/// Zeroes the upper 128 bits of all YMM registers;
1110/// the lower 128-bits of the registers are unmodified.
1111///
1112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper)
1113#[inline]
1114#[target_feature(enable = "avx")]
1115#[cfg_attr(test, assert_instr(vzeroupper))]
1116#[stable(feature = "simd_x86", since = "1.27.0")]
1117pub fn _mm256_zeroupper() {
1118 unsafe { vzeroupper() }
1119}
1120
1121/// Shuffles single-precision (32-bit) floating-point elements in `a`
1122/// within 128-bit lanes using the control in `b`.
1123///
1124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_ps)
1125#[inline]
1126#[target_feature(enable = "avx")]
1127#[cfg_attr(test, assert_instr(vpermilps))]
1128#[stable(feature = "simd_x86", since = "1.27.0")]
1129pub fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 {
1130 unsafe { vpermilps256(a, b.as_i32x8()) }
1131}
1132
1133/// Shuffles single-precision (32-bit) floating-point elements in `a`
1134/// using the control in `b`.
1135///
1136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_ps)
1137#[inline]
1138#[target_feature(enable = "avx")]
1139#[cfg_attr(test, assert_instr(vpermilps))]
1140#[stable(feature = "simd_x86", since = "1.27.0")]
1141pub fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
1142 unsafe { vpermilps(a, b.as_i32x4()) }
1143}
1144
1145/// Shuffles single-precision (32-bit) floating-point elements in `a`
1146/// within 128-bit lanes using the control in `imm8`.
1147///
1148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_ps)
1149#[inline]
1150#[target_feature(enable = "avx")]
1151#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
1152#[rustc_legacy_const_generics(1)]
1153#[stable(feature = "simd_x86", since = "1.27.0")]
1154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1155pub const fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
1156 static_assert_uimm_bits!(IMM8, 8);
1157 unsafe {
1158 simd_shuffle!(
1159 a,
1160 _mm256_undefined_ps(),
1161 [
1162 (IMM8 as u32 >> 0) & 0b11,
1163 (IMM8 as u32 >> 2) & 0b11,
1164 (IMM8 as u32 >> 4) & 0b11,
1165 (IMM8 as u32 >> 6) & 0b11,
1166 ((IMM8 as u32 >> 0) & 0b11) + 4,
1167 ((IMM8 as u32 >> 2) & 0b11) + 4,
1168 ((IMM8 as u32 >> 4) & 0b11) + 4,
1169 ((IMM8 as u32 >> 6) & 0b11) + 4,
1170 ],
1171 )
1172 }
1173}
1174
1175/// Shuffles single-precision (32-bit) floating-point elements in `a`
1176/// using the control in `imm8`.
1177///
1178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_ps)
1179#[inline]
1180#[target_feature(enable = "avx")]
1181#[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
1182#[rustc_legacy_const_generics(1)]
1183#[stable(feature = "simd_x86", since = "1.27.0")]
1184#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1185pub const fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
1186 static_assert_uimm_bits!(IMM8, 8);
1187 unsafe {
1188 simd_shuffle!(
1189 a,
1190 _mm_undefined_ps(),
1191 [
1192 (IMM8 as u32 >> 0) & 0b11,
1193 (IMM8 as u32 >> 2) & 0b11,
1194 (IMM8 as u32 >> 4) & 0b11,
1195 (IMM8 as u32 >> 6) & 0b11,
1196 ],
1197 )
1198 }
1199}
1200
/// Shuffles double-precision (64-bit) floating-point elements in `a`
/// within 128-bit lanes using the control in `b`.
1203///
1204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutevar_pd)
1205#[inline]
1206#[target_feature(enable = "avx")]
1207#[cfg_attr(test, assert_instr(vpermilpd))]
1208#[stable(feature = "simd_x86", since = "1.27.0")]
1209pub fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d {
1210 unsafe { vpermilpd256(a, b.as_i64x4()) }
1211}
1212
1213/// Shuffles double-precision (64-bit) floating-point elements in `a`
1214/// using the control in `b`.
1215///
1216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutevar_pd)
1217#[inline]
1218#[target_feature(enable = "avx")]
1219#[cfg_attr(test, assert_instr(vpermilpd))]
1220#[stable(feature = "simd_x86", since = "1.27.0")]
1221pub fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
1222 unsafe { vpermilpd(a, b.as_i64x2()) }
1223}
1224
1225/// Shuffles double-precision (64-bit) floating-point elements in `a`
1226/// within 128-bit lanes using the control in `imm8`.
1227///
1228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute_pd)
1229#[inline]
1230#[target_feature(enable = "avx")]
1231#[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))]
1232#[rustc_legacy_const_generics(1)]
1233#[stable(feature = "simd_x86", since = "1.27.0")]
1234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1235pub const fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
1236 static_assert_uimm_bits!(IMM4, 4);
1237 unsafe {
1238 simd_shuffle!(
1239 a,
1240 _mm256_undefined_pd(),
1241 [
1242 ((IMM4 as u32 >> 0) & 1),
1243 ((IMM4 as u32 >> 1) & 1),
1244 ((IMM4 as u32 >> 2) & 1) + 2,
1245 ((IMM4 as u32 >> 3) & 1) + 2,
1246 ],
1247 )
1248 }
1249}
1250
1251/// Shuffles double-precision (64-bit) floating-point elements in `a`
1252/// using the control in `imm8`.
1253///
1254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_pd)
1255#[inline]
1256#[target_feature(enable = "avx")]
1257#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))]
1258#[rustc_legacy_const_generics(1)]
1259#[stable(feature = "simd_x86", since = "1.27.0")]
1260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1261pub const fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
1262 static_assert_uimm_bits!(IMM2, 2);
1263 unsafe {
1264 simd_shuffle!(
1265 a,
1266 _mm_undefined_pd(),
1267 [(IMM2 as u32) & 1, (IMM2 as u32 >> 1) & 1],
1268 )
1269 }
1270}
1271
1272/// Shuffles 256 bits (composed of 8 packed single-precision (32-bit)
1273/// floating-point elements) selected by `imm8` from `a` and `b`.
1274///
1275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_ps)
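///
/// A minimal usage sketch (illustrative only; `demo` is a hypothetical caller
/// compiled with the `avx` feature). Bits 1:0 of `imm8` pick the source of the
/// low 128 bits (0 = low of `a`, 1 = high of `a`, 2 = low of `b`, 3 = high of
/// `b`), bits 5:4 pick the source of the high 128 bits, and bits 3 and 7 zero
/// the respective half instead:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///     let b = _mm256_setr_ps(10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0);
///     // IMM8 = 0x31: low half = high half of `a`, high half = high half of `b`,
///     // giving [5.0, 6.0, 7.0, 8.0, 50.0, 60.0, 70.0, 80.0].
///     let _r = _mm256_permute2f128_ps::<0x31>(a, b);
/// }
/// ```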
1276#[inline]
1277#[target_feature(enable = "avx")]
1278#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))]
1279#[rustc_legacy_const_generics(2)]
1280#[stable(feature = "simd_x86", since = "1.27.0")]
1281#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_castsi256_ps(_mm256_permute2f128_si256::<IMM8>(
        _mm256_castps_si256(a),
        _mm256_castps_si256(b),
    ))
}
1289
1290/// Shuffles 256 bits (composed of 4 packed double-precision (64-bit)
1291/// floating-point elements) selected by `imm8` from `a` and `b`.
1292///
1293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_pd)
1294#[inline]
1295#[target_feature(enable = "avx")]
1296#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
1297#[rustc_legacy_const_generics(2)]
1298#[stable(feature = "simd_x86", since = "1.27.0")]
1299#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_castsi256_pd(_mm256_permute2f128_si256::<IMM8>(
        _mm256_castpd_si256(a),
        _mm256_castpd_si256(b),
    ))
}
1307
1308/// Shuffles 128-bits (composed of integer data) selected by `imm8`
1309/// from `a` and `b`.
1310///
1311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permute2f128_si256)
1312#[inline]
1313#[target_feature(enable = "avx")]
1314#[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
1315#[rustc_legacy_const_generics(2)]
1316#[stable(feature = "simd_x86", since = "1.27.0")]
1317#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1318pub const fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
1319 static_assert_uimm_bits!(IMM8, 8);
1320 const fn idx(imm8: i32, pos: u32) -> u32 {
1321 let part = if pos < 2 {
1322 imm8 & 0xf
1323 } else {
1324 (imm8 & 0xf0) >> 4
1325 };
1326 2 * (part as u32 & 0b11) + (pos & 1)
1327 }
1328 const fn idx0(imm8: i32, pos: u32) -> u32 {
1329 let part = if pos < 2 {
1330 imm8 & 0xf
1331 } else {
1332 (imm8 & 0xf0) >> 4
1333 };
1334 if part & 0b1000 != 0 { 4 } else { pos }
1335 }
1336 unsafe {
1337 let r = simd_shuffle!(
1338 a.as_i64x4(),
1339 b.as_i64x4(),
1340 [idx(IMM8, 0), idx(IMM8, 1), idx(IMM8, 2), idx(IMM8, 3)]
1341 );
1342 let r: i64x4 = simd_shuffle!(
1343 r,
1344 i64x4::ZERO,
1345 [idx0(IMM8, 0), idx0(IMM8, 1), idx0(IMM8, 2), idx0(IMM8, 3)]
1346 );
1347 r.as_m256i()
1348 }
1349}
1350
1351/// Broadcasts a single-precision (32-bit) floating-point element from memory
1352/// to all elements of the returned vector.
1353///
1354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ss)
1355#[inline]
1356#[target_feature(enable = "avx")]
1357#[cfg_attr(test, assert_instr(vbroadcastss))]
1358#[stable(feature = "simd_x86", since = "1.27.0")]
1359#[allow(clippy::trivially_copy_pass_by_ref)]
1360#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1361pub const fn _mm256_broadcast_ss(f: &f32) -> __m256 {
1362 _mm256_set1_ps(*f)
1363}
1364
1365/// Broadcasts a single-precision (32-bit) floating-point element from memory
1366/// to all elements of the returned vector.
1367///
1368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_ss)
1369#[inline]
1370#[target_feature(enable = "avx")]
1371#[cfg_attr(test, assert_instr(vbroadcastss))]
1372#[stable(feature = "simd_x86", since = "1.27.0")]
1373#[allow(clippy::trivially_copy_pass_by_ref)]
1374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1375pub const fn _mm_broadcast_ss(f: &f32) -> __m128 {
1376 _mm_set1_ps(*f)
1377}
1378
1379/// Broadcasts a double-precision (64-bit) floating-point element from memory
1380/// to all elements of the returned vector.
1381///
1382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_sd)
1383#[inline]
1384#[target_feature(enable = "avx")]
1385#[cfg_attr(test, assert_instr(vbroadcastsd))]
1386#[stable(feature = "simd_x86", since = "1.27.0")]
1387#[allow(clippy::trivially_copy_pass_by_ref)]
1388#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1389pub const fn _mm256_broadcast_sd(f: &f64) -> __m256d {
1390 _mm256_set1_pd(*f)
1391}
1392
1393/// Broadcasts 128 bits from memory (composed of 4 packed single-precision
1394/// (32-bit) floating-point elements) to all elements of the returned vector.
1395///
1396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_ps)
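///
/// A minimal usage sketch (illustrative; assumes an AVX-capable target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn example() {
///     use std::arch::x86_64::*;
///     // Safety: requires the `avx` feature.
///     unsafe {
///         let lane = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
///         let v = _mm256_broadcast_ps(&lane);
///         // v = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0]
///         let _ = v;
///     }
/// }
/// ```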
1397#[inline]
1398#[target_feature(enable = "avx")]
1399#[cfg_attr(test, assert_instr(vbroadcastf128))]
1400#[stable(feature = "simd_x86", since = "1.27.0")]
1401#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1402pub const fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
1403 unsafe { simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3]) }
1404}
1405
1406/// Broadcasts 128 bits from memory (composed of 2 packed double-precision
1407/// (64-bit) floating-point elements) to all elements of the returned vector.
1408///
1409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_pd)
1410#[inline]
1411#[target_feature(enable = "avx")]
1412#[cfg_attr(test, assert_instr(vbroadcastf128))]
1413#[stable(feature = "simd_x86", since = "1.27.0")]
1414#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1415pub const fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
1416 unsafe { simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1]) }
1417}
1418
1419/// Copies `a` to result, then inserts 128 bits (composed of 4 packed
1420/// single-precision (32-bit) floating-point elements) from `b` into result
1421/// at the location specified by `imm8`.
1422///
1423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_ps)
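///
/// A minimal usage sketch (illustrative; assumes AVX support was verified at
/// runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn example() {
///     use std::arch::x86_64::*;
///     // Safety: requires the `avx` feature.
///     unsafe {
///         let a = _mm256_set1_ps(0.0);
///         let hi = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
///         // IMM1 = 1 replaces the upper 128-bit half of `a` with `hi`.
///         let r = _mm256_insertf128_ps::<1>(a, hi);
///         // r = [0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0]
///         let _ = r;
///     }
/// }
/// ```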
1424#[inline]
1425#[target_feature(enable = "avx")]
1426#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
1427#[rustc_legacy_const_generics(2)]
1428#[stable(feature = "simd_x86", since = "1.27.0")]
1429#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1430pub const fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
1431 static_assert_uimm_bits!(IMM1, 1);
1432 unsafe {
1433 simd_shuffle!(
1434 a,
1435 _mm256_castps128_ps256(b),
1436 [[8, 9, 10, 11, 4, 5, 6, 7], [0, 1, 2, 3, 8, 9, 10, 11]][IMM1 as usize],
1437 )
1438 }
1439}
1440
1441/// Copies `a` to result, then inserts 128 bits (composed of 2 packed
1442/// double-precision (64-bit) floating-point elements) from `b` into result
1443/// at the location specified by `imm8`.
1444///
1445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_pd)
1446#[inline]
1447#[target_feature(enable = "avx")]
1448#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
1449#[rustc_legacy_const_generics(2)]
1450#[stable(feature = "simd_x86", since = "1.27.0")]
1451#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1452pub const fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
1453 static_assert_uimm_bits!(IMM1, 1);
1454 unsafe {
1455 simd_shuffle!(
1456 a,
1457 _mm256_castpd128_pd256(b),
1458 [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
1459 )
1460 }
1461}
1462
1463/// Copies `a` to result, then inserts 128 bits from `b` into result
1464/// at the location specified by `imm8`.
1465///
1466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf128_si256)
1467#[inline]
1468#[target_feature(enable = "avx")]
1469#[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
1470#[rustc_legacy_const_generics(2)]
1471#[stable(feature = "simd_x86", since = "1.27.0")]
1472#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
    static_assert_uimm_bits!(IMM1, 1);
    unsafe {
        let dst: i64x4 = simd_shuffle!(
            a.as_i64x4(),
            _mm256_castsi128_si256(b).as_i64x4(),
            [[4, 5, 2, 3], [0, 1, 4, 5]][IMM1 as usize],
        );
        transmute(dst)
    }
}
1484
1485/// Copies `a` to result, and inserts the 8-bit integer `i` into result
1486/// at the location specified by `index`.
1487///
1488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi8)
1489#[inline]
1490#[target_feature(enable = "avx")]
1491// This intrinsic has no corresponding instruction.
1492#[rustc_legacy_const_generics(2)]
1493#[stable(feature = "simd_x86", since = "1.27.0")]
1494#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
    static_assert_uimm_bits!(INDEX, 5);
    unsafe { transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i)) }
}
1499
1500/// Copies `a` to result, and inserts the 16-bit integer `i` into result
1501/// at the location specified by `index`.
1502///
1503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi16)
1504#[inline]
1505#[target_feature(enable = "avx")]
1506// This intrinsic has no corresponding instruction.
1507#[rustc_legacy_const_generics(2)]
1508#[stable(feature = "simd_x86", since = "1.27.0")]
1509#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
    static_assert_uimm_bits!(INDEX, 4);
    unsafe { transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i)) }
}
1514
1515/// Copies `a` to result, and inserts the 32-bit integer `i` into result
1516/// at the location specified by `index`.
1517///
1518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insert_epi32)
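///
/// A minimal usage sketch (illustrative; assumes an AVX-capable target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn example() {
///     use std::arch::x86_64::*;
///     // Safety: requires the `avx` feature.
///     unsafe {
///         let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///         // Overwrite element 7 with 99; all other elements are copied unchanged.
///         let r = _mm256_insert_epi32::<7>(a, 99);
///         let _ = r;
///     }
/// }
/// ```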
1519#[inline]
1520#[target_feature(enable = "avx")]
1521// This intrinsic has no corresponding instruction.
1522#[rustc_legacy_const_generics(2)]
1523#[stable(feature = "simd_x86", since = "1.27.0")]
1524#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
    static_assert_uimm_bits!(INDEX, 3);
    unsafe { transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i)) }
}
1529
1530/// Loads 256-bits (composed of 4 packed double-precision (64-bit)
1531/// floating-point elements) from memory into result.
1532/// `mem_addr` must be aligned on a 32-byte boundary or a
1533/// general-protection exception may be generated.
1534///
1535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd)
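///
/// A minimal sketch of satisfying the 32-byte alignment requirement
/// (illustrative; the `Aligned` wrapper is a hypothetical helper):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn example() {
///     use std::arch::x86_64::*;
///     #[repr(align(32))]
///     struct Aligned([f64; 4]);
///     let data = Aligned([1.0, 2.0, 3.0, 4.0]);
///     // Safety: requires the `avx` feature; `data` is 32-byte aligned and valid for reads.
///     unsafe {
///         let v = _mm256_load_pd(data.0.as_ptr());
///         let _ = v;
///     }
/// }
/// ```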
1536#[inline]
1537#[target_feature(enable = "avx")]
1538#[cfg_attr(
1539 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1540 assert_instr(vmovap)
1541)]
1542#[stable(feature = "simd_x86", since = "1.27.0")]
1543#[allow(clippy::cast_ptr_alignment)]
1544#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1545pub const unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
1546 *(mem_addr as *const __m256d)
1547}
1548
1549/// Stores 256-bits (composed of 4 packed double-precision (64-bit)
1550/// floating-point elements) from `a` into memory.
1551/// `mem_addr` must be aligned on a 32-byte boundary or a
1552/// general-protection exception may be generated.
1553///
1554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd)
1555#[inline]
1556#[target_feature(enable = "avx")]
1557#[cfg_attr(
1558 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1559 assert_instr(vmovap)
1560)]
1561#[stable(feature = "simd_x86", since = "1.27.0")]
1562#[allow(clippy::cast_ptr_alignment)]
1563#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1564pub const unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
1565 *(mem_addr as *mut __m256d) = a;
1566}
1567
1568/// Loads 256-bits (composed of 8 packed single-precision (32-bit)
1569/// floating-point elements) from memory into result.
1570/// `mem_addr` must be aligned on a 32-byte boundary or a
1571/// general-protection exception may be generated.
1572///
1573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_ps)
1574#[inline]
1575#[target_feature(enable = "avx")]
1576#[cfg_attr(
1577 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1578 assert_instr(vmovaps)
1579)]
1580#[stable(feature = "simd_x86", since = "1.27.0")]
1581#[allow(clippy::cast_ptr_alignment)]
1582#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1583pub const unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
1584 *(mem_addr as *const __m256)
1585}
1586
1587/// Stores 256-bits (composed of 8 packed single-precision (32-bit)
1588/// floating-point elements) from `a` into memory.
1589/// `mem_addr` must be aligned on a 32-byte boundary or a
1590/// general-protection exception may be generated.
1591///
1592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_ps)
1593#[inline]
1594#[target_feature(enable = "avx")]
1595#[cfg_attr(
1596 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1597 assert_instr(vmovaps)
1598)]
1599#[stable(feature = "simd_x86", since = "1.27.0")]
1600#[allow(clippy::cast_ptr_alignment)]
1601#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1602pub const unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
1603 *(mem_addr as *mut __m256) = a;
1604}
1605
1606/// Loads 256-bits (composed of 4 packed double-precision (64-bit)
1607/// floating-point elements) from memory into result.
1608/// `mem_addr` does not need to be aligned on any particular boundary.
1609///
1610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_pd)
1611#[inline]
1612#[target_feature(enable = "avx")]
1613#[cfg_attr(test, assert_instr(vmovup))]
1614#[stable(feature = "simd_x86", since = "1.27.0")]
1615#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
    let mut dst: __m256d = _mm256_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256d>(),
    );
    dst
}
1625
1626/// Stores 256-bits (composed of 4 packed double-precision (64-bit)
1627/// floating-point elements) from `a` into memory.
1628/// `mem_addr` does not need to be aligned on any particular boundary.
1629///
1630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_pd)
1631#[inline]
1632#[target_feature(enable = "avx")]
1633#[cfg_attr(test, assert_instr(vmovup))]
1634#[stable(feature = "simd_x86", since = "1.27.0")]
1635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
    mem_addr.cast::<__m256d>().write_unaligned(a);
}
1639
1640/// Loads 256-bits (composed of 8 packed single-precision (32-bit)
1641/// floating-point elements) from memory into result.
1642/// `mem_addr` does not need to be aligned on any particular boundary.
1643///
1644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_ps)
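///
/// A minimal usage sketch (illustrative; assumes an AVX-capable target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn example() {
///     use std::arch::x86_64::*;
///     let data: [f32; 8] = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
///     // Safety: requires the `avx` feature; the pointer must be valid for reading
///     // eight `f32` values, but no particular alignment is needed.
///     unsafe {
///         let v = _mm256_loadu_ps(data.as_ptr());
///         let _ = v;
///     }
/// }
/// ```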
1645#[inline]
1646#[target_feature(enable = "avx")]
1647#[cfg_attr(test, assert_instr(vmovups))]
1648#[stable(feature = "simd_x86", since = "1.27.0")]
1649#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
    let mut dst: __m256 = _mm256_undefined_ps();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256>(),
    );
    dst
}
1659
1660/// Stores 256-bits (composed of 8 packed single-precision (32-bit)
1661/// floating-point elements) from `a` into memory.
1662/// `mem_addr` does not need to be aligned on any particular boundary.
1663///
1664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_ps)
1665#[inline]
1666#[target_feature(enable = "avx")]
1667#[cfg_attr(test, assert_instr(vmovups))]
1668#[stable(feature = "simd_x86", since = "1.27.0")]
1669#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
    mem_addr.cast::<__m256>().write_unaligned(a);
}
1673
1674/// Loads 256-bits of integer data from memory into result.
1675/// `mem_addr` must be aligned on a 32-byte boundary or a
1676/// general-protection exception may be generated.
1677///
1678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_si256)
1679#[inline]
1680#[target_feature(enable = "avx")]
1681#[cfg_attr(
1682 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1683 assert_instr(vmovaps)
1684)] // FIXME vmovdqa expected
1685#[stable(feature = "simd_x86", since = "1.27.0")]
1686#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1687pub const unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
1688 *mem_addr
1689}
1690
1691/// Stores 256-bits of integer data from `a` into memory.
1692/// `mem_addr` must be aligned on a 32-byte boundary or a
1693/// general-protection exception may be generated.
1694///
1695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_si256)
1696#[inline]
1697#[target_feature(enable = "avx")]
1698#[cfg_attr(
1699 all(test, not(all(target_arch = "x86", target_env = "msvc"))),
1700 assert_instr(vmovaps)
1701)] // FIXME vmovdqa expected
1702#[stable(feature = "simd_x86", since = "1.27.0")]
1703#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1704pub const unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
1705 *mem_addr = a;
1706}
1707
1708/// Loads 256-bits of integer data from memory into result.
1709/// `mem_addr` does not need to be aligned on any particular boundary.
1710///
1711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_si256)
1712#[inline]
1713#[target_feature(enable = "avx")]
1714#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
1715#[stable(feature = "simd_x86", since = "1.27.0")]
1716#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
    let mut dst: __m256i = _mm256_undefined_si256();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        ptr::addr_of_mut!(dst) as *mut u8,
        mem::size_of::<__m256i>(),
    );
    dst
}
1726
1727/// Stores 256-bits of integer data from `a` into memory.
1728/// `mem_addr` does not need to be aligned on any particular boundary.
1729///
1730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_si256)
1731#[inline]
1732#[target_feature(enable = "avx")]
1733#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
1734#[stable(feature = "simd_x86", since = "1.27.0")]
1735#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
    mem_addr.write_unaligned(a);
}
1739
1740/// Loads packed double-precision (64-bit) floating-point elements from memory
1741/// into result using `mask` (elements are zeroed out when the high bit of the
1742/// corresponding element is not set).
1743///
1744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_pd)
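///
/// A minimal sketch of how the mask's sign bits select elements (illustrative;
/// assumes AVX support was verified at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn example() {
///     use std::arch::x86_64::*;
///     let data: [f64; 4] = [1.0, 2.0, 3.0, 4.0];
///     // Safety: requires the `avx` feature and a pointer valid for the selected reads.
///     unsafe {
///         // Lanes whose mask value has its high bit set (-1) are loaded; lanes with 0 are zeroed.
///         let mask = _mm256_setr_epi64x(-1, 0, -1, 0);
///         let v = _mm256_maskload_pd(data.as_ptr(), mask);
///         // v = [1.0, 0.0, 3.0, 0.0]
///         let _ = v;
///     }
/// }
/// ```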
1745#[inline]
1746#[target_feature(enable = "avx")]
1747#[cfg_attr(test, assert_instr(vmaskmovpd))]
1748#[stable(feature = "simd_x86", since = "1.27.0")]
1749#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
    let mask: i64x4 = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_pd())
}
1754
1755/// Stores packed double-precision (64-bit) floating-point elements from `a`
1756/// into memory using `mask`.
1757///
1758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_pd)
1759#[inline]
1760#[target_feature(enable = "avx")]
1761#[cfg_attr(test, assert_instr(vmaskmovpd))]
1762#[stable(feature = "simd_x86", since = "1.27.0")]
1763#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
    let mask: i64x4 = simd_shr(mask.as_i64x4(), i64x4::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}
1768
1769/// Loads packed double-precision (64-bit) floating-point elements from memory
1770/// into result using `mask` (elements are zeroed out when the high bit of the
1771/// corresponding element is not set).
1772///
1773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_pd)
1774#[inline]
1775#[target_feature(enable = "avx")]
1776#[cfg_attr(test, assert_instr(vmaskmovpd))]
1777#[stable(feature = "simd_x86", since = "1.27.0")]
1778#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
    let mask: i64x2 = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_pd())
}
1783
1784/// Stores packed double-precision (64-bit) floating-point elements from `a`
1785/// into memory using `mask`.
1786///
1787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_pd)
1788#[inline]
1789#[target_feature(enable = "avx")]
1790#[cfg_attr(test, assert_instr(vmaskmovpd))]
1791#[stable(feature = "simd_x86", since = "1.27.0")]
1792#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
    let mask: i64x2 = simd_shr(mask.as_i64x2(), i64x2::splat(63));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}
1797
1798/// Loads packed single-precision (32-bit) floating-point elements from memory
1799/// into result using `mask` (elements are zeroed out when the high bit of the
1800/// corresponding element is not set).
1801///
1802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskload_ps)
1803#[inline]
1804#[target_feature(enable = "avx")]
1805#[cfg_attr(test, assert_instr(vmaskmovps))]
1806#[stable(feature = "simd_x86", since = "1.27.0")]
1807#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
    let mask: i32x8 = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_ps())
}
1812
1813/// Stores packed single-precision (32-bit) floating-point elements from `a`
1814/// into memory using `mask`.
1815///
1816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskstore_ps)
1817#[inline]
1818#[target_feature(enable = "avx")]
1819#[cfg_attr(test, assert_instr(vmaskmovps))]
1820#[stable(feature = "simd_x86", since = "1.27.0")]
1821#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
    let mask: i32x8 = simd_shr(mask.as_i32x8(), i32x8::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}
1826
1827/// Loads packed single-precision (32-bit) floating-point elements from memory
1828/// into result using `mask` (elements are zeroed out when the high bit of the
1829/// corresponding element is not set).
1830///
1831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskload_ps)
1832#[inline]
1833#[target_feature(enable = "avx")]
1834#[cfg_attr(test, assert_instr(vmaskmovps))]
1835#[stable(feature = "simd_x86", since = "1.27.0")]
1836#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
    let mask: i32x4 = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_ps())
}
1841
1842/// Stores packed single-precision (32-bit) floating-point elements from `a`
1843/// into memory using `mask`.
1844///
1845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskstore_ps)
1846#[inline]
1847#[target_feature(enable = "avx")]
1848#[cfg_attr(test, assert_instr(vmaskmovps))]
1849#[stable(feature = "simd_x86", since = "1.27.0")]
1850#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
    let mask: i32x4 = simd_shr(mask.as_i32x4(), i32x4::splat(31));
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}
1855
/// Duplicates odd-indexed single-precision (32-bit) floating-point elements
/// from `a` and returns the results.
1858///
1859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movehdup_ps)
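///
/// A minimal usage sketch (illustrative; assumes an AVX-capable target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn example() {
///     use std::arch::x86_64::*;
///     // Safety: requires the `avx` feature.
///     unsafe {
///         let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///         let r = _mm256_movehdup_ps(a);
///         // r = [2.0, 2.0, 4.0, 4.0, 6.0, 6.0, 8.0, 8.0]
///         let _ = r;
///     }
/// }
/// ```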
1860#[inline]
1861#[target_feature(enable = "avx")]
1862#[cfg_attr(test, assert_instr(vmovshdup))]
1863#[stable(feature = "simd_x86", since = "1.27.0")]
1864#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1865pub const fn _mm256_movehdup_ps(a: __m256) -> __m256 {
1866 unsafe { simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) }
1867}
1868
/// Duplicates even-indexed single-precision (32-bit) floating-point elements
/// from `a` and returns the results.
1871///
1872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_moveldup_ps)
1873#[inline]
1874#[target_feature(enable = "avx")]
1875#[cfg_attr(test, assert_instr(vmovsldup))]
1876#[stable(feature = "simd_x86", since = "1.27.0")]
1877#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1878pub const fn _mm256_moveldup_ps(a: __m256) -> __m256 {
1879 unsafe { simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) }
1880}
1881
/// Duplicates even-indexed double-precision (64-bit) floating-point elements
/// from `a` and returns the results.
1884///
1885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movedup_pd)
1886#[inline]
1887#[target_feature(enable = "avx")]
1888#[cfg_attr(test, assert_instr(vmovddup))]
1889#[stable(feature = "simd_x86", since = "1.27.0")]
1890#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1891pub const fn _mm256_movedup_pd(a: __m256d) -> __m256d {
1892 unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) }
1893}
1894
1895/// Loads 256-bits of integer data from unaligned memory into result.
1896/// This intrinsic may perform better than `_mm256_loadu_si256` when the
1897/// data crosses a cache line boundary.
1898///
1899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lddqu_si256)
1900#[inline]
1901#[target_feature(enable = "avx")]
1902#[cfg_attr(test, assert_instr(vlddqu))]
1903#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
    transmute(vlddqu(mem_addr as *const i8))
}
1907
1908/// Moves integer data from a 256-bit integer vector to a 32-byte
1909/// aligned memory location. To minimize caching, the data is flagged as
/// non-temporal (unlikely to be used again soon).
1911///
1912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_si256)
1913///
1914/// # Safety of non-temporal stores
1915///
1916/// After using this intrinsic, but before any other access to the memory that this intrinsic
1917/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
1918/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
1919/// return.
1920///
1921/// See [`_mm_sfence`] for details.
1922#[inline]
1923#[target_feature(enable = "avx")]
1924#[cfg_attr(test, assert_instr(vmovntdq))]
1925#[stable(feature = "simd_x86", since = "1.27.0")]
1926pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
1927 // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
1928 crate::arch::asm!(
1929 vps!("vmovntdq", ",{a}"),
1930 p = in(reg) mem_addr,
1931 a = in(ymm_reg) a,
1932 options(nostack, preserves_flags),
1933 );
1934}
1935
1936/// Moves double-precision values from a 256-bit vector of `[4 x double]`
1937/// to a 32-byte aligned memory location. To minimize caching, the data is
1938/// flagged as non-temporal (unlikely to be used again soon).
1939///
1940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_pd)
1941///
1942/// # Safety of non-temporal stores
1943///
1944/// After using this intrinsic, but before any other access to the memory that this intrinsic
1945/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
1946/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
1947/// return.
1948///
1949/// See [`_mm_sfence`] for details.
1950#[inline]
1951#[target_feature(enable = "avx")]
1952#[cfg_attr(test, assert_instr(vmovntpd))]
1953#[stable(feature = "simd_x86", since = "1.27.0")]
1954#[allow(clippy::cast_ptr_alignment)]
1955pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
1956 // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
1957 crate::arch::asm!(
1958 vps!("vmovntpd", ",{a}"),
1959 p = in(reg) mem_addr,
1960 a = in(ymm_reg) a,
1961 options(nostack, preserves_flags),
1962 );
1963}
1964
1965/// Moves single-precision floating point values from a 256-bit vector
1966/// of `[8 x float]` to a 32-byte aligned memory location. To minimize
1967/// caching, the data is flagged as non-temporal (unlikely to be used again
1968/// soon).
1969///
1970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_stream_ps)
1971///
1972/// # Safety of non-temporal stores
1973///
1974/// After using this intrinsic, but before any other access to the memory that this intrinsic
1975/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
1976/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
1977/// return.
1978///
1979/// See [`_mm_sfence`] for details.
1980#[inline]
1981#[target_feature(enable = "avx")]
1982#[cfg_attr(test, assert_instr(vmovntps))]
1983#[stable(feature = "simd_x86", since = "1.27.0")]
1984#[allow(clippy::cast_ptr_alignment)]
1985pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) {
1986 // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
1987 crate::arch::asm!(
1988 vps!("vmovntps", ",{a}"),
1989 p = in(reg) mem_addr,
1990 a = in(ymm_reg) a,
1991 options(nostack, preserves_flags),
1992 );
1993}
1994
1995/// Computes the approximate reciprocal of packed single-precision (32-bit)
1996/// floating-point elements in `a`, and returns the results. The maximum
1997/// relative error for this approximation is less than 1.5*2^-12.
1998///
1999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp_ps)
2000#[inline]
2001#[target_feature(enable = "avx")]
2002#[cfg_attr(test, assert_instr(vrcpps))]
2003#[stable(feature = "simd_x86", since = "1.27.0")]
2004pub fn _mm256_rcp_ps(a: __m256) -> __m256 {
2005 unsafe { vrcpps(a) }
2006}
2007
2008/// Computes the approximate reciprocal square root of packed single-precision
2009/// (32-bit) floating-point elements in `a`, and returns the results.
2010/// The maximum relative error for this approximation is less than 1.5*2^-12.
2011///
2012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt_ps)
2013#[inline]
2014#[target_feature(enable = "avx")]
2015#[cfg_attr(test, assert_instr(vrsqrtps))]
2016#[stable(feature = "simd_x86", since = "1.27.0")]
2017pub fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
2018 unsafe { vrsqrtps(a) }
2019}
2020
/// Unpacks and interleaves double-precision (64-bit) floating-point elements
/// from the high half of each 128-bit lane in `a` and `b`.
2023///
2024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_pd)
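///
/// A minimal usage sketch (illustrative; assumes an AVX-capable target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn example() {
///     use std::arch::x86_64::*;
///     // Safety: requires the `avx` feature.
///     unsafe {
///         let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///         let b = _mm256_setr_pd(5.0, 6.0, 7.0, 8.0);
///         let r = _mm256_unpackhi_pd(a, b);
///         // r = [2.0, 6.0, 4.0, 8.0]
///         let _ = r;
///     }
/// }
/// ```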
2025#[inline]
2026#[target_feature(enable = "avx")]
2027#[cfg_attr(test, assert_instr(vunpckhpd))]
2028#[stable(feature = "simd_x86", since = "1.27.0")]
2029#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2030pub const fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
2031 unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
2032}
2033
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
/// from the high half of each 128-bit lane in `a` and `b`.
2036///
2037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpackhi_ps)
2038#[inline]
2039#[target_feature(enable = "avx")]
2040#[cfg_attr(test, assert_instr(vunpckhps))]
2041#[stable(feature = "simd_x86", since = "1.27.0")]
2042#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2043pub const fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
2044 unsafe { simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) }
2045}
2046
/// Unpacks and interleaves double-precision (64-bit) floating-point elements
/// from the low half of each 128-bit lane in `a` and `b`.
2049///
2050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_pd)
2051#[inline]
2052#[target_feature(enable = "avx")]
2053#[cfg_attr(test, assert_instr(vunpcklpd))]
2054#[stable(feature = "simd_x86", since = "1.27.0")]
2055#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2056pub const fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
2057 unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
2058}
2059
/// Unpacks and interleaves single-precision (32-bit) floating-point elements
/// from the low half of each 128-bit lane in `a` and `b`.
2062///
2063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_unpacklo_ps)
2064#[inline]
2065#[target_feature(enable = "avx")]
2066#[cfg_attr(test, assert_instr(vunpcklps))]
2067#[stable(feature = "simd_x86", since = "1.27.0")]
2068#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2069pub const fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
2070 unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) }
2071}
2072
2073/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
2074/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
2075/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
2076/// the result is zero, otherwise set `CF` to 0. Return the `ZF` value.
2077///
2078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_si256)
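///
/// A minimal usage sketch (illustrative; assumes AVX support was verified at
/// runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn example() {
///     use std::arch::x86_64::*;
///     // Safety: requires the `avx` feature.
///     unsafe {
///         let a = _mm256_setr_epi64x(1, 0, 0, 0);
///         let b = _mm256_setr_epi64x(2, 0, 0, 0);
///         // `a & b` is zero in every lane, so ZF is set and the result is 1.
///         assert_eq!(_mm256_testz_si256(a, b), 1);
///     }
/// }
/// ```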
2079#[inline]
2080#[target_feature(enable = "avx")]
2081#[cfg_attr(test, assert_instr(vptest))]
2082#[stable(feature = "simd_x86", since = "1.27.0")]
2083#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
    unsafe {
        let r: i64x4 = simd_and(a.as_i64x4(), b.as_i64x4());
        (0i64 == simd_reduce_or(r)) as i32
    }
}
2090
2091/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
2092/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
2093/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
2094/// the result is zero, otherwise set `CF` to 0. Return the `CF` value.
2095///
2096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_si256)
2097#[inline]
2098#[target_feature(enable = "avx")]
2099#[cfg_attr(test, assert_instr(vptest))]
2100#[stable(feature = "simd_x86", since = "1.27.0")]
2101#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
    unsafe {
        let r: i64x4 = simd_and(simd_xor(a.as_i64x4(), i64x4::splat(!0)), b.as_i64x4());
        (0i64 == simd_reduce_or(r)) as i32
    }
}
2108
2109/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
2110/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
2111/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
2112/// the result is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and
2113/// `CF` values are zero, otherwise return 0.
2114///
2115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_si256)
2116#[inline]
2117#[target_feature(enable = "avx")]
2118#[cfg_attr(test, assert_instr(vptest))]
2119#[stable(feature = "simd_x86", since = "1.27.0")]
2120pub fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 {
2121 unsafe { ptestnzc256(a.as_i64x4(), b.as_i64x4()) }
2122}
2123
2124/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
2125/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
2126/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
2127/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2128/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2129/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
2130/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
2131///
2132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_pd)
2133#[inline]
2134#[target_feature(enable = "avx")]
2135#[cfg_attr(test, assert_instr(vtestpd))]
2136#[stable(feature = "simd_x86", since = "1.27.0")]
2137pub fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 {
2138 unsafe { vtestzpd256(a, b) }
2139}
2140
2141/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
2142/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
2143/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
2144/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2145/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2146/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
2147/// is zero, otherwise set `CF` to 0. Return the `CF` value.
2148///
2149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_pd)
2150#[inline]
2151#[target_feature(enable = "avx")]
2152#[cfg_attr(test, assert_instr(vtestpd))]
2153#[stable(feature = "simd_x86", since = "1.27.0")]
2154pub fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 {
2155 unsafe { vtestcpd256(a, b) }
2156}
2157
2158/// Computes the bitwise AND of 256 bits (representing double-precision (64-bit)
2159/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
2160/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
2161/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2162/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2163/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
2164/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
2165/// are zero, otherwise return 0.
2166///
2167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_pd)
2168#[inline]
2169#[target_feature(enable = "avx")]
2170#[cfg_attr(test, assert_instr(vtestpd))]
2171#[stable(feature = "simd_x86", since = "1.27.0")]
2172pub fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
2173 unsafe { vtestnzcpd256(a, b) }
2174}
2175
2176/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
2177/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2178/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
2179/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2180/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2181/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
2182/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
2183///
2184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_pd)
2185#[inline]
2186#[target_feature(enable = "avx")]
2187#[cfg_attr(test, assert_instr(vtestpd))]
2188#[stable(feature = "simd_x86", since = "1.27.0")]
2189#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
    unsafe {
        let r: i64x2 = simd_lt(transmute(_mm_and_pd(a, b)), i64x2::ZERO);
        (0i64 == simd_reduce_or(r)) as i32
    }
}
2196
2197/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
2198/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2199/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
2200/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2201/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2202/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
2203/// is zero, otherwise set `CF` to 0. Return the `CF` value.
2204///
2205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_pd)
2206#[inline]
2207#[target_feature(enable = "avx")]
2208#[cfg_attr(test, assert_instr(vtestpd))]
2209#[stable(feature = "simd_x86", since = "1.27.0")]
2210#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
    unsafe {
        let r: i64x2 = simd_lt(transmute(_mm_andnot_pd(a, b)), i64x2::ZERO);
        (0i64 == simd_reduce_or(r)) as i32
    }
}
2217
2218/// Computes the bitwise AND of 128 bits (representing double-precision (64-bit)
2219/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2220/// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the
2221/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2222/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2223/// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value
2224/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
2225/// are zero, otherwise return 0.
2226///
2227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_pd)
2228#[inline]
2229#[target_feature(enable = "avx")]
2230#[cfg_attr(test, assert_instr(vtestpd))]
2231#[stable(feature = "simd_x86", since = "1.27.0")]
2232pub fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 {
2233 unsafe { vtestnzcpd(a, b) }
2234}
2235
2236/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
2237/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
2238/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2239/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2240/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2241/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2242/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
2243///
2244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testz_ps)
2245#[inline]
2246#[target_feature(enable = "avx")]
2247#[cfg_attr(test, assert_instr(vtestps))]
2248#[stable(feature = "simd_x86", since = "1.27.0")]
2249pub fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 {
2250 unsafe { vtestzps256(a, b) }
2251}
2252
2253/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
2254/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
2255/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2256/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2257/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2258/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2259/// is zero, otherwise set `CF` to 0. Return the `CF` value.
2260///
2261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testc_ps)
2262#[inline]
2263#[target_feature(enable = "avx")]
2264#[cfg_attr(test, assert_instr(vtestps))]
2265#[stable(feature = "simd_x86", since = "1.27.0")]
2266pub fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 {
2267 unsafe { vtestcps256(a, b) }
2268}
2269
2270/// Computes the bitwise AND of 256 bits (representing single-precision (32-bit)
2271/// floating-point elements) in `a` and `b`, producing an intermediate 256-bit
2272/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2273/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2274/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2275/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2276/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
2277/// are zero, otherwise return 0.
2278///
2279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testnzc_ps)
2280#[inline]
2281#[target_feature(enable = "avx")]
2282#[cfg_attr(test, assert_instr(vtestps))]
2283#[stable(feature = "simd_x86", since = "1.27.0")]
2284pub fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
2285 unsafe { vtestnzcps256(a, b) }
2286}
2287
2288/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2289/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2290/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2291/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2292/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2293/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2294/// is zero, otherwise set `CF` to 0. Return the `ZF` value.
2295///
2296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_ps)
2297#[inline]
2298#[target_feature(enable = "avx")]
2299#[cfg_attr(test, assert_instr(vtestps))]
2300#[stable(feature = "simd_x86", since = "1.27.0")]
2301#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
    unsafe {
        let r: i32x4 = simd_lt(transmute(_mm_and_ps(a, b)), i32x4::ZERO);
        (0i32 == simd_reduce_or(r)) as i32
    }
}
2308
2309/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2310/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2311/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2312/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2313/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2314/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2315/// is zero, otherwise set `CF` to 0. Return the `CF` value.
2316///
2317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_ps)
2318#[inline]
2319#[target_feature(enable = "avx")]
2320#[cfg_attr(test, assert_instr(vtestps))]
2321#[stable(feature = "simd_x86", since = "1.27.0")]
2322#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
    unsafe {
        let r: i32x4 = simd_lt(transmute(_mm_andnot_ps(a, b)), i32x4::ZERO);
        (0i32 == simd_reduce_or(r)) as i32
    }
}
2329
2330/// Computes the bitwise AND of 128 bits (representing single-precision (32-bit)
2331/// floating-point elements) in `a` and `b`, producing an intermediate 128-bit
2332/// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the
2333/// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise
2334/// NOT of `a` and then AND with `b`, producing an intermediate value, and set
2335/// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value
2336/// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values
2337/// are zero, otherwise return 0.
2338///
2339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_ps)
2340#[inline]
2341#[target_feature(enable = "avx")]
2342#[cfg_attr(test, assert_instr(vtestps))]
2343#[stable(feature = "simd_x86", since = "1.27.0")]
2344pub fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
2345 unsafe { vtestnzcps(a, b) }
2346}
2347
2348/// Sets each bit of the returned mask based on the most significant bit of the
2349/// corresponding packed double-precision (64-bit) floating-point element in
2350/// `a`.
2351///
2352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_pd)
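///
/// A minimal usage sketch (illustrative; assumes an AVX-capable target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn example() {
///     use std::arch::x86_64::*;
///     // Safety: requires the `avx` feature.
///     unsafe {
///         let a = _mm256_setr_pd(-1.0, 2.0, -3.0, 4.0);
///         // The sign bits of elements 0 and 2 are set, so the mask is 0b0101.
///         assert_eq!(_mm256_movemask_pd(a), 0b0101);
///     }
/// }
/// ```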
2353#[inline]
2354#[target_feature(enable = "avx")]
2355#[cfg_attr(test, assert_instr(vmovmskpd))]
2356#[stable(feature = "simd_x86", since = "1.27.0")]
2357#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movemask_pd(a: __m256d) -> i32 {
    // Propagate the highest bit to the rest, because simd_bitmask
    // requires all-1 or all-0.
    unsafe {
        let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
        simd_bitmask::<i64x4, u8>(mask) as i32
    }
}
2366
2367/// Sets each bit of the returned mask based on the most significant bit of the
2368/// corresponding packed single-precision (32-bit) floating-point element in
2369/// `a`.
2370///
2371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movemask_ps)
2372#[inline]
2373#[target_feature(enable = "avx")]
2374#[cfg_attr(test, assert_instr(vmovmskps))]
2375#[stable(feature = "simd_x86", since = "1.27.0")]
2376#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_movemask_ps(a: __m256) -> i32 {
    // Propagate the highest bit to the rest, because simd_bitmask
    // requires all-1 or all-0.
    unsafe {
        let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
        simd_bitmask::<i32x8, u8>(mask) as i32
    }
}
2385
2386/// Returns vector of type __m256d with all elements set to zero.
2387///
2388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_pd)
2389#[inline]
2390#[target_feature(enable = "avx")]
2391#[cfg_attr(test, assert_instr(vxorp))]
2392#[stable(feature = "simd_x86", since = "1.27.0")]
2393#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2394pub const fn _mm256_setzero_pd() -> __m256d {
2395 const { unsafe { mem::zeroed() } }
2396}
2397
2398/// Returns vector of type __m256 with all elements set to zero.
2399///
2400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_ps)
2401#[inline]
2402#[target_feature(enable = "avx")]
2403#[cfg_attr(test, assert_instr(vxorps))]
2404#[stable(feature = "simd_x86", since = "1.27.0")]
2405#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2406pub const fn _mm256_setzero_ps() -> __m256 {
2407 const { unsafe { mem::zeroed() } }
2408}
2409
2410/// Returns vector of type __m256i with all elements set to zero.
2411///
2412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_si256)
2413#[inline]
2414#[target_feature(enable = "avx")]
2415#[cfg_attr(test, assert_instr(vxor))]
2416#[stable(feature = "simd_x86", since = "1.27.0")]
2417#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2418pub const fn _mm256_setzero_si256() -> __m256i {
2419 const { unsafe { mem::zeroed() } }
2420}
2421
2422/// Sets packed double-precision (64-bit) floating-point elements in returned
2423/// vector with the supplied values.
2424///
2425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_pd)
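///
/// A minimal sketch of the argument ordering relative to `_mm256_setr_pd`
/// (illustrative; assumes an AVX-capable target):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// fn example() {
///     use std::arch::x86_64::*;
///     // Safety: requires the `avx` feature.
///     unsafe {
///         // `_mm256_set_pd` takes arguments from the highest element down to element 0,
///         // so both vectors below hold the lanes [1.0, 2.0, 3.0, 4.0].
///         let a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);
///         let b = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///         let _ = (a, b);
///     }
/// }
/// ```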
2426#[inline]
2427#[target_feature(enable = "avx")]
2428// This intrinsic has no corresponding instruction.
2429#[stable(feature = "simd_x86", since = "1.27.0")]
2430#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
    _mm256_setr_pd(d, c, b, a)
}
2434
2435/// Sets packed single-precision (32-bit) floating-point elements in returned
2436/// vector with the supplied values.
2437///
2438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_ps)
2439#[inline]
2440#[target_feature(enable = "avx")]
2441// This intrinsic has no corresponding instruction.
2442#[stable(feature = "simd_x86", since = "1.27.0")]
2443#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_set_ps(
    a: f32,
    b: f32,
    c: f32,
    d: f32,
    e: f32,
    f: f32,
    g: f32,
    h: f32,
) -> __m256 {
    _mm256_setr_ps(h, g, f, e, d, c, b, a)
}
2456
2457/// Sets packed 8-bit integers in returned vector with the supplied values.
2458///
2459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi8)
2460#[inline]
2461#[target_feature(enable = "avx")]
2462// This intrinsic has no corresponding instruction.
2463#[stable(feature = "simd_x86", since = "1.27.0")]
2464#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2465pub const fn _mm256_set_epi8(
2466 e00: i8,
2467 e01: i8,
2468 e02: i8,
2469 e03: i8,
2470 e04: i8,
2471 e05: i8,
2472 e06: i8,
2473 e07: i8,
2474 e08: i8,
2475 e09: i8,
2476 e10: i8,
2477 e11: i8,
2478 e12: i8,
2479 e13: i8,
2480 e14: i8,
2481 e15: i8,
2482 e16: i8,
2483 e17: i8,
2484 e18: i8,
2485 e19: i8,
2486 e20: i8,
2487 e21: i8,
2488 e22: i8,
2489 e23: i8,
2490 e24: i8,
2491 e25: i8,
2492 e26: i8,
2493 e27: i8,
2494 e28: i8,
2495 e29: i8,
2496 e30: i8,
2497 e31: i8,
2498) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi8(
        e31, e30, e29, e28, e27, e26, e25, e24,
        e23, e22, e21, e20, e19, e18, e17, e16,
        e15, e14, e13, e12, e11, e10, e09, e08,
        e07, e06, e05, e04, e03, e02, e01, e00,
    )
}
2507
2508/// Sets packed 16-bit integers in returned vector with the supplied values.
2509///
2510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi16)
2511#[inline]
2512#[target_feature(enable = "avx")]
2513// This intrinsic has no corresponding instruction.
2514#[stable(feature = "simd_x86", since = "1.27.0")]
2515#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2516pub const fn _mm256_set_epi16(
2517 e00: i16,
2518 e01: i16,
2519 e02: i16,
2520 e03: i16,
2521 e04: i16,
2522 e05: i16,
2523 e06: i16,
2524 e07: i16,
2525 e08: i16,
2526 e09: i16,
2527 e10: i16,
2528 e11: i16,
2529 e12: i16,
2530 e13: i16,
2531 e14: i16,
2532 e15: i16,
2533) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi16(
        e15, e14, e13, e12,
        e11, e10, e09, e08,
        e07, e06, e05, e04,
        e03, e02, e01, e00,
    )
}
2542
2543/// Sets packed 32-bit integers in returned vector with the supplied values.
2544///
2545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi32)
2546#[inline]
2547#[target_feature(enable = "avx")]
2548// This intrinsic has no corresponding instruction.
2549#[stable(feature = "simd_x86", since = "1.27.0")]
2550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2551pub const fn _mm256_set_epi32(
2552 e0: i32,
2553 e1: i32,
2554 e2: i32,
2555 e3: i32,
2556 e4: i32,
2557 e5: i32,
2558 e6: i32,
2559 e7: i32,
2560) -> __m256i {
    _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0)
2562}
2563
2564/// Sets packed 64-bit integers in returned vector with the supplied values.
2565///
2566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_epi64x)
2567#[inline]
2568#[target_feature(enable = "avx")]
2569// This intrinsic has no corresponding instruction.
2570#[stable(feature = "simd_x86", since = "1.27.0")]
2571#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2572pub const fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
2573 _mm256_setr_epi64x(d, c, b, a)
2574}
2575
2576/// Sets packed double-precision (64-bit) floating-point elements in returned
2577/// vector with the supplied values in reverse order.
2578///
2579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_pd)
2580#[inline]
2581#[target_feature(enable = "avx")]
2582// This intrinsic has no corresponding instruction.
2583#[stable(feature = "simd_x86", since = "1.27.0")]
2584#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2585pub const fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
2586 __m256d([a, b, c, d])
2587}
2588
2589/// Sets packed single-precision (32-bit) floating-point elements in returned
2590/// vector with the supplied values in reverse order.
2591///
2592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_ps)
2593#[inline]
2594#[target_feature(enable = "avx")]
2595// This intrinsic has no corresponding instruction.
2596#[stable(feature = "simd_x86", since = "1.27.0")]
2597#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2598pub const fn _mm256_setr_ps(
2599 a: f32,
2600 b: f32,
2601 c: f32,
2602 d: f32,
2603 e: f32,
2604 f: f32,
2605 g: f32,
2606 h: f32,
2607) -> __m256 {
2608 __m256([a, b, c, d, e, f, g, h])
2609}
2610
2611/// Sets packed 8-bit integers in returned vector with the supplied values in
2612/// reverse order.
2613///
2614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi8)
2615#[inline]
2616#[target_feature(enable = "avx")]
2617// This intrinsic has no corresponding instruction.
2618#[stable(feature = "simd_x86", since = "1.27.0")]
2619#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2620pub const fn _mm256_setr_epi8(
2621 e00: i8,
2622 e01: i8,
2623 e02: i8,
2624 e03: i8,
2625 e04: i8,
2626 e05: i8,
2627 e06: i8,
2628 e07: i8,
2629 e08: i8,
2630 e09: i8,
2631 e10: i8,
2632 e11: i8,
2633 e12: i8,
2634 e13: i8,
2635 e14: i8,
2636 e15: i8,
2637 e16: i8,
2638 e17: i8,
2639 e18: i8,
2640 e19: i8,
2641 e20: i8,
2642 e21: i8,
2643 e22: i8,
2644 e23: i8,
2645 e24: i8,
2646 e25: i8,
2647 e26: i8,
2648 e27: i8,
2649 e28: i8,
2650 e29: i8,
2651 e30: i8,
2652 e31: i8,
2653) -> __m256i {
2654 unsafe {
2655 #[rustfmt::skip]
2656 transmute(i8x32::new(
2657 e00, e01, e02, e03, e04, e05, e06, e07,
2658 e08, e09, e10, e11, e12, e13, e14, e15,
2659 e16, e17, e18, e19, e20, e21, e22, e23,
2660 e24, e25, e26, e27, e28, e29, e30, e31,
2661 ))
2662 }
2663}
2664
2665/// Sets packed 16-bit integers in returned vector with the supplied values in
2666/// reverse order.
2667///
2668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi16)
2669#[inline]
2670#[target_feature(enable = "avx")]
2671// This intrinsic has no corresponding instruction.
2672#[stable(feature = "simd_x86", since = "1.27.0")]
2673#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2674pub const fn _mm256_setr_epi16(
2675 e00: i16,
2676 e01: i16,
2677 e02: i16,
2678 e03: i16,
2679 e04: i16,
2680 e05: i16,
2681 e06: i16,
2682 e07: i16,
2683 e08: i16,
2684 e09: i16,
2685 e10: i16,
2686 e11: i16,
2687 e12: i16,
2688 e13: i16,
2689 e14: i16,
2690 e15: i16,
2691) -> __m256i {
2692 unsafe {
2693 #[rustfmt::skip]
2694 transmute(i16x16::new(
2695 e00, e01, e02, e03,
2696 e04, e05, e06, e07,
2697 e08, e09, e10, e11,
2698 e12, e13, e14, e15,
2699 ))
2700 }
2701}
2702
2703/// Sets packed 32-bit integers in returned vector with the supplied values in
2704/// reverse order.
2705///
2706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi32)
2707#[inline]
2708#[target_feature(enable = "avx")]
2709// This intrinsic has no corresponding instruction.
2710#[stable(feature = "simd_x86", since = "1.27.0")]
2711#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2712pub const fn _mm256_setr_epi32(
2713 e0: i32,
2714 e1: i32,
2715 e2: i32,
2716 e3: i32,
2717 e4: i32,
2718 e5: i32,
2719 e6: i32,
2720 e7: i32,
2721) -> __m256i {
2722 unsafe { transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) }
2723}
2724
2725/// Sets packed 64-bit integers in returned vector with the supplied values in
2726/// reverse order.
2727///
2728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_epi64x)
2729#[inline]
2730#[target_feature(enable = "avx")]
2731// This intrinsic has no corresponding instruction.
2732#[stable(feature = "simd_x86", since = "1.27.0")]
2733#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2734pub const fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
2735 unsafe { transmute(i64x4::new(a, b, c, d)) }
2736}
2737
2738/// Broadcasts double-precision (64-bit) floating-point value `a` to all
2739/// elements of returned vector.
2740///
2741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_pd)
2742#[inline]
2743#[target_feature(enable = "avx")]
2744// This intrinsic has no corresponding instruction.
2745#[stable(feature = "simd_x86", since = "1.27.0")]
2746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2747pub const fn _mm256_set1_pd(a: f64) -> __m256d {
2748 f64x4::splat(a).as_m256d()
2749}
2750
2751/// Broadcasts single-precision (32-bit) floating-point value `a` to all
2752/// elements of returned vector.
2753///
2754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_ps)
2755#[inline]
2756#[target_feature(enable = "avx")]
2757// This intrinsic has no corresponding instruction.
2758#[stable(feature = "simd_x86", since = "1.27.0")]
2759#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2760pub const fn _mm256_set1_ps(a: f32) -> __m256 {
2761 f32x8::splat(a).as_m256()
2762}
2763
2764/// Broadcasts 8-bit integer `a` to all elements of returned vector.
2765/// This intrinsic may generate the `vpbroadcastb` instruction.
2766///
2767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi8)
2768#[inline]
2769#[target_feature(enable = "avx")]
2770// This intrinsic has no corresponding instruction.
2771#[stable(feature = "simd_x86", since = "1.27.0")]
2772#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2773pub const fn _mm256_set1_epi8(a: i8) -> __m256i {
2774 i8x32::splat(a).as_m256i()
2775}
2776
2777/// Broadcasts 16-bit integer `a` to all elements of returned vector.
2778/// This intrinsic may generate the `vpbroadcastw` instruction.
2779///
2780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi16)
2781#[inline]
2782#[target_feature(enable = "avx")]
2783//#[cfg_attr(test, assert_instr(vpshufb))]
2784#[cfg_attr(test, assert_instr(vinsertf128))]
2785// This intrinsic has no corresponding instruction.
2786#[stable(feature = "simd_x86", since = "1.27.0")]
2787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2788pub const fn _mm256_set1_epi16(a: i16) -> __m256i {
2789 i16x16::splat(a).as_m256i()
2790}
2791
2792/// Broadcasts 32-bit integer `a` to all elements of returned vector.
2793/// This intrinsic may generate the `vpbroadcastd` instruction.
2794///
2795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi32)
2796#[inline]
2797#[target_feature(enable = "avx")]
2798// This intrinsic has no corresponding instruction.
2799#[stable(feature = "simd_x86", since = "1.27.0")]
2800#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2801pub const fn _mm256_set1_epi32(a: i32) -> __m256i {
2802 i32x8::splat(a).as_m256i()
2803}
2804
2805/// Broadcasts 64-bit integer `a` to all elements of returned vector.
2806/// This intrinsic may generate the `vpbroadcastq` instruction.
2807///
2808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set1_epi64x)
2809#[inline]
2810#[target_feature(enable = "avx")]
2811#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(vinsertf128))]
2812#[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
2813// This intrinsic has no corresponding instruction.
2814#[stable(feature = "simd_x86", since = "1.27.0")]
2815#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2816pub const fn _mm256_set1_epi64x(a: i64) -> __m256i {
2817 i64x4::splat(a).as_m256i()
2818}
2819
2820/// Cast vector of type __m256d to type __m256.
2821///
2822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_ps)
2823#[inline]
2824#[target_feature(enable = "avx")]
2825// This intrinsic is only used for compilation and does not generate any
2826// instructions, thus it has zero latency.
2827#[stable(feature = "simd_x86", since = "1.27.0")]
2828#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2829pub const fn _mm256_castpd_ps(a: __m256d) -> __m256 {
2830 unsafe { transmute(a) }
2831}
2832
2833/// Cast vector of type __m256 to type __m256d.
2834///
2835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_pd)
2836#[inline]
2837#[target_feature(enable = "avx")]
2838// This intrinsic is only used for compilation and does not generate any
2839// instructions, thus it has zero latency.
2840#[stable(feature = "simd_x86", since = "1.27.0")]
2841#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2842pub const fn _mm256_castps_pd(a: __m256) -> __m256d {
2843 unsafe { transmute(a) }
2844}
2845
2846/// Casts vector of type __m256 to type __m256i.
2847///
2848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps_si256)
2849#[inline]
2850#[target_feature(enable = "avx")]
2851// This intrinsic is only used for compilation and does not generate any
2852// instructions, thus it has zero latency.
2853#[stable(feature = "simd_x86", since = "1.27.0")]
2854#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2855pub const fn _mm256_castps_si256(a: __m256) -> __m256i {
2856 unsafe { transmute(a) }
2857}
2858
2859/// Casts vector of type __m256i to type __m256.
2860///
2861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_ps)
2862#[inline]
2863#[target_feature(enable = "avx")]
2864// This intrinsic is only used for compilation and does not generate any
2865// instructions, thus it has zero latency.
2866#[stable(feature = "simd_x86", since = "1.27.0")]
2867#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2868pub const fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
2869 unsafe { transmute(a) }
2870}
2871
2872/// Casts vector of type __m256d to type __m256i.
2873///
2874/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd_si256)
2875#[inline]
2876#[target_feature(enable = "avx")]
2877// This intrinsic is only used for compilation and does not generate any
2878// instructions, thus it has zero latency.
2879#[stable(feature = "simd_x86", since = "1.27.0")]
2880#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2881pub const fn _mm256_castpd_si256(a: __m256d) -> __m256i {
2882 unsafe { transmute(a) }
2883}
2884
2885/// Casts vector of type __m256i to type __m256d.
2886///
2887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_pd)
2888#[inline]
2889#[target_feature(enable = "avx")]
2890// This intrinsic is only used for compilation and does not generate any
2891// instructions, thus it has zero latency.
2892#[stable(feature = "simd_x86", since = "1.27.0")]
2893#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2894pub const fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
2895 unsafe { transmute(a) }
2896}
2897
2898/// Casts vector of type __m256 to type __m128.
2899///
2900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps256_ps128)
2901#[inline]
2902#[target_feature(enable = "avx")]
2903// This intrinsic is only used for compilation and does not generate any
2904// instructions, thus it has zero latency.
2905#[stable(feature = "simd_x86", since = "1.27.0")]
2906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2907pub const fn _mm256_castps256_ps128(a: __m256) -> __m128 {
2908 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
2909}
2910
2911/// Casts vector of type __m256d to type __m128d.
2912///
2913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd256_pd128)
2914#[inline]
2915#[target_feature(enable = "avx")]
2916// This intrinsic is only used for compilation and does not generate any
2917// instructions, thus it has zero latency.
2918#[stable(feature = "simd_x86", since = "1.27.0")]
2919#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2920pub const fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
2921 unsafe { simd_shuffle!(a, a, [0, 1]) }
2922}
2923
2924/// Casts vector of type __m256i to type __m128i.
2925///
2926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi256_si128)
2927#[inline]
2928#[target_feature(enable = "avx")]
2929// This intrinsic is only used for compilation and does not generate any
2930// instructions, thus it has zero latency.
2931#[stable(feature = "simd_x86", since = "1.27.0")]
2932#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2933pub const fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
2934 unsafe {
2935 let a = a.as_i64x4();
2936 let dst: i64x2 = simd_shuffle!(a, a, [0, 1]);
2937 transmute(dst)
2938 }
2939}
2940
2941/// Casts vector of type __m128 to type __m256;
2942/// the upper 128 bits of the result are indeterminate.
2943///
2944/// In the Intel documentation, the upper bits are declared to be "undefined".
2945/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
2946/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
2947///
2948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps128_ps256)
2949#[inline]
2950#[target_feature(enable = "avx")]
2951// This intrinsic is only used for compilation and does not generate any
2952// instructions, thus it has zero latency.
2953#[stable(feature = "simd_x86", since = "1.27.0")]
2954#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2955pub const fn _mm256_castps128_ps256(a: __m128) -> __m256 {
2956 unsafe { simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4]) }
2957}
2958
2959/// Casts vector of type __m128d to type __m256d;
2960/// the upper 128 bits of the result are indeterminate.
2961///
2962/// In the Intel documentation, the upper bits are declared to be "undefined".
2963/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
2964/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
2965///
2966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd128_pd256)
2967#[inline]
2968#[target_feature(enable = "avx")]
2969// This intrinsic is only used for compilation and does not generate any
2970// instructions, thus it has zero latency.
2971#[stable(feature = "simd_x86", since = "1.27.0")]
2972#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2973pub const fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
2974 unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2]) }
2975}
2976
2977/// Casts vector of type __m128i to type __m256i;
2978/// the upper 128 bits of the result are indeterminate.
2979///
2980/// In the Intel documentation, the upper bits are declared to be "undefined".
2981/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
2982/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
2983///
2984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi128_si256)
2985#[inline]
2986#[target_feature(enable = "avx")]
2987// This intrinsic is only used for compilation and does not generate any
2988// instructions, thus it has zero latency.
2989#[stable(feature = "simd_x86", since = "1.27.0")]
2990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
2991pub const fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
2992 unsafe {
2993 let a = a.as_i64x2();
2994 let undefined = i64x2::ZERO;
2995 let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
2996 transmute(dst)
2997 }
2998}
2999
3000/// Constructs a 256-bit floating-point vector of `[8 x float]` from a
3001/// 128-bit floating-point vector of `[4 x float]`. The lower 128 bits contain
3002/// the value of the source vector. The upper 128 bits are set to zero.
3003///
3004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextps128_ps256)
3005#[inline]
3006#[target_feature(enable = "avx")]
3007// This intrinsic is only used for compilation and does not generate any
3008// instructions, thus it has zero latency.
3009#[stable(feature = "simd_x86", since = "1.27.0")]
3010#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3011pub const fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
3012 unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) }
3013}
3014
3015/// Constructs a 256-bit integer vector from a 128-bit integer vector.
3016/// The lower 128 bits contain the value of the source vector. The upper
3017/// 128 bits are set to zero.
3018///
3019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextsi128_si256)
3020#[inline]
3021#[target_feature(enable = "avx")]
3022// This intrinsic is only used for compilation and does not generate any
3023// instructions, thus it has zero latency.
3024#[stable(feature = "simd_x86", since = "1.27.0")]
3025#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3026pub const fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
3027 unsafe {
3028 let b = i64x2::ZERO;
3029 let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
3030 transmute(dst)
3031 }
3032}
3033
3034/// Constructs a 256-bit floating-point vector of `[4 x double]` from a
3035/// 128-bit floating-point vector of `[2 x double]`. The lower 128 bits
3036/// contain the value of the source vector. The upper 128 bits are set
3037/// to zero.
3038///
3039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextpd128_pd256)
3040#[inline]
3041#[target_feature(enable = "avx")]
3042// This intrinsic is only used for compilation and does not generate any
3043// instructions, thus it has zero latency.
3044#[stable(feature = "simd_x86", since = "1.27.0")]
3045#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3046pub const fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
3047 unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3]) }
3048}
3049
3050/// Returns vector of type `__m256` with indeterminate elements.
3051/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
3052/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
3053/// In practice, this is typically equivalent to [`mem::zeroed`].
3054///
3055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_ps)
3056#[inline]
3057#[target_feature(enable = "avx")]
3058// This intrinsic has no corresponding instruction.
3059#[stable(feature = "simd_x86", since = "1.27.0")]
3060#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3061pub const fn _mm256_undefined_ps() -> __m256 {
3062 const { unsafe { mem::zeroed() } }
3063}
3064
3065/// Returns vector of type `__m256d` with indeterminate elements.
3066/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
3067/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
3068/// In practice, this is typically equivalent to [`mem::zeroed`].
3069///
3070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_pd)
3071#[inline]
3072#[target_feature(enable = "avx")]
3073// This intrinsic has no corresponding instruction.
3074#[stable(feature = "simd_x86", since = "1.27.0")]
3075#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3076pub const fn _mm256_undefined_pd() -> __m256d {
3077 const { unsafe { mem::zeroed() } }
3078}
3079
3080/// Returns vector of type `__m256i` with indeterminate elements.
3081/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
3082/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
3083/// In practice, this is typically equivalent to [`mem::zeroed`].
3084///
3085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_undefined_si256)
3086#[inline]
3087#[target_feature(enable = "avx")]
3088// This intrinsic has no corresponding instruction.
3089#[stable(feature = "simd_x86", since = "1.27.0")]
3090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3091pub const fn _mm256_undefined_si256() -> __m256i {
3092 const { unsafe { mem::zeroed() } }
3093}
3094
3095/// Sets packed __m256 returned vector with the supplied values.
3096///
3097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128)
3098#[inline]
3099#[target_feature(enable = "avx")]
3100#[cfg_attr(test, assert_instr(vinsertf128))]
3101#[stable(feature = "simd_x86", since = "1.27.0")]
3102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3103pub const fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
3104 unsafe { simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) }
3105}
3106
3107/// Sets packed __m256d returned vector with the supplied values.
3108///
3109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128d)
3110#[inline]
3111#[target_feature(enable = "avx")]
3112#[cfg_attr(test, assert_instr(vinsertf128))]
3113#[stable(feature = "simd_x86", since = "1.27.0")]
3114#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3115pub const fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
3116 unsafe {
3117 let hi: __m128 = transmute(hi);
3118 let lo: __m128 = transmute(lo);
3119 transmute(_mm256_set_m128(hi, lo))
3120 }
3121}
3122
3123/// Sets packed __m256i returned vector with the supplied values.
3124///
3125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_set_m128i)
3126#[inline]
3127#[target_feature(enable = "avx")]
3128#[cfg_attr(test, assert_instr(vinsertf128))]
3129#[stable(feature = "simd_x86", since = "1.27.0")]
3130#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3131pub const fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
3132 unsafe {
3133 let hi: __m128 = transmute(hi);
3134 let lo: __m128 = transmute(lo);
3135 transmute(_mm256_set_m128(hi, lo))
3136 }
3137}
3138
3139/// Sets packed __m256 returned vector with the supplied values.
3140///
3141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128)
3142#[inline]
3143#[target_feature(enable = "avx")]
3144#[cfg_attr(test, assert_instr(vinsertf128))]
3145#[stable(feature = "simd_x86", since = "1.27.0")]
3146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3147pub const fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
3148 _mm256_set_m128(hi, lo)
3149}
3150
3151/// Sets packed __m256d returned vector with the supplied values.
3152///
3153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128d)
3154#[inline]
3155#[target_feature(enable = "avx")]
3156#[cfg_attr(test, assert_instr(vinsertf128))]
3157#[stable(feature = "simd_x86", since = "1.27.0")]
3158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3159pub const fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
3160 _mm256_set_m128d(hi, lo)
3161}
3162
3163/// Sets packed __m256i returned vector with the supplied values.
3164///
3165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setr_m128i)
3166#[inline]
3167#[target_feature(enable = "avx")]
3168#[cfg_attr(test, assert_instr(vinsertf128))]
3169#[stable(feature = "simd_x86", since = "1.27.0")]
3170#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3171pub const fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
3172 _mm256_set_m128i(hi, lo)
3173}
3174
3175/// Loads two 128-bit values (composed of 4 packed single-precision (32-bit)
3176/// floating-point elements) from memory, and combine them into a 256-bit
3177/// value.
3178/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
3179///
3180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128)
3181#[inline]
3182#[target_feature(enable = "avx")]
3183// This intrinsic has no corresponding instruction.
3184#[stable(feature = "simd_x86", since = "1.27.0")]
3185#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3186pub const unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
3187 let a: __m256 = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
3188 _mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr))
3189}
3190
3191/// Loads two 128-bit values (composed of 2 packed double-precision (64-bit)
3192/// floating-point elements) from memory, and combine them into a 256-bit
3193/// value.
3194/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
3195///
3196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128d)
3197#[inline]
3198#[target_feature(enable = "avx")]
3199// This intrinsic has no corresponding instruction.
3200#[stable(feature = "simd_x86", since = "1.27.0")]
3201#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3202pub const unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
3203 let a: __m256d = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
3204 _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr))
3205}
3206
3207/// Loads two 128-bit values (composed of integer data) from memory, and combine
3208/// them into a 256-bit value.
3209/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
3210///
3211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128i)
3212#[inline]
3213#[target_feature(enable = "avx")]
3214// This intrinsic has no corresponding instruction.
3215#[stable(feature = "simd_x86", since = "1.27.0")]
3216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3217pub const unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
3218 let a: __m256i = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
3219 _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr))
3220}
3221
3222/// Stores the high and low 128-bit halves (each composed of 4 packed
3223/// single-precision (32-bit) floating-point elements) from `a` into memory two
3224/// different 128-bit locations.
3225/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
3226///
3227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128)
3228#[inline]
3229#[target_feature(enable = "avx")]
3230// This intrinsic has no corresponding instruction.
3231#[stable(feature = "simd_x86", since = "1.27.0")]
3232#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3233pub const unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
3234 let lo: __m128 = _mm256_castps256_ps128(a);
3235 _mm_storeu_ps(loaddr, lo);
3236 let hi: __m128 = _mm256_extractf128_ps::<1>(a);
3237 _mm_storeu_ps(hiaddr, hi);
3238}
3239
3240/// Stores the high and low 128-bit halves (each composed of 2 packed
3241/// double-precision (64-bit) floating-point elements) from `a` into memory two
3242/// different 128-bit locations.
3243/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
3244///
3245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128d)
3246#[inline]
3247#[target_feature(enable = "avx")]
3248// This intrinsic has no corresponding instruction.
3249#[stable(feature = "simd_x86", since = "1.27.0")]
3250#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3251pub const unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
3252 let lo: __m128d = _mm256_castpd256_pd128(a);
3253 _mm_storeu_pd(loaddr, lo);
3254 let hi: __m128d = _mm256_extractf128_pd::<1>(a);
3255 _mm_storeu_pd(hiaddr, hi);
3256}
3257
3258/// Stores the high and low 128-bit halves (each composed of integer data) from
3259/// `a` into memory two different 128-bit locations.
3260/// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary.
3261///
3262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128i)
3263#[inline]
3264#[target_feature(enable = "avx")]
3265// This intrinsic has no corresponding instruction.
3266#[stable(feature = "simd_x86", since = "1.27.0")]
3267#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3268pub const unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
3269 let lo: __m128i = _mm256_castsi256_si128(a);
3270 _mm_storeu_si128(loaddr, lo);
3271 let hi: __m128i = _mm256_extractf128_si256::<1>(a);
3272 _mm_storeu_si128(hiaddr, hi);
3273}
3274
3275/// Returns the first element of the input vector of `[8 x float]`.
3276///
3277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtss_f32)
3278#[inline]
3279#[target_feature(enable = "avx")]
3280//#[cfg_attr(test, assert_instr(movss))] FIXME
3281#[stable(feature = "simd_x86", since = "1.27.0")]
3282#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3283pub const fn _mm256_cvtss_f32(a: __m256) -> f32 {
3284 unsafe { simd_extract!(a, 0) }
3285}
3286
3287// LLVM intrinsics used in the above functions
3288#[allow(improper_ctypes)]
3289unsafe extern "C" {
3290 #[link_name = "llvm.x86.avx.round.pd.256"]
3291 unsafe fn roundpd256(a: __m256d, b: i32) -> __m256d;
3292 #[link_name = "llvm.x86.avx.round.ps.256"]
3293 unsafe fn roundps256(a: __m256, b: i32) -> __m256;
3294 #[link_name = "llvm.x86.avx.dp.ps.256"]
3295 unsafe fn vdpps(a: __m256, b: __m256, imm8: i8) -> __m256;
3296 #[link_name = "llvm.x86.sse2.cmp.pd"]
3297 unsafe fn vcmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3298 #[link_name = "llvm.x86.avx.cmp.pd.256"]
3299 unsafe fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d;
3300 #[link_name = "llvm.x86.sse.cmp.ps"]
3301 unsafe fn vcmpps(a: __m128, b: __m128, imm8: i8) -> __m128;
3302 #[link_name = "llvm.x86.avx.cmp.ps.256"]
3303 unsafe fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256;
3304 #[link_name = "llvm.x86.sse2.cmp.sd"]
3305 unsafe fn vcmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3306 #[link_name = "llvm.x86.sse.cmp.ss"]
3307 unsafe fn vcmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
3308 #[link_name = "llvm.x86.avx.cvt.ps2dq.256"]
3309 unsafe fn vcvtps2dq(a: __m256) -> i32x8;
3310 #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"]
3311 unsafe fn vcvttpd2dq(a: __m256d) -> i32x4;
3312 #[link_name = "llvm.x86.avx.cvt.pd2dq.256"]
3313 unsafe fn vcvtpd2dq(a: __m256d) -> i32x4;
3314 #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"]
3315 unsafe fn vcvttps2dq(a: __m256) -> i32x8;
3316 #[link_name = "llvm.x86.avx.vzeroall"]
3317 unsafe fn vzeroall();
3318 #[link_name = "llvm.x86.avx.vzeroupper"]
3319 unsafe fn vzeroupper();
3320 #[link_name = "llvm.x86.avx.vpermilvar.ps.256"]
3321 unsafe fn vpermilps256(a: __m256, b: i32x8) -> __m256;
3322 #[link_name = "llvm.x86.avx.vpermilvar.ps"]
3323 unsafe fn vpermilps(a: __m128, b: i32x4) -> __m128;
3324 #[link_name = "llvm.x86.avx.vpermilvar.pd.256"]
3325 unsafe fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
3326 #[link_name = "llvm.x86.avx.vpermilvar.pd"]
3327 unsafe fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
3328 #[link_name = "llvm.x86.avx.ldu.dq.256"]
3329 unsafe fn vlddqu(mem_addr: *const i8) -> i8x32;
3330 #[link_name = "llvm.x86.avx.rcp.ps.256"]
3331 unsafe fn vrcpps(a: __m256) -> __m256;
3332 #[link_name = "llvm.x86.avx.rsqrt.ps.256"]
3333 unsafe fn vrsqrtps(a: __m256) -> __m256;
3334 #[link_name = "llvm.x86.avx.ptestnzc.256"]
3335 unsafe fn ptestnzc256(a: i64x4, b: i64x4) -> i32;
3336 #[link_name = "llvm.x86.avx.vtestz.pd.256"]
3337 unsafe fn vtestzpd256(a: __m256d, b: __m256d) -> i32;
3338 #[link_name = "llvm.x86.avx.vtestc.pd.256"]
3339 unsafe fn vtestcpd256(a: __m256d, b: __m256d) -> i32;
3340 #[link_name = "llvm.x86.avx.vtestnzc.pd.256"]
3341 unsafe fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32;
3342 #[link_name = "llvm.x86.avx.vtestnzc.pd"]
3343 unsafe fn vtestnzcpd(a: __m128d, b: __m128d) -> i32;
3344 #[link_name = "llvm.x86.avx.vtestz.ps.256"]
3345 unsafe fn vtestzps256(a: __m256, b: __m256) -> i32;
3346 #[link_name = "llvm.x86.avx.vtestc.ps.256"]
3347 unsafe fn vtestcps256(a: __m256, b: __m256) -> i32;
3348 #[link_name = "llvm.x86.avx.vtestnzc.ps.256"]
3349 unsafe fn vtestnzcps256(a: __m256, b: __m256) -> i32;
3350 #[link_name = "llvm.x86.avx.vtestnzc.ps"]
3351 unsafe fn vtestnzcps(a: __m128, b: __m128) -> i32;
3352 #[link_name = "llvm.x86.avx.min.ps.256"]
3353 unsafe fn vminps(a: __m256, b: __m256) -> __m256;
3354 #[link_name = "llvm.x86.avx.max.ps.256"]
3355 unsafe fn vmaxps(a: __m256, b: __m256) -> __m256;
3356 #[link_name = "llvm.x86.avx.min.pd.256"]
3357 unsafe fn vminpd(a: __m256d, b: __m256d) -> __m256d;
3358 #[link_name = "llvm.x86.avx.max.pd.256"]
3359 unsafe fn vmaxpd(a: __m256d, b: __m256d) -> __m256d;
3360}
3361
3362#[cfg(test)]
3363mod tests {
3364 use crate::core_arch::assert_eq_const as assert_eq;
3365 use crate::core_arch::simd::*;
3366 use crate::hint::black_box;
3367 use crate::ptr;
3368 use stdarch_test::simd_test;
3369
3370 use crate::core_arch::x86::*;
3371
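    // Illustrative sketch, not part of the upstream test suite (the test name
    // is ours): `_mm256_set_epi64x` takes its arguments highest lane first,
    // while `_mm256_setr_epi64x` takes them in lane order, so reversing the
    // argument list makes the two constructors agree.
    #[simd_test(enable = "avx")]
    fn test_mm256_set_epi64x_argument_order() {
        let a = _mm256_set_epi64x(3, 2, 1, 0);
        let b = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(a, b);
    }
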
3372 #[simd_test(enable = "avx")]
3373 const fn test_mm256_add_pd() {
3374 let a = _mm256_setr_pd(1., 2., 3., 4.);
3375 let b = _mm256_setr_pd(5., 6., 7., 8.);
3376 let r = _mm256_add_pd(a, b);
3377 let e = _mm256_setr_pd(6., 8., 10., 12.);
3378 assert_eq_m256d(r, e);
3379 }
3380
3381 #[simd_test(enable = "avx")]
3382 const fn test_mm256_add_ps() {
3383 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3384 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3385 let r = _mm256_add_ps(a, b);
3386 let e = _mm256_setr_ps(10., 12., 14., 16., 18., 20., 22., 24.);
3387 assert_eq_m256(r, e);
3388 }
3389
3390 #[simd_test(enable = "avx")]
3391 const fn test_mm256_and_pd() {
3392 let a = _mm256_set1_pd(1.);
3393 let b = _mm256_set1_pd(0.6);
3394 let r = _mm256_and_pd(a, b);
3395 let e = _mm256_set1_pd(0.5);
3396 assert_eq_m256d(r, e);
3397 }
3398
3399 #[simd_test(enable = "avx")]
3400 const fn test_mm256_and_ps() {
3401 let a = _mm256_set1_ps(1.);
3402 let b = _mm256_set1_ps(0.6);
3403 let r = _mm256_and_ps(a, b);
3404 let e = _mm256_set1_ps(0.5);
3405 assert_eq_m256(r, e);
3406 }
3407
3408 #[simd_test(enable = "avx")]
3409 const fn test_mm256_or_pd() {
3410 let a = _mm256_set1_pd(1.);
3411 let b = _mm256_set1_pd(0.6);
3412 let r = _mm256_or_pd(a, b);
3413 let e = _mm256_set1_pd(1.2);
3414 assert_eq_m256d(r, e);
3415 }
3416
3417 #[simd_test(enable = "avx")]
3418 const fn test_mm256_or_ps() {
3419 let a = _mm256_set1_ps(1.);
3420 let b = _mm256_set1_ps(0.6);
3421 let r = _mm256_or_ps(a, b);
3422 let e = _mm256_set1_ps(1.2);
3423 assert_eq_m256(r, e);
3424 }
3425
3426 #[simd_test(enable = "avx")]
3427 const fn test_mm256_shuffle_pd() {
3428 let a = _mm256_setr_pd(1., 4., 5., 8.);
3429 let b = _mm256_setr_pd(2., 3., 6., 7.);
3430 let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
3431 let e = _mm256_setr_pd(4., 3., 8., 7.);
3432 assert_eq_m256d(r, e);
3433 }
3434
3435 #[simd_test(enable = "avx")]
3436 const fn test_mm256_shuffle_ps() {
3437 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3438 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3439 let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
3440 let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.);
3441 assert_eq_m256(r, e);
3442 }
3443
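    // Illustrative sketch, not part of the upstream test suite (the test name
    // is ours): `_mm256_set_m128` puts `hi` in the upper 128 bits and `lo` in
    // the lower 128 bits, and `_mm256_setr_m128` is the same operation with
    // the arguments swapped.
    #[simd_test(enable = "avx")]
    fn test_mm256_set_m128_halves() {
        let lo = _mm_setr_ps(1., 2., 3., 4.);
        let hi = _mm_setr_ps(5., 6., 7., 8.);
        let r = _mm256_set_m128(hi, lo);
        assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
        assert_eq_m256(_mm256_setr_m128(lo, hi), r);
    }
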
3444 #[simd_test(enable = "avx")]
3445 const fn test_mm256_andnot_pd() {
3446 let a = _mm256_set1_pd(0.);
3447 let b = _mm256_set1_pd(0.6);
3448 let r = _mm256_andnot_pd(a, b);
3449 assert_eq_m256d(r, b);
3450 }
3451
3452 #[simd_test(enable = "avx")]
3453 const fn test_mm256_andnot_ps() {
3454 let a = _mm256_set1_ps(0.);
3455 let b = _mm256_set1_ps(0.6);
3456 let r = _mm256_andnot_ps(a, b);
3457 assert_eq_m256(r, b);
3458 }
3459
3460 #[simd_test(enable = "avx")]
3461 fn test_mm256_max_pd() {
3462 let a = _mm256_setr_pd(1., 4., 5., 8.);
3463 let b = _mm256_setr_pd(2., 3., 6., 7.);
3464 let r = _mm256_max_pd(a, b);
3465 let e = _mm256_setr_pd(2., 4., 6., 8.);
3466 assert_eq_m256d(r, e);
3467 // > If the values being compared are both 0.0s (of either sign), the
3468 // > value in the second operand (source operand) is returned.
3469 let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3470 let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3471 let wu = _mm256_castpd_si256(w).as_u64x4();
3472 let xu = _mm256_castpd_si256(x).as_u64x4();
3473 assert_eq!(wu, u64x4::splat(0x8000_0000_0000_0000u64));
3474 assert_eq!(xu, u64x4::splat(0u64));
3475 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3476 // > second operand (source operand), either a NaN or a valid
3477 // > floating-point value, is written to the result.
3478 let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3479 let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3480 assert_eq_m256d(y, _mm256_set1_pd(0.0));
3481 let zf = *z.as_f64x4().as_array();
3482 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3483 }
3484
3485 #[simd_test(enable = "avx")]
3486 fn test_mm256_max_ps() {
3487 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3488 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3489 let r = _mm256_max_ps(a, b);
3490 let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
3491 assert_eq_m256(r, e);
3492 // > If the values being compared are both 0.0s (of either sign), the
3493 // > value in the second operand (source operand) is returned.
3494 let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3495 let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3496 let wu = _mm256_castps_si256(w).as_u32x8();
3497 let xu = _mm256_castps_si256(x).as_u32x8();
3498 assert_eq!(wu, u32x8::splat(0x8000_0000u32));
3499 assert_eq!(xu, u32x8::splat(0u32));
3500 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3501 // > second operand (source operand), either a NaN or a valid
3502 // > floating-point value, is written to the result.
3503 let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3504 let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3505 assert_eq_m256(y, _mm256_set1_ps(0.0));
3506 let zf = *z.as_f32x8().as_array();
3507 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3508 }
3509
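    // Illustrative sketch, not part of the upstream test suite (the test name
    // is ours): `_mm256_set1_epi32` broadcasts one value to all eight 32-bit
    // lanes, equivalent to spelling the lanes out by hand.
    #[simd_test(enable = "avx")]
    fn test_mm256_set1_epi32_broadcast() {
        let r = _mm256_set1_epi32(7);
        assert_eq_m256i(r, _mm256_setr_epi32(7, 7, 7, 7, 7, 7, 7, 7));
    }
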
3510 #[simd_test(enable = "avx")]
3511 fn test_mm256_min_pd() {
3512 let a = _mm256_setr_pd(1., 4., 5., 8.);
3513 let b = _mm256_setr_pd(2., 3., 6., 7.);
3514 let r = _mm256_min_pd(a, b);
3515 let e = _mm256_setr_pd(1., 3., 5., 7.);
3516 assert_eq_m256d(r, e);
3517 // > If the values being compared are both 0.0s (of either sign), the
3518 // > value in the second operand (source operand) is returned.
3519 let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
3520 let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
3521 let wu = _mm256_castpd_si256(w).as_u64x4();
3522 let xu = _mm256_castpd_si256(x).as_u64x4();
3523 assert_eq!(wu, u64x4::splat(0x8000_0000_0000_0000u64));
3524 assert_eq!(xu, u64x4::splat(0u64));
3525 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3526 // > second operand (source operand), either a NaN or a valid
3527 // > floating-point value, is written to the result.
3528 let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
3529 let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
3530 assert_eq_m256d(y, _mm256_set1_pd(0.0));
3531 let zf = *z.as_f64x4().as_array();
3532 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3533 }
3534
3535 #[simd_test(enable = "avx")]
3536 fn test_mm256_min_ps() {
3537 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3538 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3539 let r = _mm256_min_ps(a, b);
3540 let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
3541 assert_eq_m256(r, e);
3542 // > If the values being compared are both 0.0s (of either sign), the
3543 // > value in the second operand (source operand) is returned.
3544 let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
3545 let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
3546 let wu = _mm256_castps_si256(w).as_u32x8();
3547 let xu = _mm256_castps_si256(x).as_u32x8();
3548 assert_eq!(wu, u32x8::splat(0x8000_0000u32));
3549 assert_eq!(xu, u32x8::splat(0u32));
3550 // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
3551 // > second operand (source operand), either a NaN or a valid
3552 // > floating-point value, is written to the result.
3553 let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
3554 let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
3555 assert_eq_m256(y, _mm256_set1_ps(0.0));
3556 let zf = *z.as_f32x8().as_array();
3557 assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
3558 }
3559
3560 #[simd_test(enable = "avx")]
3561 const fn test_mm256_mul_pd() {
3562 let a = _mm256_setr_pd(1., 2., 3., 4.);
3563 let b = _mm256_setr_pd(5., 6., 7., 8.);
3564 let r = _mm256_mul_pd(a, b);
3565 let e = _mm256_setr_pd(5., 12., 21., 32.);
3566 assert_eq_m256d(r, e);
3567 }
3568
3569 #[simd_test(enable = "avx")]
3570 const fn test_mm256_mul_ps() {
3571 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
3572 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
3573 let r = _mm256_mul_ps(a, b);
3574 let e = _mm256_setr_ps(9., 20., 33., 48., 65., 84., 105., 128.);
3575 assert_eq_m256(r, e);
3576 }
3577
3578 #[simd_test(enable = "avx")]
3579 const fn test_mm256_addsub_pd() {
3580 let a = _mm256_setr_pd(1., 2., 3., 4.);
3581 let b = _mm256_setr_pd(5., 6., 7., 8.);
3582 let r = _mm256_addsub_pd(a, b);
3583 let e = _mm256_setr_pd(-4., 8., -4., 12.);
3584 assert_eq_m256d(r, e);
3585 }
3586
3587 #[simd_test(enable = "avx")]
3588 const fn test_mm256_addsub_ps() {
3589 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3590 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3591 let r = _mm256_addsub_ps(a, b);
3592 let e = _mm256_setr_ps(-4., 8., -4., 12., -4., 8., -4., 12.);
3593 assert_eq_m256(r, e);
3594 }
3595
3596 #[simd_test(enable = "avx")]
3597 const fn test_mm256_sub_pd() {
3598 let a = _mm256_setr_pd(1., 2., 3., 4.);
3599 let b = _mm256_setr_pd(5., 6., 7., 8.);
3600 let r = _mm256_sub_pd(a, b);
3601 let e = _mm256_setr_pd(-4., -4., -4., -4.);
3602 assert_eq_m256d(r, e);
3603 }
3604
3605 #[simd_test(enable = "avx")]
3606 const fn test_mm256_sub_ps() {
3607 let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.);
3608 let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.);
3609 let r = _mm256_sub_ps(a, b);
3610 let e = _mm256_setr_ps(-4., -4., -4., -4., -4., -4., -4., -4.);
3611 assert_eq_m256(r, e);
3612 }
3613
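    // Illustrative sketch, not part of the upstream test suite (the test name
    // is ours): `_mm256_castps128_ps256` keeps its argument in the lower
    // 128 bits (the upper bits are indeterminate), so casting back down with
    // `_mm256_castps256_ps128` round-trips the original value.
    #[simd_test(enable = "avx")]
    fn test_mm256_castps128_ps256_roundtrip() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let wide = _mm256_castps128_ps256(a);
        assert_eq_m128(_mm256_castps256_ps128(wide), a);
    }
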
3614 #[simd_test(enable = "avx")]
3615 fn test_mm256_round_pd() {
3616 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3617 let result_closest = _mm256_round_pd::<0b0000>(a);
3618 let result_down = _mm256_round_pd::<0b0001>(a);
3619 let result_up = _mm256_round_pd::<0b0010>(a);
3620 let expected_closest = _mm256_setr_pd(2., 2., 4., -1.);
3621 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3622 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3623 assert_eq_m256d(result_closest, expected_closest);
3624 assert_eq_m256d(result_down, expected_down);
3625 assert_eq_m256d(result_up, expected_up);
3626 }
3627
3628 #[simd_test(enable = "avx")]
3629 const fn test_mm256_floor_pd() {
3630 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3631 let result_down = _mm256_floor_pd(a);
3632 let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
3633 assert_eq_m256d(result_down, expected_down);
3634 }
3635
3636 #[simd_test(enable = "avx")]
3637 const fn test_mm256_ceil_pd() {
3638 let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
3639 let result_up = _mm256_ceil_pd(a);
3640 let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
3641 assert_eq_m256d(result_up, expected_up);
3642 }
3643
3644 #[simd_test(enable = "avx")]
3645 fn test_mm256_round_ps() {
3646 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3647 let result_closest = _mm256_round_ps::<0b0000>(a);
3648 let result_down = _mm256_round_ps::<0b0001>(a);
3649 let result_up = _mm256_round_ps::<0b0010>(a);
3650 let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.);
3651 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3652 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3653 assert_eq_m256(result_closest, expected_closest);
3654 assert_eq_m256(result_down, expected_down);
3655 assert_eq_m256(result_up, expected_up);
3656 }
3657
3658 #[simd_test(enable = "avx")]
3659 const fn test_mm256_floor_ps() {
3660 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3661 let result_down = _mm256_floor_ps(a);
3662 let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
3663 assert_eq_m256(result_down, expected_down);
3664 }
3665
3666 #[simd_test(enable = "avx")]
3667 const fn test_mm256_ceil_ps() {
3668 let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
3669 let result_up = _mm256_ceil_ps(a);
3670 let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
3671 assert_eq_m256(result_up, expected_up);
3672 }
3673
3674 #[simd_test(enable = "avx")]
3675 fn test_mm256_sqrt_pd() {
3676 let a = _mm256_setr_pd(4., 9., 16., 25.);
3677 let r = _mm256_sqrt_pd(a);
3678 let e = _mm256_setr_pd(2., 3., 4., 5.);
3679 assert_eq_m256d(r, e);
3680 }
3681
3682 #[simd_test(enable = "avx")]
3683 fn test_mm256_sqrt_ps() {
3684 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3685 let r = _mm256_sqrt_ps(a);
3686 let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.);
3687 assert_eq_m256(r, e);
3688 }
3689
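    // Illustrative sketch, not part of the upstream test suite (the test name
    // is ours): the casts between `__m256d` and `__m256i` only reinterpret
    // bits, so a double to integer to double round trip is lossless.
    #[simd_test(enable = "avx")]
    fn test_mm256_castpd_si256_roundtrip() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_castsi256_pd(_mm256_castpd_si256(a));
        assert_eq_m256d(r, a);
    }
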
3690 #[simd_test(enable = "avx")]
3691 const fn test_mm256_div_ps() {
3692 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3693 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3694 let r = _mm256_div_ps(a, b);
3695 let e = _mm256_setr_ps(1., 3., 8., 5., 0.5, 1., 0.25, 0.5);
3696 assert_eq_m256(r, e);
3697 }
3698
3699 #[simd_test(enable = "avx")]
3700 const fn test_mm256_div_pd() {
3701 let a = _mm256_setr_pd(4., 9., 16., 25.);
3702 let b = _mm256_setr_pd(4., 3., 2., 5.);
3703 let r = _mm256_div_pd(a, b);
3704 let e = _mm256_setr_pd(1., 3., 8., 5.);
3705 assert_eq_m256d(r, e);
3706 }
3707
3708 #[simd_test(enable = "avx")]
3709 const fn test_mm256_blend_pd() {
3710 let a = _mm256_setr_pd(4., 9., 16., 25.);
3711 let b = _mm256_setr_pd(4., 3., 2., 5.);
3712 let r = _mm256_blend_pd::<0x0>(a, b);
3713 assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.));
3714 let r = _mm256_blend_pd::<0x3>(a, b);
3715 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.));
3716 let r = _mm256_blend_pd::<0xF>(a, b);
3717 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.));
3718 }
3719
3720 #[simd_test(enable = "avx")]
3721 const fn test_mm256_blend_ps() {
3722 let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
3723 let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
3724 let r = _mm256_blend_ps::<0x0>(a, b);
3725 assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.));
3726 let r = _mm256_blend_ps::<0x3>(a, b);
3727 assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.));
3728 let r = _mm256_blend_ps::<0xF>(a, b);
3729 assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.));
3730 }
3731
3732 #[simd_test(enable = "avx")]
3733 const fn test_mm256_blendv_pd() {
3734 let a = _mm256_setr_pd(4., 9., 16., 25.);
3735 let b = _mm256_setr_pd(4., 3., 2., 5.);
3736 let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64);
3737 let r = _mm256_blendv_pd(a, b, c);
3738 let e = _mm256_setr_pd(4., 9., 2., 5.);
3739 assert_eq_m256d(r, e);
3740 }
3741
3742 #[simd_test(enable = "avx")]
3743 const fn test_mm256_blendv_ps() {
3744 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3745 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3746 #[rustfmt::skip]
3747 let c = _mm256_setr_ps(
3748 0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32,
3749 );
3750 let r = _mm256_blendv_ps(a, b, c);
3751 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
3752 assert_eq_m256(r, e);
3753 }
3754
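    // Illustrative sketch, not part of the upstream test suite (the test name
    // is ours): unlike the plain casts, `_mm256_zextps128_ps256` guarantees
    // that the upper 128 bits of the result are zero.
    #[simd_test(enable = "avx")]
    fn test_mm256_zextps128_ps256_zero_upper() {
        let a = _mm_setr_ps(1., 2., 3., 4.);
        let r = _mm256_zextps128_ps256(a);
        assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.));
    }
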
3755 #[simd_test(enable = "avx")]
3756 fn test_mm256_dp_ps() {
3757 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3758 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3759 let r = _mm256_dp_ps::<0xFF>(a, b);
3760 let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.);
3761 assert_eq_m256(r, e);
3762 }
3763
3764 #[simd_test(enable = "avx")]
3765 const fn test_mm256_hadd_pd() {
3766 let a = _mm256_setr_pd(4., 9., 16., 25.);
3767 let b = _mm256_setr_pd(4., 3., 2., 5.);
3768 let r = _mm256_hadd_pd(a, b);
3769 let e = _mm256_setr_pd(13., 7., 41., 7.);
3770 assert_eq_m256d(r, e);
3771
3772 let a = _mm256_setr_pd(1., 2., 3., 4.);
3773 let b = _mm256_setr_pd(5., 6., 7., 8.);
3774 let r = _mm256_hadd_pd(a, b);
3775 let e = _mm256_setr_pd(3., 11., 7., 15.);
3776 assert_eq_m256d(r, e);
3777 }
3778
3779 #[simd_test(enable = "avx")]
3780 const fn test_mm256_hadd_ps() {
3781 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3782 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3783 let r = _mm256_hadd_ps(a, b);
3784 let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.);
3785 assert_eq_m256(r, e);
3786
3787 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3788 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3789 let r = _mm256_hadd_ps(a, b);
3790 let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.);
3791 assert_eq_m256(r, e);
3792 }
3793
3794 #[simd_test(enable = "avx")]
3795 const fn test_mm256_hsub_pd() {
3796 let a = _mm256_setr_pd(4., 9., 16., 25.);
3797 let b = _mm256_setr_pd(4., 3., 2., 5.);
3798 let r = _mm256_hsub_pd(a, b);
3799 let e = _mm256_setr_pd(-5., 1., -9., -3.);
3800 assert_eq_m256d(r, e);
3801
3802 let a = _mm256_setr_pd(1., 2., 3., 4.);
3803 let b = _mm256_setr_pd(5., 6., 7., 8.);
3804 let r = _mm256_hsub_pd(a, b);
3805 let e = _mm256_setr_pd(-1., -1., -1., -1.);
3806 assert_eq_m256d(r, e);
3807 }
3808
3809 #[simd_test(enable = "avx")]
3810 const fn test_mm256_hsub_ps() {
3811 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3812 let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3813 let r = _mm256_hsub_ps(a, b);
3814 let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.);
3815 assert_eq_m256(r, e);
3816
3817 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3818 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3819 let r = _mm256_hsub_ps(a, b);
3820 let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.);
3821 assert_eq_m256(r, e);
3822 }
3823
3824 #[simd_test(enable = "avx")]
3825 const fn test_mm256_xor_pd() {
3826 let a = _mm256_setr_pd(4., 9., 16., 25.);
3827 let b = _mm256_set1_pd(0.);
3828 let r = _mm256_xor_pd(a, b);
3829 assert_eq_m256d(r, a);
3830 }
3831
3832 #[simd_test(enable = "avx")]
3833 const fn test_mm256_xor_ps() {
3834 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3835 let b = _mm256_set1_ps(0.);
3836 let r = _mm256_xor_ps(a, b);
3837 assert_eq_m256(r, a);
3838 }
3839
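    // Illustrative sketch, not part of the upstream test suite (the test name
    // is ours): `_mm256_loadu2_m128` combines two unaligned 128-bit loads and
    // `_mm256_storeu2_m128` is the matching split; plain arrays are enough
    // because no particular alignment is required.
    #[simd_test(enable = "avx")]
    fn test_mm256_loadu2_storeu2_m128_roundtrip() {
        let lo = [1.0f32, 2., 3., 4.];
        let hi = [5.0f32, 6., 7., 8.];
        let v = unsafe { _mm256_loadu2_m128(hi.as_ptr(), lo.as_ptr()) };
        assert_eq_m256(v, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
        let mut lo_out = [0.0f32; 4];
        let mut hi_out = [0.0f32; 4];
        unsafe { _mm256_storeu2_m128(hi_out.as_mut_ptr(), lo_out.as_mut_ptr(), v) };
        let back = unsafe { _mm256_loadu2_m128(hi_out.as_ptr(), lo_out.as_ptr()) };
        assert_eq_m256(back, v);
    }
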
3840 #[simd_test(enable = "avx")]
3841 fn test_mm_cmp_pd() {
3842 let a = _mm_setr_pd(4., 9.);
3843 let b = _mm_setr_pd(4., 3.);
3844 let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b);
3845 assert!(get_m128d(r, 0).is_nan());
3846 assert!(get_m128d(r, 1).is_nan());
3847 }
3848
3849 #[simd_test(enable = "avx")]
3850 fn test_mm256_cmp_pd() {
3851 let a = _mm256_setr_pd(1., 2., 3., 4.);
3852 let b = _mm256_setr_pd(5., 6., 7., 8.);
3853 let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b);
3854 let e = _mm256_set1_pd(0.);
3855 assert_eq_m256d(r, e);
3856 }
3857
3858 #[simd_test(enable = "avx")]
3859 fn test_mm_cmp_ps() {
3860 let a = _mm_setr_ps(4., 3., 2., 5.);
3861 let b = _mm_setr_ps(4., 9., 16., 25.);
3862 let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b);
3863 assert!(get_m128(r, 0).is_nan());
3864 assert_eq!(get_m128(r, 1), 0.);
3865 assert_eq!(get_m128(r, 2), 0.);
3866 assert_eq!(get_m128(r, 3), 0.);
3867 }
3868
3869 #[simd_test(enable = "avx")]
3870 fn test_mm256_cmp_ps() {
3871 let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
3872 let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
3873 let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b);
3874 let e = _mm256_set1_ps(0.);
3875 assert_eq_m256(r, e);
3876 }
3877
3878 #[simd_test(enable = "avx")]
3879 fn test_mm_cmp_sd() {
3880 let a = _mm_setr_pd(4., 9.);
3881 let b = _mm_setr_pd(4., 3.);
3882 let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b);
3883 assert!(get_m128d(r, 0).is_nan());
3884 assert_eq!(get_m128d(r, 1), 9.);
3885 }
3886
3887 #[simd_test(enable = "avx")]
3888 fn test_mm_cmp_ss() {
3889 let a = _mm_setr_ps(4., 3., 2., 5.);
3890 let b = _mm_setr_ps(4., 9., 16., 25.);
3891 let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b);
3892 assert!(get_m128(r, 0).is_nan());
3893 assert_eq!(get_m128(r, 1), 3.);
3894 assert_eq!(get_m128(r, 2), 2.);
3895 assert_eq!(get_m128(r, 3), 5.);
3896 }
3897
3898 #[simd_test(enable = "avx")]
3899 const fn test_mm256_cvtepi32_pd() {
3900 let a = _mm_setr_epi32(4, 9, 16, 25);
3901 let r = _mm256_cvtepi32_pd(a);
3902 let e = _mm256_setr_pd(4., 9., 16., 25.);
3903 assert_eq_m256d(r, e);
3904 }
3905
3906 #[simd_test(enable = "avx")]
3907 const fn test_mm256_cvtepi32_ps() {
3908 let a = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3909 let r = _mm256_cvtepi32_ps(a);
3910 let e = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3911 assert_eq_m256(r, e);
3912 }
3913
3914 #[simd_test(enable = "avx")]
3915 const fn test_mm256_cvtpd_ps() {
3916 let a = _mm256_setr_pd(4., 9., 16., 25.);
3917 let r = _mm256_cvtpd_ps(a);
3918 let e = _mm_setr_ps(4., 9., 16., 25.);
3919 assert_eq_m128(r, e);
3920 }
3921
3922 #[simd_test(enable = "avx")]
3923 fn test_mm256_cvtps_epi32() {
3924 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3925 let r = _mm256_cvtps_epi32(a);
3926 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3927 assert_eq_m256i(r, e);
3928 }
3929
3930 #[simd_test(enable = "avx")]
3931 const fn test_mm256_cvtps_pd() {
3932 let a = _mm_setr_ps(4., 9., 16., 25.);
3933 let r = _mm256_cvtps_pd(a);
3934 let e = _mm256_setr_pd(4., 9., 16., 25.);
3935 assert_eq_m256d(r, e);
3936 }
3937
3938 #[simd_test(enable = "avx")]
3939 const fn test_mm256_cvtsd_f64() {
3940 let a = _mm256_setr_pd(1., 2., 3., 4.);
3941 let r = _mm256_cvtsd_f64(a);
3942 assert_eq!(r, 1.);
3943 }
3944
3945 #[simd_test(enable = "avx")]
3946 fn test_mm256_cvttpd_epi32() {
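        // The `cvtt*` conversions truncate toward zero; for these whole-number inputs the result
        // matches the rounding conversion tested below.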
3947 let a = _mm256_setr_pd(4., 9., 16., 25.);
3948 let r = _mm256_cvttpd_epi32(a);
3949 let e = _mm_setr_epi32(4, 9, 16, 25);
3950 assert_eq_m128i(r, e);
3951 }
3952
3953 #[simd_test(enable = "avx")]
3954 fn test_mm256_cvtpd_epi32() {
3955 let a = _mm256_setr_pd(4., 9., 16., 25.);
3956 let r = _mm256_cvtpd_epi32(a);
3957 let e = _mm_setr_epi32(4, 9, 16, 25);
3958 assert_eq_m128i(r, e);
3959 }
3960
3961 #[simd_test(enable = "avx")]
3962 fn test_mm256_cvttps_epi32() {
3963 let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
3964 let r = _mm256_cvttps_epi32(a);
3965 let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
3966 assert_eq_m256i(r, e);
3967 }
3968
3969 #[simd_test(enable = "avx")]
3970 const fn test_mm256_extractf128_ps() {
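        // Index 0 extracts the lower 128-bit half; index 1 would extract the upper half.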
3971 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
3972 let r = _mm256_extractf128_ps::<0>(a);
3973 let e = _mm_setr_ps(4., 3., 2., 5.);
3974 assert_eq_m128(r, e);
3975 }
3976
3977 #[simd_test(enable = "avx")]
3978 const fn test_mm256_extractf128_pd() {
3979 let a = _mm256_setr_pd(4., 3., 2., 5.);
3980 let r = _mm256_extractf128_pd::<0>(a);
3981 let e = _mm_setr_pd(4., 3.);
3982 assert_eq_m128d(r, e);
3983 }
3984
3985 #[simd_test(enable = "avx")]
3986 const fn test_mm256_extractf128_si256() {
3987 let a = _mm256_setr_epi64x(4, 3, 2, 5);
3988 let r = _mm256_extractf128_si256::<0>(a);
3989 let e = _mm_setr_epi64x(4, 3);
3990 assert_eq_m128i(r, e);
3991 }
3992
3993 #[simd_test(enable = "avx")]
3994 const fn test_mm256_extract_epi32() {
3995 let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
3996 let r1 = _mm256_extract_epi32::<0>(a);
3997 let r2 = _mm256_extract_epi32::<3>(a);
3998 assert_eq!(r1, -1);
3999 assert_eq!(r2, 3);
4000 }
4001
4002 #[simd_test(enable = "avx")]
4003 const fn test_mm256_cvtsi256_si32() {
4004 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4005 let r = _mm256_cvtsi256_si32(a);
4006 assert_eq!(r, 1);
4007 }
4008
4009 #[simd_test(enable = "avx")]
4010 #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri
4011 fn test_mm256_zeroall() {
4012 _mm256_zeroall();
4013 }
4014
4015 #[simd_test(enable = "avx")]
4016 #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri
4017 fn test_mm256_zeroupper() {
4018 _mm256_zeroupper();
4019 }
4020
4021 #[simd_test(enable = "avx")]
4022 fn test_mm256_permutevar_ps() {
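        // Only the low 2 bits of each selector element are used, and selection stays within each
        // 128-bit lane, so an index of 4 wraps around to element 0.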
4023 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4024 let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4025 let r = _mm256_permutevar_ps(a, b);
4026 let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.);
4027 assert_eq_m256(r, e);
4028 }
4029
4030 #[simd_test(enable = "avx")]
4031 fn test_mm_permutevar_ps() {
4032 let a = _mm_setr_ps(4., 3., 2., 5.);
4033 let b = _mm_setr_epi32(1, 2, 3, 4);
4034 let r = _mm_permutevar_ps(a, b);
4035 let e = _mm_setr_ps(3., 2., 5., 4.);
4036 assert_eq_m128(r, e);
4037 }
4038
4039 #[simd_test(enable = "avx")]
4040 const fn test_mm256_permute_ps() {
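        // 0x1b = 0b00_01_10_11 reverses the element order within each 128-bit lane.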
4041 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4042 let r = _mm256_permute_ps::<0x1b>(a);
4043 let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.);
4044 assert_eq_m256(r, e);
4045 }
4046
4047 #[simd_test(enable = "avx")]
4048 const fn test_mm_permute_ps() {
4049 let a = _mm_setr_ps(4., 3., 2., 5.);
4050 let r = _mm_permute_ps::<0x1b>(a);
4051 let e = _mm_setr_ps(5., 2., 3., 4.);
4052 assert_eq_m128(r, e);
4053 }
4054
4055 #[simd_test(enable = "avx")]
4056 fn test_mm256_permutevar_pd() {
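        // For 64-bit elements, bit 1 of each selector element picks the element within its
        // 128-bit lane.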
4057 let a = _mm256_setr_pd(4., 3., 2., 5.);
4058 let b = _mm256_setr_epi64x(1, 2, 3, 4);
4059 let r = _mm256_permutevar_pd(a, b);
4060 let e = _mm256_setr_pd(4., 3., 5., 2.);
4061 assert_eq_m256d(r, e);
4062 }
4063
4064 #[simd_test(enable = "avx")]
4065 fn test_mm_permutevar_pd() {
4066 let a = _mm_setr_pd(4., 3.);
4067 let b = _mm_setr_epi64x(3, 0);
4068 let r = _mm_permutevar_pd(a, b);
4069 let e = _mm_setr_pd(3., 4.);
4070 assert_eq_m128d(r, e);
4071 }
4072
4073 #[simd_test(enable = "avx")]
4074 const fn test_mm256_permute_pd() {
4075 let a = _mm256_setr_pd(4., 3., 2., 5.);
4076 let r = _mm256_permute_pd::<5>(a);
4077 let e = _mm256_setr_pd(3., 4., 5., 2.);
4078 assert_eq_m256d(r, e);
4079 }
4080
4081 #[simd_test(enable = "avx")]
4082 const fn test_mm_permute_pd() {
4083 let a = _mm_setr_pd(4., 3.);
4084 let r = _mm_permute_pd::<1>(a);
4085 let e = _mm_setr_pd(3., 4.);
4086 assert_eq_m128d(r, e);
4087 }
4088
4089 #[simd_test(enable = "avx")]
4090 const fn test_mm256_permute2f128_ps() {
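        // imm8 bits [1:0] select the low 128-bit half of the result (0/1 = halves of `a`,
        // 2/3 = halves of `b`); bits [5:4] select the high half the same way.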
4091 let a = _mm256_setr_ps(11., 12., 13., 14., 15., 16., 17., 18.);
4092 let b = _mm256_setr_ps(21., 22., 23., 24., 25., 26., 27., 28.);
4093 let r = _mm256_permute2f128_ps::<0b0001_0011>(a, b);
4094 let e = _mm256_setr_ps(25., 26., 27., 28., 15., 16., 17., 18.);
4095 assert_eq_m256(r, e);
4096
4097        // Setting bit 3 or bit 7 (zero-indexed) of the immediate zeroes the corresponding 128-bit field of the result.
4098 let r = _mm256_permute2f128_ps::<0b1001_1011>(a, b);
4099 let z = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
4100 assert_eq_m256(r, z);
4101 }
4102
4103 #[simd_test(enable = "avx")]
4104 const fn test_mm256_permute2f128_pd() {
4105 let a = _mm256_setr_pd(1., 2., 3., 4.);
4106 let b = _mm256_setr_pd(5., 6., 7., 8.);
4107 let r = _mm256_permute2f128_pd::<0b0011_0001>(a, b);
4108 let e = _mm256_setr_pd(3., 4., 7., 8.);
4109 assert_eq_m256d(r, e);
4110
4111        // Setting bit 3 or bit 7 (zero-indexed) of the immediate zeroes the corresponding 128-bit field of the result.
4112 let r = _mm256_permute2f128_pd::<0b1011_1001>(a, b);
4113 let e = _mm256_setr_pd(0.0, 0.0, 0.0, 0.0);
4114 assert_eq_m256d(r, e);
4115 }
4116
4117 #[simd_test(enable = "avx")]
4118 const fn test_mm256_permute2f128_si256() {
4119 let a = _mm256_setr_epi32(11, 12, 13, 14, 15, 16, 17, 18);
4120 let b = _mm256_setr_epi32(21, 22, 23, 24, 25, 26, 27, 28);
4121 let r = _mm256_permute2f128_si256::<0b0010_0000>(a, b);
4122 let e = _mm256_setr_epi32(11, 12, 13, 14, 21, 22, 23, 24);
4123 assert_eq_m256i(r, e);
4124
4125        // Setting bit 3 or bit 7 (zero-indexed) of the immediate zeroes the corresponding 128-bit field of the result.
4126 let r = _mm256_permute2f128_si256::<0b1010_1000>(a, b);
4127 let e = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0);
4128 assert_eq_m256i(r, e);
4129 }
4130
4131 #[simd_test(enable = "avx")]
4132 const fn test_mm256_broadcast_ss() {
4133 let r = _mm256_broadcast_ss(&3.);
4134 let e = _mm256_set1_ps(3.);
4135 assert_eq_m256(r, e);
4136 }
4137
4138 #[simd_test(enable = "avx")]
4139 const fn test_mm_broadcast_ss() {
4140 let r = _mm_broadcast_ss(&3.);
4141 let e = _mm_set1_ps(3.);
4142 assert_eq_m128(r, e);
4143 }
4144
4145 #[simd_test(enable = "avx")]
4146 const fn test_mm256_broadcast_sd() {
4147 let r = _mm256_broadcast_sd(&3.);
4148 let e = _mm256_set1_pd(3.);
4149 assert_eq_m256d(r, e);
4150 }
4151
4152 #[simd_test(enable = "avx")]
4153 const fn test_mm256_broadcast_ps() {
4154 let a = _mm_setr_ps(4., 3., 2., 5.);
4155 let r = _mm256_broadcast_ps(&a);
4156 let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.);
4157 assert_eq_m256(r, e);
4158 }
4159
4160 #[simd_test(enable = "avx")]
4161 const fn test_mm256_broadcast_pd() {
4162 let a = _mm_setr_pd(4., 3.);
4163 let r = _mm256_broadcast_pd(&a);
4164 let e = _mm256_setr_pd(4., 3., 4., 3.);
4165 assert_eq_m256d(r, e);
4166 }
4167
4168 #[simd_test(enable = "avx")]
4169 const fn test_mm256_insertf128_ps() {
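        // Index 0 replaces the lower 128 bits with `b`, leaving the upper half of `a` intact.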
4170 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4171 let b = _mm_setr_ps(4., 9., 16., 25.);
4172 let r = _mm256_insertf128_ps::<0>(a, b);
4173 let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.);
4174 assert_eq_m256(r, e);
4175 }
4176
4177 #[simd_test(enable = "avx")]
4178 const fn test_mm256_insertf128_pd() {
4179 let a = _mm256_setr_pd(1., 2., 3., 4.);
4180 let b = _mm_setr_pd(5., 6.);
4181 let r = _mm256_insertf128_pd::<0>(a, b);
4182 let e = _mm256_setr_pd(5., 6., 3., 4.);
4183 assert_eq_m256d(r, e);
4184 }
4185
4186 #[simd_test(enable = "avx")]
4187 const fn test_mm256_insertf128_si256() {
4188 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4189 let b = _mm_setr_epi64x(5, 6);
4190 let r = _mm256_insertf128_si256::<0>(a, b);
4191 let e = _mm256_setr_epi64x(5, 6, 3, 4);
4192 assert_eq_m256i(r, e);
4193 }
4194
4195 #[simd_test(enable = "avx")]
4196 const fn test_mm256_insert_epi8() {
4197 #[rustfmt::skip]
4198 let a = _mm256_setr_epi8(
4199 1, 2, 3, 4, 5, 6, 7, 8,
4200 9, 10, 11, 12, 13, 14, 15, 16,
4201 17, 18, 19, 20, 21, 22, 23, 24,
4202 25, 26, 27, 28, 29, 30, 31, 32,
4203 );
4204 let r = _mm256_insert_epi8::<31>(a, 0);
4205 #[rustfmt::skip]
4206 let e = _mm256_setr_epi8(
4207 1, 2, 3, 4, 5, 6, 7, 8,
4208 9, 10, 11, 12, 13, 14, 15, 16,
4209 17, 18, 19, 20, 21, 22, 23, 24,
4210 25, 26, 27, 28, 29, 30, 31, 0,
4211 );
4212 assert_eq_m256i(r, e);
4213 }
4214
4215 #[simd_test(enable = "avx")]
4216 const fn test_mm256_insert_epi16() {
4217 #[rustfmt::skip]
4218 let a = _mm256_setr_epi16(
4219 0, 1, 2, 3, 4, 5, 6, 7,
4220 8, 9, 10, 11, 12, 13, 14, 15,
4221 );
4222 let r = _mm256_insert_epi16::<15>(a, 0);
4223 #[rustfmt::skip]
4224 let e = _mm256_setr_epi16(
4225 0, 1, 2, 3, 4, 5, 6, 7,
4226 8, 9, 10, 11, 12, 13, 14, 0,
4227 );
4228 assert_eq_m256i(r, e);
4229 }
4230
4231 #[simd_test(enable = "avx")]
4232 const fn test_mm256_insert_epi32() {
4233 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4234 let r = _mm256_insert_epi32::<7>(a, 0);
4235 let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
4236 assert_eq_m256i(r, e);
4237 }
4238
4239 #[simd_test(enable = "avx")]
4240 const fn test_mm256_load_pd() {
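        // `_mm256_load_pd` requires 32-byte alignment; `__m256d` is 32-byte aligned, so its
        // address satisfies that requirement.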
4241 let a = _mm256_setr_pd(1., 2., 3., 4.);
4242 let p = ptr::addr_of!(a) as *const f64;
4243 let r = unsafe { _mm256_load_pd(p) };
4244 let e = _mm256_setr_pd(1., 2., 3., 4.);
4245 assert_eq_m256d(r, e);
4246 }
4247
4248 #[simd_test(enable = "avx")]
4249 const fn test_mm256_store_pd() {
4250 let a = _mm256_setr_pd(1., 2., 3., 4.);
4251 let mut r = _mm256_undefined_pd();
4252 unsafe {
4253 _mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
4254 }
4255 assert_eq_m256d(r, a);
4256 }
4257
4258 #[simd_test(enable = "avx")]
4259 const fn test_mm256_load_ps() {
4260 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4261 let p = ptr::addr_of!(a) as *const f32;
4262 let r = unsafe { _mm256_load_ps(p) };
4263 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4264 assert_eq_m256(r, e);
4265 }
4266
4267 #[simd_test(enable = "avx")]
4268 const fn test_mm256_store_ps() {
4269 let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4270 let mut r = _mm256_undefined_ps();
4271 unsafe {
4272 _mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
4273 }
4274 assert_eq_m256(r, a);
4275 }
4276
4277 #[simd_test(enable = "avx")]
4278 const fn test_mm256_loadu_pd() {
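        // The unaligned load has no alignment requirement; `black_box` keeps the optimizer from
        // seeing through the pointer so the load is actually exercised.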
4279 let a = &[1.0f64, 2., 3., 4.];
4280 let p = a.as_ptr();
4281 let r = unsafe { _mm256_loadu_pd(black_box(p)) };
4282 let e = _mm256_setr_pd(1., 2., 3., 4.);
4283 assert_eq_m256d(r, e);
4284 }
4285
4286 #[simd_test(enable = "avx")]
4287 const fn test_mm256_storeu_pd() {
4288 let a = _mm256_set1_pd(9.);
4289 let mut r = _mm256_undefined_pd();
4290 unsafe {
4291 _mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
4292 }
4293 assert_eq_m256d(r, a);
4294 }
4295
4296 #[simd_test(enable = "avx")]
4297 const fn test_mm256_loadu_ps() {
4298 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
4299 let p = a.as_ptr();
4300 let r = unsafe { _mm256_loadu_ps(black_box(p)) };
4301 let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
4302 assert_eq_m256(r, e);
4303 }
4304
4305 #[simd_test(enable = "avx")]
4306 const fn test_mm256_storeu_ps() {
4307 let a = _mm256_set1_ps(9.);
4308 let mut r = _mm256_undefined_ps();
4309 unsafe {
4310 _mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
4311 }
4312 assert_eq_m256(r, a);
4313 }
4314
4315 #[simd_test(enable = "avx")]
4316 const fn test_mm256_load_si256() {
4317 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4318 let p = ptr::addr_of!(a);
4319 let r = unsafe { _mm256_load_si256(p) };
4320 let e = _mm256_setr_epi64x(1, 2, 3, 4);
4321 assert_eq_m256i(r, e);
4322 }
4323
4324 #[simd_test(enable = "avx")]
4325 const fn test_mm256_store_si256() {
4326 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4327 let mut r = _mm256_undefined_si256();
4328 unsafe {
4329 _mm256_store_si256(ptr::addr_of_mut!(r), a);
4330 }
4331 assert_eq_m256i(r, a);
4332 }
4333
4334 #[simd_test(enable = "avx")]
4335 const fn test_mm256_loadu_si256() {
4336 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4337 let p = ptr::addr_of!(a);
4338 let r = unsafe { _mm256_loadu_si256(black_box(p)) };
4339 let e = _mm256_setr_epi64x(1, 2, 3, 4);
4340 assert_eq_m256i(r, e);
4341 }
4342
4343 #[simd_test(enable = "avx")]
4344 const fn test_mm256_storeu_si256() {
4345 let a = _mm256_set1_epi8(9);
4346 let mut r = _mm256_undefined_si256();
4347 unsafe {
4348 _mm256_storeu_si256(ptr::addr_of_mut!(r), a);
4349 }
4350 assert_eq_m256i(r, a);
4351 }
4352
4353 #[simd_test(enable = "avx")]
4354 const fn test_mm256_maskload_pd() {
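        // Elements whose mask has its most significant bit set are loaded; the others are zeroed,
        // so `!0` selects an element and `0` skips it.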
4355 let a = &[1.0f64, 2., 3., 4.];
4356 let p = a.as_ptr();
4357 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4358 let r = unsafe { _mm256_maskload_pd(black_box(p), mask) };
4359 let e = _mm256_setr_pd(0., 2., 0., 4.);
4360 assert_eq_m256d(r, e);
4361 }
4362
4363 #[simd_test(enable = "avx")]
4364 const fn test_mm256_maskstore_pd() {
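        // Masked-off elements of the destination are left untouched, so they keep their initial 0.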
4365 let mut r = _mm256_set1_pd(0.);
4366 let mask = _mm256_setr_epi64x(0, !0, 0, !0);
4367 let a = _mm256_setr_pd(1., 2., 3., 4.);
4368 unsafe {
4369 _mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4370 }
4371 let e = _mm256_setr_pd(0., 2., 0., 4.);
4372 assert_eq_m256d(r, e);
4373 }
4374
4375 #[simd_test(enable = "avx")]
4376 const fn test_mm_maskload_pd() {
4377 let a = &[1.0f64, 2.];
4378 let p = a.as_ptr();
4379 let mask = _mm_setr_epi64x(0, !0);
4380 let r = unsafe { _mm_maskload_pd(black_box(p), mask) };
4381 let e = _mm_setr_pd(0., 2.);
4382 assert_eq_m128d(r, e);
4383 }
4384
4385 #[simd_test(enable = "avx")]
4386 const fn test_mm_maskstore_pd() {
4387 let mut r = _mm_set1_pd(0.);
4388 let mask = _mm_setr_epi64x(0, !0);
4389 let a = _mm_setr_pd(1., 2.);
4390 unsafe {
4391 _mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
4392 }
4393 let e = _mm_setr_pd(0., 2.);
4394 assert_eq_m128d(r, e);
4395 }
4396
4397 #[simd_test(enable = "avx")]
4398 const fn test_mm256_maskload_ps() {
4399 let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
4400 let p = a.as_ptr();
4401 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4402 let r = unsafe { _mm256_maskload_ps(black_box(p), mask) };
4403 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4404 assert_eq_m256(r, e);
4405 }
4406
4407 #[simd_test(enable = "avx")]
4408 const fn test_mm256_maskstore_ps() {
4409 let mut r = _mm256_set1_ps(0.);
4410 let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
4411 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4412 unsafe {
4413 _mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4414 }
4415 let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
4416 assert_eq_m256(r, e);
4417 }
4418
4419 #[simd_test(enable = "avx")]
4420 const fn test_mm_maskload_ps() {
4421 let a = &[1.0f32, 2., 3., 4.];
4422 let p = a.as_ptr();
4423 let mask = _mm_setr_epi32(0, !0, 0, !0);
4424 let r = unsafe { _mm_maskload_ps(black_box(p), mask) };
4425 let e = _mm_setr_ps(0., 2., 0., 4.);
4426 assert_eq_m128(r, e);
4427 }
4428
4429 #[simd_test(enable = "avx")]
4430 const fn test_mm_maskstore_ps() {
4431 let mut r = _mm_set1_ps(0.);
4432 let mask = _mm_setr_epi32(0, !0, 0, !0);
4433 let a = _mm_setr_ps(1., 2., 3., 4.);
4434 unsafe {
4435 _mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
4436 }
4437 let e = _mm_setr_ps(0., 2., 0., 4.);
4438 assert_eq_m128(r, e);
4439 }
4440
4441 #[simd_test(enable = "avx")]
4442 const fn test_mm256_movehdup_ps() {
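        // Duplicates the odd-indexed element of each pair into both positions.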
4443 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4444 let r = _mm256_movehdup_ps(a);
4445 let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.);
4446 assert_eq_m256(r, e);
4447 }
4448
4449 #[simd_test(enable = "avx")]
4450 const fn test_mm256_moveldup_ps() {
4451 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4452 let r = _mm256_moveldup_ps(a);
4453 let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.);
4454 assert_eq_m256(r, e);
4455 }
4456
4457 #[simd_test(enable = "avx")]
4458 const fn test_mm256_movedup_pd() {
4459 let a = _mm256_setr_pd(1., 2., 3., 4.);
4460 let r = _mm256_movedup_pd(a);
4461 let e = _mm256_setr_pd(1., 1., 3., 3.);
4462 assert_eq_m256d(r, e);
4463 }
4464
4465 #[simd_test(enable = "avx")]
4466 fn test_mm256_lddqu_si256() {
4467 #[rustfmt::skip]
4468 let a = _mm256_setr_epi8(
4469 1, 2, 3, 4, 5, 6, 7, 8,
4470 9, 10, 11, 12, 13, 14, 15, 16,
4471 17, 18, 19, 20, 21, 22, 23, 24,
4472 25, 26, 27, 28, 29, 30, 31, 32,
4473 );
4474 let p = ptr::addr_of!(a);
4475 let r = unsafe { _mm256_lddqu_si256(black_box(p)) };
4476 #[rustfmt::skip]
4477 let e = _mm256_setr_epi8(
4478 1, 2, 3, 4, 5, 6, 7, 8,
4479 9, 10, 11, 12, 13, 14, 15, 16,
4480 17, 18, 19, 20, 21, 22, 23, 24,
4481 25, 26, 27, 28, 29, 30, 31, 32,
4482 );
4483 assert_eq_m256i(r, e);
4484 }
4485
4486 #[simd_test(enable = "avx")]
4487 #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
4488 fn test_mm256_stream_si256() {
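        // Non-temporal stores are weakly ordered; the `_mm_sfence()` afterwards ensures the
        // store completes before the result is checked.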
4489 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4490 let mut r = _mm256_undefined_si256();
4491 unsafe {
4492 _mm256_stream_si256(ptr::addr_of_mut!(r), a);
4493 }
4494 _mm_sfence();
4495 assert_eq_m256i(r, a);
4496 }
4497
4498 #[simd_test(enable = "avx")]
4499 #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
4500 fn test_mm256_stream_pd() {
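        // `_mm256_stream_pd` requires a 32-byte-aligned destination, hence the aligned wrapper.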
4501 #[repr(align(32))]
4502 struct Memory {
4503 pub data: [f64; 4],
4504 }
4505 let a = _mm256_set1_pd(7.0);
4506 let mut mem = Memory { data: [-1.0; 4] };
4507
4508 unsafe {
4509 _mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
4510 }
4511 _mm_sfence();
4512 for i in 0..4 {
4513 assert_eq!(mem.data[i], get_m256d(a, i));
4514 }
4515 }
4516
4517 #[simd_test(enable = "avx")]
4518 #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
4519 fn test_mm256_stream_ps() {
4520 #[repr(align(32))]
4521 struct Memory {
4522 pub data: [f32; 8],
4523 }
4524 let a = _mm256_set1_ps(7.0);
4525 let mut mem = Memory { data: [-1.0; 8] };
4526
4527 unsafe {
4528 _mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
4529 }
4530 _mm_sfence();
4531 for i in 0..8 {
4532 assert_eq!(mem.data[i], get_m256(a, i));
4533 }
4534 }
4535
4536 #[simd_test(enable = "avx")]
4537 fn test_mm256_rcp_ps() {
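        // `rcp` only approximates 1/x (documented maximum relative error of 1.5 * 2^-12),
        // hence the approximate comparison below.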
4538 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4539 let r = _mm256_rcp_ps(a);
4540 #[rustfmt::skip]
4541 let e = _mm256_setr_ps(
4542 0.99975586, 0.49987793, 0.33325195, 0.24993896,
4543 0.19995117, 0.16662598, 0.14282227, 0.12496948,
4544 );
4545 let rel_err = 0.00048828125;
4546 for i in 0..8 {
4547 assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
4548 }
4549 }
4550
4551 #[simd_test(enable = "avx")]
4552 fn test_mm256_rsqrt_ps() {
4553 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4554 let r = _mm256_rsqrt_ps(a);
4555 #[rustfmt::skip]
4556 let e = _mm256_setr_ps(
4557 0.99975586, 0.7069092, 0.5772705, 0.49987793,
4558 0.44714355, 0.40820313, 0.3779297, 0.3534546,
4559 );
4560 let rel_err = 0.00048828125;
4561 for i in 0..8 {
4562 assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err);
4563 }
4564 }
4565
4566 #[simd_test(enable = "avx")]
4567 const fn test_mm256_unpackhi_pd() {
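        // Interleaves the upper elements of `a` and `b` within each 128-bit lane.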
4568 let a = _mm256_setr_pd(1., 2., 3., 4.);
4569 let b = _mm256_setr_pd(5., 6., 7., 8.);
4570 let r = _mm256_unpackhi_pd(a, b);
4571 let e = _mm256_setr_pd(2., 6., 4., 8.);
4572 assert_eq_m256d(r, e);
4573 }
4574
4575 #[simd_test(enable = "avx")]
4576 const fn test_mm256_unpackhi_ps() {
4577 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4578 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
4579 let r = _mm256_unpackhi_ps(a, b);
4580 let e = _mm256_setr_ps(3., 11., 4., 12., 7., 15., 8., 16.);
4581 assert_eq_m256(r, e);
4582 }
4583
4584 #[simd_test(enable = "avx")]
4585 const fn test_mm256_unpacklo_pd() {
4586 let a = _mm256_setr_pd(1., 2., 3., 4.);
4587 let b = _mm256_setr_pd(5., 6., 7., 8.);
4588 let r = _mm256_unpacklo_pd(a, b);
4589 let e = _mm256_setr_pd(1., 5., 3., 7.);
4590 assert_eq_m256d(r, e);
4591 }
4592
4593 #[simd_test(enable = "avx")]
4594 const fn test_mm256_unpacklo_ps() {
4595 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4596 let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
4597 let r = _mm256_unpacklo_ps(a, b);
4598 let e = _mm256_setr_ps(1., 9., 2., 10., 5., 13., 6., 14.);
4599 assert_eq_m256(r, e);
4600 }
4601
4602 #[simd_test(enable = "avx")]
4603 const fn test_mm256_testz_si256() {
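        // Returns 1 only if `a AND b` is all zeros (the operands share no set bits).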
4604 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4605 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4606 let r = _mm256_testz_si256(a, b);
4607 assert_eq!(r, 0);
4608 let b = _mm256_set1_epi64x(0);
4609 let r = _mm256_testz_si256(a, b);
4610 assert_eq!(r, 1);
4611 }
4612
4613 #[simd_test(enable = "avx")]
4614 const fn test_mm256_testc_si256() {
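        // Returns 1 only if `(NOT a) AND b` is all zeros (every bit set in `b` is also set in `a`).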
4615 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4616 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4617 let r = _mm256_testc_si256(a, b);
4618 assert_eq!(r, 0);
4619 let b = _mm256_set1_epi64x(0);
4620 let r = _mm256_testc_si256(a, b);
4621 assert_eq!(r, 1);
4622 }
4623
4624 #[simd_test(enable = "avx")]
4625 fn test_mm256_testnzc_si256() {
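        // Returns 1 only if both `a AND b` and `(NOT a) AND b` are non-zero.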
4626 let a = _mm256_setr_epi64x(1, 2, 3, 4);
4627 let b = _mm256_setr_epi64x(5, 6, 7, 8);
4628 let r = _mm256_testnzc_si256(a, b);
4629 assert_eq!(r, 1);
4630 let a = _mm256_setr_epi64x(0, 0, 0, 0);
4631 let b = _mm256_setr_epi64x(0, 0, 0, 0);
4632 let r = _mm256_testnzc_si256(a, b);
4633 assert_eq!(r, 0);
4634 }
4635
4636 #[simd_test(enable = "avx")]
4637 fn test_mm256_testz_pd() {
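        // The `pd`/`ps` variants test only the sign bits of the elements.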
4638 let a = _mm256_setr_pd(1., 2., 3., 4.);
4639 let b = _mm256_setr_pd(5., 6., 7., 8.);
4640 let r = _mm256_testz_pd(a, b);
4641 assert_eq!(r, 1);
4642 let a = _mm256_set1_pd(-1.);
4643 let r = _mm256_testz_pd(a, a);
4644 assert_eq!(r, 0);
4645 }
4646
4647 #[simd_test(enable = "avx")]
4648 fn test_mm256_testc_pd() {
4649 let a = _mm256_setr_pd(1., 2., 3., 4.);
4650 let b = _mm256_setr_pd(5., 6., 7., 8.);
4651 let r = _mm256_testc_pd(a, b);
4652 assert_eq!(r, 1);
4653 let a = _mm256_set1_pd(1.);
4654 let b = _mm256_set1_pd(-1.);
4655 let r = _mm256_testc_pd(a, b);
4656 assert_eq!(r, 0);
4657 }
4658
4659 #[simd_test(enable = "avx")]
4660 fn test_mm256_testnzc_pd() {
4661 let a = _mm256_setr_pd(1., 2., 3., 4.);
4662 let b = _mm256_setr_pd(5., 6., 7., 8.);
4663 let r = _mm256_testnzc_pd(a, b);
4664 assert_eq!(r, 0);
4665 let a = _mm256_setr_pd(1., -1., -1., -1.);
4666 let b = _mm256_setr_pd(-1., -1., 1., 1.);
4667 let r = _mm256_testnzc_pd(a, b);
4668 assert_eq!(r, 1);
4669 }
4670
4671 #[simd_test(enable = "avx")]
4672 const fn test_mm_testz_pd() {
4673 let a = _mm_setr_pd(1., 2.);
4674 let b = _mm_setr_pd(5., 6.);
4675 let r = _mm_testz_pd(a, b);
4676 assert_eq!(r, 1);
4677 let a = _mm_set1_pd(-1.);
4678 let r = _mm_testz_pd(a, a);
4679 assert_eq!(r, 0);
4680 }
4681
4682 #[simd_test(enable = "avx")]
4683 const fn test_mm_testc_pd() {
4684 let a = _mm_setr_pd(1., 2.);
4685 let b = _mm_setr_pd(5., 6.);
4686 let r = _mm_testc_pd(a, b);
4687 assert_eq!(r, 1);
4688 let a = _mm_set1_pd(1.);
4689 let b = _mm_set1_pd(-1.);
4690 let r = _mm_testc_pd(a, b);
4691 assert_eq!(r, 0);
4692 }
4693
4694 #[simd_test(enable = "avx")]
4695 fn test_mm_testnzc_pd() {
4696 let a = _mm_setr_pd(1., 2.);
4697 let b = _mm_setr_pd(5., 6.);
4698 let r = _mm_testnzc_pd(a, b);
4699 assert_eq!(r, 0);
4700 let a = _mm_setr_pd(1., -1.);
4701 let b = _mm_setr_pd(-1., -1.);
4702 let r = _mm_testnzc_pd(a, b);
4703 assert_eq!(r, 1);
4704 }
4705
4706 #[simd_test(enable = "avx")]
4707 fn test_mm256_testz_ps() {
4708 let a = _mm256_set1_ps(1.);
4709 let r = _mm256_testz_ps(a, a);
4710 assert_eq!(r, 1);
4711 let a = _mm256_set1_ps(-1.);
4712 let r = _mm256_testz_ps(a, a);
4713 assert_eq!(r, 0);
4714 }
4715
4716 #[simd_test(enable = "avx")]
4717 fn test_mm256_testc_ps() {
4718 let a = _mm256_set1_ps(1.);
4719 let r = _mm256_testc_ps(a, a);
4720 assert_eq!(r, 1);
4721 let b = _mm256_set1_ps(-1.);
4722 let r = _mm256_testc_ps(a, b);
4723 assert_eq!(r, 0);
4724 }
4725
4726 #[simd_test(enable = "avx")]
4727 fn test_mm256_testnzc_ps() {
4728 let a = _mm256_set1_ps(1.);
4729 let r = _mm256_testnzc_ps(a, a);
4730 assert_eq!(r, 0);
4731 let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.);
4732 let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.);
4733 let r = _mm256_testnzc_ps(a, b);
4734 assert_eq!(r, 1);
4735 }
4736
4737 #[simd_test(enable = "avx")]
4738 const fn test_mm_testz_ps() {
4739 let a = _mm_set1_ps(1.);
4740 let r = _mm_testz_ps(a, a);
4741 assert_eq!(r, 1);
4742 let a = _mm_set1_ps(-1.);
4743 let r = _mm_testz_ps(a, a);
4744 assert_eq!(r, 0);
4745 }
4746
4747 #[simd_test(enable = "avx")]
4748 const fn test_mm_testc_ps() {
4749 let a = _mm_set1_ps(1.);
4750 let r = _mm_testc_ps(a, a);
4751 assert_eq!(r, 1);
4752 let b = _mm_set1_ps(-1.);
4753 let r = _mm_testc_ps(a, b);
4754 assert_eq!(r, 0);
4755 }
4756
4757 #[simd_test(enable = "avx")]
4758 fn test_mm_testnzc_ps() {
4759 let a = _mm_set1_ps(1.);
4760 let r = _mm_testnzc_ps(a, a);
4761 assert_eq!(r, 0);
4762 let a = _mm_setr_ps(1., -1., -1., -1.);
4763 let b = _mm_setr_ps(-1., -1., 1., 1.);
4764 let r = _mm_testnzc_ps(a, b);
4765 assert_eq!(r, 1);
4766 }
4767
4768 #[simd_test(enable = "avx")]
4769 const fn test_mm256_movemask_pd() {
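        // Collects the sign bit of each element; elements 1 and 3 are negative, giving 0b1010 = 0xA.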
4770 let a = _mm256_setr_pd(1., -2., 3., -4.);
4771 let r = _mm256_movemask_pd(a);
4772 assert_eq!(r, 0xA);
4773 }
4774
4775 #[simd_test(enable = "avx")]
4776 const fn test_mm256_movemask_ps() {
4777 let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.);
4778 let r = _mm256_movemask_ps(a);
4779 assert_eq!(r, 0xAA);
4780 }
4781
4782 #[simd_test(enable = "avx")]
4783 const fn test_mm256_setzero_pd() {
4784 let r = _mm256_setzero_pd();
4785 assert_eq_m256d(r, _mm256_set1_pd(0.));
4786 }
4787
4788 #[simd_test(enable = "avx")]
4789 const fn test_mm256_setzero_ps() {
4790 let r = _mm256_setzero_ps();
4791 assert_eq_m256(r, _mm256_set1_ps(0.));
4792 }
4793
4794 #[simd_test(enable = "avx")]
4795 const fn test_mm256_setzero_si256() {
4796 let r = _mm256_setzero_si256();
4797 assert_eq_m256i(r, _mm256_set1_epi8(0));
4798 }
4799
4800 #[simd_test(enable = "avx")]
4801 const fn test_mm256_set_pd() {
4802 let r = _mm256_set_pd(1., 2., 3., 4.);
4803 assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 1.));
4804 }
4805
4806 #[simd_test(enable = "avx")]
4807 const fn test_mm256_set_ps() {
4808 let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4809 assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.));
4810 }
4811
4812 #[simd_test(enable = "avx")]
4813 const fn test_mm256_set_epi8() {
4814 #[rustfmt::skip]
4815 let r = _mm256_set_epi8(
4816 1, 2, 3, 4, 5, 6, 7, 8,
4817 9, 10, 11, 12, 13, 14, 15, 16,
4818 17, 18, 19, 20, 21, 22, 23, 24,
4819 25, 26, 27, 28, 29, 30, 31, 32,
4820 );
4821 #[rustfmt::skip]
4822 let e = _mm256_setr_epi8(
4823 32, 31, 30, 29, 28, 27, 26, 25,
4824 24, 23, 22, 21, 20, 19, 18, 17,
4825 16, 15, 14, 13, 12, 11, 10, 9,
4826 8, 7, 6, 5, 4, 3, 2, 1
4827 );
4828 assert_eq_m256i(r, e);
4829 }
4830
4831 #[simd_test(enable = "avx")]
4832 const fn test_mm256_set_epi16() {
4833 #[rustfmt::skip]
4834 let r = _mm256_set_epi16(
4835 1, 2, 3, 4, 5, 6, 7, 8,
4836 9, 10, 11, 12, 13, 14, 15, 16,
4837 );
4838 #[rustfmt::skip]
4839 let e = _mm256_setr_epi16(
4840 16, 15, 14, 13, 12, 11, 10, 9, 8,
4841 7, 6, 5, 4, 3, 2, 1,
4842 );
4843 assert_eq_m256i(r, e);
4844 }
4845
4846 #[simd_test(enable = "avx")]
4847 const fn test_mm256_set_epi32() {
4848 let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4849 assert_eq_m256i(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1));
4850 }
4851
4852 #[simd_test(enable = "avx")]
4853 const fn test_mm256_set_epi64x() {
4854 let r = _mm256_set_epi64x(1, 2, 3, 4);
4855 assert_eq_m256i(r, _mm256_setr_epi64x(4, 3, 2, 1));
4856 }
4857
4858 #[simd_test(enable = "avx")]
4859 const fn test_mm256_setr_pd() {
4860 let r = _mm256_setr_pd(1., 2., 3., 4.);
4861 assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.));
4862 }
4863
4864 #[simd_test(enable = "avx")]
4865 const fn test_mm256_setr_ps() {
4866 let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4867 assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
4868 }
4869
4870 #[simd_test(enable = "avx")]
4871 const fn test_mm256_setr_epi8() {
4872 #[rustfmt::skip]
4873 let r = _mm256_setr_epi8(
4874 1, 2, 3, 4, 5, 6, 7, 8,
4875 9, 10, 11, 12, 13, 14, 15, 16,
4876 17, 18, 19, 20, 21, 22, 23, 24,
4877 25, 26, 27, 28, 29, 30, 31, 32,
4878 );
4879 #[rustfmt::skip]
4880 let e = _mm256_setr_epi8(
4881 1, 2, 3, 4, 5, 6, 7, 8,
4882 9, 10, 11, 12, 13, 14, 15, 16,
4883 17, 18, 19, 20, 21, 22, 23, 24,
4884 25, 26, 27, 28, 29, 30, 31, 32
4885 );
4886
4887 assert_eq_m256i(r, e);
4888 }
4889
4890 #[simd_test(enable = "avx")]
4891 const fn test_mm256_setr_epi16() {
4892 #[rustfmt::skip]
4893 let r = _mm256_setr_epi16(
4894 1, 2, 3, 4, 5, 6, 7, 8,
4895 9, 10, 11, 12, 13, 14, 15, 16,
4896 );
4897 #[rustfmt::skip]
4898 let e = _mm256_setr_epi16(
4899 1, 2, 3, 4, 5, 6, 7, 8,
4900 9, 10, 11, 12, 13, 14, 15, 16,
4901 );
4902 assert_eq_m256i(r, e);
4903 }
4904
4905 #[simd_test(enable = "avx")]
4906 const fn test_mm256_setr_epi32() {
4907 let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4908 assert_eq_m256i(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8));
4909 }
4910
4911 #[simd_test(enable = "avx")]
4912 const fn test_mm256_setr_epi64x() {
4913 let r = _mm256_setr_epi64x(1, 2, 3, 4);
4914 assert_eq_m256i(r, _mm256_setr_epi64x(1, 2, 3, 4));
4915 }
4916
4917 #[simd_test(enable = "avx")]
4918 const fn test_mm256_set1_pd() {
4919 let r = _mm256_set1_pd(1.);
4920 assert_eq_m256d(r, _mm256_set1_pd(1.));
4921 }
4922
4923 #[simd_test(enable = "avx")]
4924 const fn test_mm256_set1_ps() {
4925 let r = _mm256_set1_ps(1.);
4926 assert_eq_m256(r, _mm256_set1_ps(1.));
4927 }
4928
4929 #[simd_test(enable = "avx")]
4930 const fn test_mm256_set1_epi8() {
4931 let r = _mm256_set1_epi8(1);
4932 assert_eq_m256i(r, _mm256_set1_epi8(1));
4933 }
4934
4935 #[simd_test(enable = "avx")]
4936 const fn test_mm256_set1_epi16() {
4937 let r = _mm256_set1_epi16(1);
4938 assert_eq_m256i(r, _mm256_set1_epi16(1));
4939 }
4940
4941 #[simd_test(enable = "avx")]
4942 const fn test_mm256_set1_epi32() {
4943 let r = _mm256_set1_epi32(1);
4944 assert_eq_m256i(r, _mm256_set1_epi32(1));
4945 }
4946
4947 #[simd_test(enable = "avx")]
4948 const fn test_mm256_set1_epi64x() {
4949 let r = _mm256_set1_epi64x(1);
4950 assert_eq_m256i(r, _mm256_set1_epi64x(1));
4951 }
4952
4953 #[simd_test(enable = "avx")]
4954 const fn test_mm256_castpd_ps() {
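        // A cast is a bit-for-bit reinterpretation: e.g. the high 32 bits of the f64 value 1.0
        // (0x3FF00000) read as an f32 give 1.875, which explains the expected values below.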
4955 let a = _mm256_setr_pd(1., 2., 3., 4.);
4956 let r = _mm256_castpd_ps(a);
4957 let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
4958 assert_eq_m256(r, e);
4959 }
4960
4961 #[simd_test(enable = "avx")]
4962 const fn test_mm256_castps_pd() {
4963 let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
4964 let r = _mm256_castps_pd(a);
4965 let e = _mm256_setr_pd(1., 2., 3., 4.);
4966 assert_eq_m256d(r, e);
4967 }
4968
4969 #[simd_test(enable = "avx")]
4970 const fn test_mm256_castps_si256() {
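        // Little-endian byte view of the f32 bit patterns: 1.0f32 = 0x3F800000 -> 0, 0, -128, 63.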
4971 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4972 let r = _mm256_castps_si256(a);
4973 #[rustfmt::skip]
4974 let e = _mm256_setr_epi8(
4975 0, 0, -128, 63, 0, 0, 0, 64,
4976 0, 0, 64, 64, 0, 0, -128, 64,
4977 0, 0, -96, 64, 0, 0, -64, 64,
4978 0, 0, -32, 64, 0, 0, 0, 65,
4979 );
4980 assert_eq_m256i(r, e);
4981 }
4982
4983 #[simd_test(enable = "avx")]
4984 const fn test_mm256_castsi256_ps() {
4985 #[rustfmt::skip]
4986 let a = _mm256_setr_epi8(
4987 0, 0, -128, 63, 0, 0, 0, 64,
4988 0, 0, 64, 64, 0, 0, -128, 64,
4989 0, 0, -96, 64, 0, 0, -64, 64,
4990 0, 0, -32, 64, 0, 0, 0, 65,
4991 );
4992 let r = _mm256_castsi256_ps(a);
4993 let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
4994 assert_eq_m256(r, e);
4995 }
4996
4997 #[simd_test(enable = "avx")]
4998 const fn test_mm256_castpd_si256() {
4999 let a = _mm256_setr_pd(1., 2., 3., 4.);
5000 let r = _mm256_castpd_si256(a);
5001 assert_eq_m256d(unsafe { transmute(r) }, a);
5002 }
5003
5004 #[simd_test(enable = "avx")]
5005 const fn test_mm256_castsi256_pd() {
5006 let a = _mm256_setr_epi64x(1, 2, 3, 4);
5007 let r = _mm256_castsi256_pd(a);
5008 assert_eq_m256d(r, unsafe { transmute(a) });
5009 }
5010
5011 #[simd_test(enable = "avx")]
5012 const fn test_mm256_castps256_ps128() {
5013 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5014 let r = _mm256_castps256_ps128(a);
5015 assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.));
5016 }
5017
5018 #[simd_test(enable = "avx")]
5019 const fn test_mm256_castpd256_pd128() {
5020 let a = _mm256_setr_pd(1., 2., 3., 4.);
5021 let r = _mm256_castpd256_pd128(a);
5022 assert_eq_m128d(r, _mm_setr_pd(1., 2.));
5023 }
5024
5025 #[simd_test(enable = "avx")]
5026 const fn test_mm256_castsi256_si128() {
5027 let a = _mm256_setr_epi64x(1, 2, 3, 4);
5028 let r = _mm256_castsi256_si128(a);
5029 assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
5030 }
5031
5032 #[simd_test(enable = "avx")]
5033 const fn test_mm256_castps128_ps256() {
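        // The upper 128 bits of the result are undefined after a cast, so only the lower half
        // is checked.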
5034 let a = _mm_setr_ps(1., 2., 3., 4.);
5035 let r = _mm256_castps128_ps256(a);
5036 assert_eq_m128(_mm256_castps256_ps128(r), a);
5037 }
5038
5039 #[simd_test(enable = "avx")]
5040 const fn test_mm256_castpd128_pd256() {
5041 let a = _mm_setr_pd(1., 2.);
5042 let r = _mm256_castpd128_pd256(a);
5043 assert_eq_m128d(_mm256_castpd256_pd128(r), a);
5044 }
5045
5046 #[simd_test(enable = "avx")]
5047 const fn test_mm256_castsi128_si256() {
5048 let a = _mm_setr_epi32(1, 2, 3, 4);
5049 let r = _mm256_castsi128_si256(a);
5050 assert_eq_m128i(_mm256_castsi256_si128(r), a);
5051 }
5052
5053 #[simd_test(enable = "avx")]
5054 const fn test_mm256_zextps128_ps256() {
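        // Unlike the plain cast, `zext` guarantees the upper 128 bits are zeroed.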
5055 let a = _mm_setr_ps(1., 2., 3., 4.);
5056 let r = _mm256_zextps128_ps256(a);
5057 let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.);
5058 assert_eq_m256(r, e);
5059 }
5060
5061 #[simd_test(enable = "avx")]
5062 const fn test_mm256_zextsi128_si256() {
5063 let a = _mm_setr_epi64x(1, 2);
5064 let r = _mm256_zextsi128_si256(a);
5065 let e = _mm256_setr_epi64x(1, 2, 0, 0);
5066 assert_eq_m256i(r, e);
5067 }
5068
5069 #[simd_test(enable = "avx")]
5070 const fn test_mm256_zextpd128_pd256() {
5071 let a = _mm_setr_pd(1., 2.);
5072 let r = _mm256_zextpd128_pd256(a);
5073 let e = _mm256_setr_pd(1., 2., 0., 0.);
5074 assert_eq_m256d(r, e);
5075 }
5076
5077 #[simd_test(enable = "avx")]
5078 const fn test_mm256_set_m128() {
5079 let hi = _mm_setr_ps(5., 6., 7., 8.);
5080 let lo = _mm_setr_ps(1., 2., 3., 4.);
5081 let r = _mm256_set_m128(hi, lo);
5082 let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5083 assert_eq_m256(r, e);
5084 }
5085
5086 #[simd_test(enable = "avx")]
5087 const fn test_mm256_set_m128d() {
5088 let hi = _mm_setr_pd(3., 4.);
5089 let lo = _mm_setr_pd(1., 2.);
5090 let r = _mm256_set_m128d(hi, lo);
5091 let e = _mm256_setr_pd(1., 2., 3., 4.);
5092 assert_eq_m256d(r, e);
5093 }
5094
5095 #[simd_test(enable = "avx")]
5096 const fn test_mm256_set_m128i() {
5097 #[rustfmt::skip]
5098 let hi = _mm_setr_epi8(
5099 17, 18, 19, 20,
5100 21, 22, 23, 24,
5101 25, 26, 27, 28,
5102 29, 30, 31, 32,
5103 );
5104 #[rustfmt::skip]
5105 let lo = _mm_setr_epi8(
5106 1, 2, 3, 4,
5107 5, 6, 7, 8,
5108 9, 10, 11, 12,
5109 13, 14, 15, 16,
5110 );
5111 let r = _mm256_set_m128i(hi, lo);
5112 #[rustfmt::skip]
5113 let e = _mm256_setr_epi8(
5114 1, 2, 3, 4, 5, 6, 7, 8,
5115 9, 10, 11, 12, 13, 14, 15, 16,
5116 17, 18, 19, 20, 21, 22, 23, 24,
5117 25, 26, 27, 28, 29, 30, 31, 32,
5118 );
5119 assert_eq_m256i(r, e);
5120 }
5121
5122 #[simd_test(enable = "avx")]
5123 const fn test_mm256_setr_m128() {
5124 let lo = _mm_setr_ps(1., 2., 3., 4.);
5125 let hi = _mm_setr_ps(5., 6., 7., 8.);
5126 let r = _mm256_setr_m128(lo, hi);
5127 let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5128 assert_eq_m256(r, e);
5129 }
5130
5131 #[simd_test(enable = "avx")]
5132 const fn test_mm256_setr_m128d() {
5133 let lo = _mm_setr_pd(1., 2.);
5134 let hi = _mm_setr_pd(3., 4.);
5135 let r = _mm256_setr_m128d(lo, hi);
5136 let e = _mm256_setr_pd(1., 2., 3., 4.);
5137 assert_eq_m256d(r, e);
5138 }
5139
5140 #[simd_test(enable = "avx")]
5141 const fn test_mm256_setr_m128i() {
5142 #[rustfmt::skip]
5143 let lo = _mm_setr_epi8(
5144 1, 2, 3, 4,
5145 5, 6, 7, 8,
5146 9, 10, 11, 12,
5147 13, 14, 15, 16,
5148 );
5149 #[rustfmt::skip]
5150 let hi = _mm_setr_epi8(
5151 17, 18, 19, 20, 21, 22, 23, 24,
5152 25, 26, 27, 28, 29, 30, 31, 32,
5153 );
5154 let r = _mm256_setr_m128i(lo, hi);
5155 #[rustfmt::skip]
5156 let e = _mm256_setr_epi8(
5157 1, 2, 3, 4, 5, 6, 7, 8,
5158 9, 10, 11, 12, 13, 14, 15, 16,
5159 17, 18, 19, 20, 21, 22, 23, 24,
5160 25, 26, 27, 28, 29, 30, 31, 32,
5161 );
5162 assert_eq_m256i(r, e);
5163 }
5164
5165 #[simd_test(enable = "avx")]
5166 const fn test_mm256_loadu2_m128() {
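        // Loads the two 128-bit halves from two independent, possibly unaligned addresses:
        // `lo` fills the lower half of the result and `hi` the upper half.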
5167 let hi = &[5., 6., 7., 8.];
5168 let hiaddr = hi.as_ptr();
5169 let lo = &[1., 2., 3., 4.];
5170 let loaddr = lo.as_ptr();
5171 let r = unsafe { _mm256_loadu2_m128(hiaddr, loaddr) };
5172 let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5173 assert_eq_m256(r, e);
5174 }
5175
5176 #[simd_test(enable = "avx")]
5177 const fn test_mm256_loadu2_m128d() {
5178 let hi = &[3., 4.];
5179 let hiaddr = hi.as_ptr();
5180 let lo = &[1., 2.];
5181 let loaddr = lo.as_ptr();
5182 let r = unsafe { _mm256_loadu2_m128d(hiaddr, loaddr) };
5183 let e = _mm256_setr_pd(1., 2., 3., 4.);
5184 assert_eq_m256d(r, e);
5185 }
5186
5187 #[simd_test(enable = "avx")]
5188 const fn test_mm256_loadu2_m128i() {
5189 #[rustfmt::skip]
5190 let hi = _mm_setr_epi8(
5191 17, 18, 19, 20, 21, 22, 23, 24,
5192 25, 26, 27, 28, 29, 30, 31, 32,
5193 );
5194 #[rustfmt::skip]
5195 let lo = _mm_setr_epi8(
5196 1, 2, 3, 4, 5, 6, 7, 8,
5197 9, 10, 11, 12, 13, 14, 15, 16,
5198 );
5199 let r = unsafe {
5200 _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _)
5201 };
5202 #[rustfmt::skip]
5203 let e = _mm256_setr_epi8(
5204 1, 2, 3, 4, 5, 6, 7, 8,
5205 9, 10, 11, 12, 13, 14, 15, 16,
5206 17, 18, 19, 20, 21, 22, 23, 24,
5207 25, 26, 27, 28, 29, 30, 31, 32,
5208 );
5209 assert_eq_m256i(r, e);
5210 }
5211
5212 #[simd_test(enable = "avx")]
5213 const fn test_mm256_storeu2_m128() {
5214 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5215 let mut hi = _mm_undefined_ps();
5216 let mut lo = _mm_undefined_ps();
5217 unsafe {
5218 _mm256_storeu2_m128(
5219 ptr::addr_of_mut!(hi) as *mut f32,
5220 ptr::addr_of_mut!(lo) as *mut f32,
5221 a,
5222 );
5223 }
5224 assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.));
5225 assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
5226 }
5227
5228 #[simd_test(enable = "avx")]
5229 const fn test_mm256_storeu2_m128d() {
5230 let a = _mm256_setr_pd(1., 2., 3., 4.);
5231 let mut hi = _mm_undefined_pd();
5232 let mut lo = _mm_undefined_pd();
5233 unsafe {
5234 _mm256_storeu2_m128d(
5235 ptr::addr_of_mut!(hi) as *mut f64,
5236 ptr::addr_of_mut!(lo) as *mut f64,
5237 a,
5238 );
5239 }
5240 assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
5241 assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
5242 }
5243
5244 #[simd_test(enable = "avx")]
5245 const fn test_mm256_storeu2_m128i() {
5246 #[rustfmt::skip]
5247 let a = _mm256_setr_epi8(
5248 1, 2, 3, 4, 5, 6, 7, 8,
5249 9, 10, 11, 12, 13, 14, 15, 16,
5250 17, 18, 19, 20, 21, 22, 23, 24,
5251 25, 26, 27, 28, 29, 30, 31, 32,
5252 );
5253 let mut hi = _mm_undefined_si128();
5254 let mut lo = _mm_undefined_si128();
5255 unsafe {
5256 _mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
5257 }
5258 #[rustfmt::skip]
5259 let e_hi = _mm_setr_epi8(
5260 17, 18, 19, 20, 21, 22, 23, 24,
5261 25, 26, 27, 28, 29, 30, 31, 32
5262 );
5263 #[rustfmt::skip]
5264 let e_lo = _mm_setr_epi8(
5265 1, 2, 3, 4, 5, 6, 7, 8,
5266 9, 10, 11, 12, 13, 14, 15, 16
5267 );
5268
5269 assert_eq_m128i(hi, e_hi);
5270 assert_eq_m128i(lo, e_lo);
5271 }
5272
5273 #[simd_test(enable = "avx")]
5274 const fn test_mm256_cvtss_f32() {
5275 let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5276 let r = _mm256_cvtss_f32(a);
5277 assert_eq!(r, 1.);
5278 }
5279}
5280