1// Copyright 2020 Yevhenii Reizner
2//
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE file.
5
6// Based on https://github.com/Lokathor/wide (Zlib)
7
8use super::{f32x8, i32x8};
9
cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        use bytemuck::cast;

        /// Eight `u32` lanes backed by a single 256-bit AVX2 register.
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(32))]
        pub struct u32x8(__m256i);
    } else {
        use super::u32x4;

        /// Eight `u32` lanes emulated as two `u32x4` halves.
        ///
        /// NOTE(review): `align(32)` keeps this layout-compatible with the
        /// AVX2 variant, so `bytemuck` casts behave identically on every
        /// target.
        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(32))]
        pub struct u32x8(u32x4, u32x4);
    }
}
30
// SAFETY: in both cfg variants `u32x8` is 32 bytes of plain integer data
// with `repr(C, align(32))`: the all-zero bit pattern is a valid value
// (Zeroable), and any bit pattern is valid with no padding bytes (Pod).
unsafe impl bytemuck::Zeroable for u32x8 {}
unsafe impl bytemuck::Pod for u32x8 {}
33
34impl Default for u32x8 {
35 fn default() -> Self {
36 Self::splat(0)
37 }
38}
39
impl u32x8 {
    /// Creates a vector with all eight lanes set to `n`.
    pub fn splat(n: u32) -> Self {
        bytemuck::cast([n, n, n, n, n, n, n, n])
    }

    /// Reinterprets the lanes as `i32` — a bit-level cast, not a
    /// numeric conversion.
    pub fn to_i32x8_bitcast(self) -> i32x8 {
        bytemuck::cast(self)
    }

    /// Reinterprets the lanes as `f32` — a bit-level cast, not a
    /// numeric conversion.
    pub fn to_f32x8_bitcast(self) -> f32x8 {
        bytemuck::cast(self)
    }

    /// Lane-wise equality: a lane is all-ones where equal, zero otherwise.
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                // SAFETY: this branch only compiles when the `avx2`
                // target feature is enabled (see the cfg above).
                Self(unsafe { _mm256_cmpeq_epi32(self.0, rhs.0) })
            } else {
                Self(self.0.cmp_eq(rhs.0), self.1.cmp_eq(rhs.1))
            }
        }
    }

    /// Shifts every lane left by `RHS` bits, shifting in zeroes.
    pub fn shl<const RHS: i32>(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                // `_mm256_sll_epi32` reads the shift count from the low
                // 64 bits of an `__m128i`, hence the `[RHS, 0]` cast.
                let shift: __m128i = cast([RHS as u64, 0]);
                // SAFETY: this branch only compiles when the `avx2`
                // target feature is enabled (see the cfg above).
                Self(unsafe { _mm256_sll_epi32(self.0, shift) })
            } else {
                Self(self.0.shl::<RHS>(), self.1.shl::<RHS>())
            }
        }
    }

    /// Shifts every lane right by `RHS` bits (logical shift — zeroes
    /// are shifted in).
    pub fn shr<const RHS: i32>(self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                // `_mm256_srl_epi32` reads the shift count from the low
                // 64 bits of an `__m128i`, hence the `[RHS, 0]` cast.
                let shift: __m128i = cast([RHS as u64, 0]);
                // SAFETY: this branch only compiles when the `avx2`
                // target feature is enabled (see the cfg above).
                Self(unsafe { _mm256_srl_epi32(self.0, shift) })
            } else {
                Self(self.0.shr::<RHS>(), self.1.shr::<RHS>())
            }
        }
    }
}
85
86impl core::ops::Not for u32x8 {
87 type Output = Self;
88
89 fn not(self) -> Self {
90 cfg_if::cfg_if! {
91 if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
92 let all_bits = unsafe { _mm256_set1_epi16(-1) };
93 Self(unsafe { _mm256_xor_si256(self.0, all_bits) })
94 } else {
95 Self(!self.0, !self.1)
96 }
97 }
98 }
99}
100
101impl core::ops::Add for u32x8 {
102 type Output = Self;
103
104 fn add(self, rhs: Self) -> Self::Output {
105 cfg_if::cfg_if! {
106 if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
107 Self(unsafe { _mm256_add_epi32(self.0, rhs.0) })
108 } else {
109 Self(self.0 + rhs.0, self.1 + rhs.1)
110 }
111 }
112 }
113}
114
115impl core::ops::BitAnd for u32x8 {
116 type Output = Self;
117
118 fn bitand(self, rhs: Self) -> Self::Output {
119 cfg_if::cfg_if! {
120 if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
121 Self(unsafe { _mm256_and_si256(self.0, rhs.0) })
122 } else {
123 Self(self.0 & rhs.0, self.1 & rhs.1)
124 }
125 }
126 }
127}
128