// Copyright 2020 Yevhenii Reizner
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Based on https://github.com/Lokathor/wide (Zlib)

use bytemuck::cast;

use super::{f32x8, u32x8};

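// On AVX2 this type wraps a single 256-bit register; otherwise it is a pair of
// 128-bit `i32x4` halves, and every operation below is applied to each half.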
cfg_if::cfg_if! {
    if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
        #[cfg(target_arch = "x86")]
        use core::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use core::arch::x86_64::*;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(32))]
        pub struct i32x8(__m256i);
    } else {
        use super::i32x4;

        #[derive(Clone, Copy, Debug)]
        #[repr(C, align(32))]
        pub struct i32x8(pub i32x4, pub i32x4);
    }
}

unsafe impl bytemuck::Zeroable for i32x8 {}
unsafe impl bytemuck::Pod for i32x8 {}

impl Default for i32x8 {
    fn default() -> Self {
        Self::splat(0)
    }
}

impl i32x8 {
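    /// Returns a vector with all eight lanes set to `n`.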
    pub fn splat(n: i32) -> Self {
        cast([n, n, n, n, n, n, n, n])
    }

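    /// Lane-wise select: where `self` has an all-ones lane, take the lane
    /// from `t`, otherwise from `f`. `self` is expected to be a comparison
    /// mask (each lane all ones or all zeros); `_mm256_blendv_epi8` picks
    /// bytes by their high bit, so arbitrary values would blend per byte.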
    pub fn blend(self, t: Self, f: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_blendv_epi8(f.0, t.0, self.0) })
            } else {
                Self(self.0.blend(t.0, f.0), self.1.blend(t.1, f.1))
            }
        }
    }

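    /// Lane-wise `self == rhs`; a true lane is all ones, a false lane all zeros.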
    pub fn cmp_eq(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_cmpeq_epi32(self.0, rhs.0) })
            } else {
                Self(self.0.cmp_eq(rhs.0), self.1.cmp_eq(rhs.1))
            }
        }
    }

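    /// Lane-wise `self > rhs`; a true lane is all ones, a false lane all zeros.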
    pub fn cmp_gt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_cmpgt_epi32(self.0, rhs.0) })
            } else {
                Self(self.0.cmp_gt(rhs.0), self.1.cmp_gt(rhs.1))
            }
        }
    }

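    /// Lane-wise `self < rhs`; a true lane is all ones, a false lane all zeros.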
    pub fn cmp_lt(self, rhs: Self) -> Self {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                // There is no `_mm256_cmplt_epi32`, but `self < rhs` is the
                // same as `rhs > self`, so we use `_mm256_cmpgt_epi32` with
                // swapped arguments. (Inverting the result of `self > rhs`
                // would yield `<=`, not `<`, and would disagree with the
                // strict comparison in the fallback below.)
                Self(unsafe { _mm256_cmpgt_epi32(rhs.0, self.0) })
            } else {
                Self(self.0.cmp_lt(rhs.0), self.1.cmp_lt(rhs.1))
            }
        }
    }

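    /// Converts each lane to `f32` by value (a numeric conversion, not a bitcast).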
    pub fn to_f32x8(self) -> f32x8 {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                cast(unsafe { _mm256_cvtepi32_ps(self.0) })
            } else if #[cfg(all(feature = "simd", target_feature = "avx"))] {
                cast([self.0.to_f32x4(), self.1.to_f32x4()])
            } else {
                f32x8(self.0.to_f32x4(), self.1.to_f32x4())
            }
        }
    }

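    /// Reinterprets the underlying bits as `u32x8` without any conversion.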
    pub fn to_u32x8_bitcast(self) -> u32x8 {
        cast(self)
    }

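    /// Reinterprets the underlying bits as `f32x8` without any conversion.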
    pub fn to_f32x8_bitcast(self) -> f32x8 {
        cast(self)
    }
}

impl From<[i32; 8]> for i32x8 {
    fn from(v: [i32; 8]) -> Self {
        cast(v)
    }
}

impl From<i32x8> for [i32; 8] {
    fn from(v: i32x8) -> Self {
        cast(v)
    }
}

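// Element-wise operators: the AVX2 path uses a single 256-bit intrinsic, while
// the fallback applies the operation to each 128-bit half.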
impl core::ops::Add for i32x8 {
    type Output = Self;

    #[inline]
    fn add(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_add_epi32(self.0, rhs.0) })
            } else {
                Self(self.0 + rhs.0, self.1 + rhs.1)
            }
        }
    }
}

impl core::ops::BitAnd for i32x8 {
    type Output = Self;

    #[inline]
    fn bitand(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_and_si256(self.0, rhs.0) })
            } else {
                Self(self.0 & rhs.0, self.1 & rhs.1)
            }
        }
    }
}

impl core::ops::Mul for i32x8 {
    type Output = Self;

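    // `_mm256_mullo_epi32` computes the full 64-bit product of each lane pair
    // and keeps the low 32 bits, i.e. a lane-wise `i32` multiplication.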
    #[inline]
    fn mul(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_mullo_epi32(self.0, rhs.0) })
            } else {
                Self(self.0 * rhs.0, self.1 * rhs.1)
            }
        }
    }
}

impl core::ops::BitOr for i32x8 {
    type Output = Self;

    #[inline]
    fn bitor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_or_si256(self.0, rhs.0) })
            } else {
                Self(self.0 | rhs.0, self.1 | rhs.1)
            }
        }
    }
}

impl core::ops::BitXor for i32x8 {
    type Output = Self;

    #[inline]
    fn bitxor(self, rhs: Self) -> Self::Output {
        cfg_if::cfg_if! {
            if #[cfg(all(feature = "simd", target_feature = "avx2"))] {
                Self(unsafe { _mm256_xor_si256(self.0, rhs.0) })
            } else {
                Self(self.0 ^ rhs.0, self.1 ^ rhs.1)
            }
        }
    }
}
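
#[cfg(test)]
mod tests {
    use super::i32x8;

    // A minimal sanity-check sketch (an editorial addition, not part of the
    // original file): `cmp_*` produces all-ones/all-zeros lane masks, and
    // `blend` selects lanes with such a mask.
    #[test]
    fn cmp_lt_and_blend() {
        let a = i32x8::from([0, 1, 2, 3, 4, 5, 6, 7]);
        let b = i32x8::splat(4);
        // Lanes holding 0..=3 compare as less-than; the lane holding 4 must
        // not (strict comparison), which the swapped-argument AVX2 path and
        // the scalar fallback agree on.
        let mask = a.cmp_lt(b);
        let v: [i32; 8] = mask.blend(i32x8::splat(1), i32x8::splat(0)).into();
        assert_eq!(v, [1, 1, 1, 1, 0, 0, 0, 0]);
    }
}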