use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount, cmp::SimdPartialEq};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
use core::ops::{Shl, Shr};

mod assign;
mod deref;
mod shift_scalar;
mod unary;

impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    type Output = I::Output;
    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_array()[index]
    }
}

impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.as_mut_array()[index]
    }
}
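
// Together these let a `Simd` be indexed exactly like its backing array,
// including subslicing. A minimal sketch of the resulting behavior (using
// only public `Simd` APIs; not exercised in this module):
//
//     let mut v = Simd::<i32, 4>::from_array([1, 2, 3, 4]);
//     assert_eq!(v[0], 1);          // single lane via `usize`
//     assert_eq!(v[1..3], [2, 3]);  // subslice via `Range<usize>`
//     v[3] = 7;                     // IndexMut writes through to the lane
//     assert_eq!(v.to_array(), [1, 2, 3, 7]);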

macro_rules! unsafe_base {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
        // Safety: $lhs and $rhs are vectors
        unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) }
    };
}
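
// For example, `unsafe_base!(self, rhs, { simd_add }, i32)` expands to
// `unsafe { core::intrinsics::simd::simd_add(self, rhs) }`. The trailing
// tokens (here, the scalar type) are accepted but ignored, so every impl
// macro in this file can be invoked with the same shape.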

/// SAFETY: This macro should not be used for anything except Shl or Shr, and must be passed the matching shift intrinsic.
/// It handles performing a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value from shl, lshr, or ashr when `rhs >= <Int>::BITS`.
/// At worst, this will maybe add another instruction and cycle;
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
        #[allow(clippy::suspicious_arithmetic_impl)]
        // Safety: $lhs and the bitand result are vectors
        unsafe {
            core::intrinsics::simd::$simd_call(
                $lhs,
                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
            )
        }
    };
}
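
// The net effect is that vector shifts wrap their shift amount instead of
// producing poison. A sketch of the observable behavior, assuming the Shl
// impl defined later in this file:
//
//     let x = Simd::<u8, 4>::splat(1);
//     let s = Simd::<u8, 4>::splat(9);
//     // 9 & (u8::BITS - 1) == 9 & 7 == 1, so this is `1 << 1`, not UB:
//     assert_eq!(x << s, Simd::splat(2));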

/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a Rust value returns instead.
///
/// |                  | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0        | UB   | panic!()
/// | <$int>::MIN / -1 | UB   | <$int>::MIN
/// | <$int>::MIN % -1 | UB   | 0
///
macro_rules! int_divrem_guard {
    ( $lhs:ident,
      $rhs:ident,
      { const PANIC_ZERO: &'static str = $zero:literal;
        $simd_call:ident, $op:tt
      },
      $int:ident ) => {
        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
            panic!($zero);
        } else {
            // Prevent otherwise-UB overflow on the MIN / -1 case.
            let rhs = if <$int>::MIN != 0 {
                // This should, at worst, optimize to a few branchless logical ops
                // Ideally, this entire conditional should evaporate
                // Fire LLVM and implement those manually if it doesn't get the hint
                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                // type inference can break here, so cut an SInt to size
                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
                .select(Simd::splat(1 as _), $rhs)
            } else {
                // Nice base case to make it easy to const-fold away the other branch.
                $rhs
            };

            // On aarch64, div fails for arbitrary `v % 0`, and rem fails when rhs is MIN
            // for non-powers-of-two; these operations aren't vectorized on aarch64 anyway,
            // so fall back to a scalar loop there.
            #[cfg(target_arch = "aarch64")]
            {
                let mut out = Simd::splat(0 as _);
                for i in 0..Self::LEN {
                    out[i] = $lhs[i] $op rhs[i];
                }
                out
            }

            #[cfg(not(target_arch = "aarch64"))]
            {
                // Safety: $lhs and rhs are vectors
                unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
            }
        }
    };
}
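
// A sketch of the guarded behavior, matching the table above (using the
// Div/Rem impls defined later in this file):
//
//     let min = Simd::<i32, 4>::splat(i32::MIN);
//     let neg_one = Simd::<i32, 4>::splat(-1);
//     assert_eq!(min / neg_one, min);             // MIN, not UB
//     assert_eq!(min % neg_one, Simd::splat(0));  // 0, not UB
//     // let _ = min / Simd::splat(0);            // would panic, like scalar Rust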

macro_rules! for_base_types {
    (   T = ($($scalar:ident),*);
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ty;

        impl $op:ident::$call:ident {
            $macro_impl:ident $inner:tt
        }) => {
        $(
            impl<const N: usize> $op<Self> for Simd<$scalar, N>
            where
                $scalar: SimdElement,
                LaneCount<N>: SupportedLaneCount,
            {
                type Output = $out;

                #[inline]
                // TODO: only useful for int Div::div, but we hope that this
                // will essentially always get inlined anyway.
                #[track_caller]
                fn $call(self, rhs: Self) -> Self::Output {
                    $macro_impl!(self, rhs, $inner, $scalar)
                }
            }
        )*
    }
}
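
// For a single scalar and op, one expansion of the macro above looks
// roughly like this (shown for `i32` and `Add` with `unsafe_base`):
//
//     impl<const N: usize> Add<Self> for Simd<i32, N>
//     where
//         i32: SimdElement,
//         LaneCount<N>: SupportedLaneCount,
//     {
//         type Output = Self;
//         #[inline]
//         #[track_caller]
//         fn add(self, rhs: Self) -> Self::Output {
//             // Safety: self and rhs are vectors
//             unsafe { core::intrinsics::simd::simd_add(self, rhs) }
//         }
//     }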

// A "TokenTree muncher": takes a set of scalar types `T = {};`,
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
    (
        T = $types:tt;
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ident;
        impl $op:ident::$call:ident
        $inner:tt
        $($rest:tt)*
    ) => {
        for_base_types! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            impl $op::$call
            $inner
        }
        for_base_ops! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            $($rest)*
        }
    };
    ($($done:tt)*) => {
        // Done.
    }
}
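
// So an invocation like
//
//     for_base_ops! {
//         T = (i32);
//         type Lhs = Simd<T, N>;
//         type Rhs = Simd<T, N>;
//         type Output = Self;
//         impl Add::add { unsafe_base { simd_add } }
//         impl Sub::sub { unsafe_base { simd_sub } }
//     }
//
// peels off the `Add` impl into a `for_base_types!` call, recurses on the
// remaining `Sub` impl, and finally hits the catch-all arm to terminate.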

// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl BitAnd::bitand {
        unsafe_base { simd_and }
    }

    impl BitOr::bitor {
        unsafe_base { simd_or }
    }

    impl BitXor::bitxor {
        unsafe_base { simd_xor }
    }

    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
            simd_div, /
        }
    }

    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
            simd_rem, %
        }
    }

    // The only question is how to handle shifts >= <Int>::BITS?
    // Our current solution uses wrapping logic.
    impl Shl::shl {
        wrap_bitshift { simd_shl }
    }

    impl Shr::shr {
        wrap_bitshift {
            // This automatically monomorphizes to lshr or ashr, depending,
            // so it's fine to use it for both UInts and SInts.
            simd_shr
        }
    }
}
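
// A sketch of the wrapping semantics mentioned above: unlike scalar ops,
// which panic on overflow in debug builds, these never panic on overflow.
//
//     let max = Simd::<u8, 4>::splat(u8::MAX);
//     assert_eq!(max + Simd::splat(1), Simd::splat(0)); // wraps to 0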

// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
for_base_ops! {
    T = (f32, f64);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl Div::div {
        unsafe_base { simd_div }
    }

    impl Rem::rem {
        unsafe_base { simd_rem }
    }
}
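
// A sketch of the NaN caveat (the `is_nan` mask comes from the `SimdFloat`
// trait, which callers would need in scope):
//
//     let zero = Simd::<f32, 4>::splat(0.0);
//     let q = zero / zero;        // 0.0 / 0.0 is NaN in every lane, no panic
//     assert!(q.is_nan().all());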