use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
use core::ops::{Shl, Shr};

mod assign;
mod deref;
mod shift_scalar;
mod unary;

impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    type Output = I::Output;
    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_array()[index]
    }
}

impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.as_mut_array()[index]
    }
}
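
// Illustrative sketch (added; hypothetical inline test, the crate's real tests live
// elsewhere): the `Index`/`IndexMut` impls above let a vector be indexed like its
// backing array, including by subslice ranges.
#[cfg(test)]
mod index_sketch {
    use crate::simd::Simd;

    #[test]
    fn index_and_index_mut_behave_like_the_array() {
        let mut v = Simd::from_array([10, 11, 12, 13]);
        // Scalar and range indexing go through `as_array`.
        assert_eq!(v[2], 12);
        assert_eq!(v[1..3], [11, 12]);
        // Mutation goes through `as_mut_array`.
        v[0] = 9;
        assert_eq!(v.to_array(), [9, 11, 12, 13]);
    }
}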

macro_rules! unsafe_base {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
        // Safety: $lhs and $rhs are vectors
        unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) }
    };
}

/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
/// It handles performing a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr when `rhs >= <Int>::BITS`.
/// At worst, this may add another instruction and cycle,
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
        #[allow(clippy::suspicious_arithmetic_impl)]
        // Safety: $lhs and the bitand result are vectors
        unsafe {
            core::intrinsics::simd::$simd_call(
                $lhs,
                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
            )
        }
    };
}
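
// Illustrative sketch (added; hypothetical inline test): shows the user-visible
// effect of `wrap_bitshift!`: the shift amount is masked to the lane's bit width,
// so over-wide shifts wrap instead of hitting LLVM's poison condition.
#[cfg(test)]
mod bitshift_wrap_sketch {
    use crate::simd::Simd;

    #[test]
    fn shift_amount_wraps_at_lane_width() {
        let ones = Simd::<u32, 4>::splat(1);
        // 33 & (u32::BITS - 1) == 1, so this is a shift by 1, not UB.
        assert_eq!(ones << Simd::splat(33), Simd::splat(2));
        // Likewise for right shifts: 35 & 31 == 3, and 8 >> 3 == 1.
        assert_eq!(Simd::<u32, 4>::splat(8) >> Simd::splat(35), Simd::splat(1));
    }
}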

/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a Rust value is returned instead.
///
/// |                  | LLVM | Rust        |
/// | :--------------: | :--- | :---------- |
/// | N {/,%} 0        | UB   | panic!()    |
/// | <$int>::MIN / -1 | UB   | <$int>::MIN |
/// | <$int>::MIN % -1 | UB   | 0           |
///
macro_rules! int_divrem_guard {
    ( $lhs:ident,
      $rhs:ident,
      { const PANIC_ZERO: &'static str = $zero:literal;
        $simd_call:ident
      },
      $int:ident ) => {
        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
            panic!($zero);
        } else {
            // Prevent otherwise-UB overflow on the MIN / -1 case.
            let rhs = if <$int>::MIN != 0 {
                // This should, at worst, optimize to a few branchless logical ops
                // Ideally, this entire conditional should evaporate
                // Fire LLVM and implement those manually if it doesn't get the hint
                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                // type inference can break here, so cut an SInt to size
                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
                .select(Simd::splat(1 as _), $rhs)
            } else {
                // Nice base case to make it easy to const-fold away the other branch.
                $rhs
            };
            // Safety: $lhs and rhs are vectors
            unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
        }
    };
}
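
// Illustrative sketch (added; hypothetical inline test): demonstrates the guarantees
// from the table above: <$int>::MIN / -1 yields <$int>::MIN, the matching remainder
// is 0, and division by zero panics rather than being UB.
#[cfg(test)]
mod int_divrem_sketch {
    use crate::simd::Simd;

    #[test]
    fn min_over_negative_one_is_defined() {
        let min = Simd::<i32, 4>::splat(i32::MIN);
        let neg_one = Simd::splat(-1);
        assert_eq!(min / neg_one, min);
        assert_eq!(min % neg_one, Simd::splat(0));
    }

    #[test]
    #[should_panic = "attempt to divide by zero"]
    fn division_by_zero_panics() {
        let _ = Simd::<i32, 4>::splat(1) / Simd::splat(0);
    }
}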

macro_rules! for_base_types {
    ( T = ($($scalar:ident),*);
      type Lhs = Simd<T, N>;
      type Rhs = Simd<T, N>;
      type Output = $out:ty;

      impl $op:ident::$call:ident {
          $macro_impl:ident $inner:tt
      }) => {
        $(
            impl<const N: usize> $op<Self> for Simd<$scalar, N>
            where
                $scalar: SimdElement,
                LaneCount<N>: SupportedLaneCount,
            {
                type Output = $out;

                #[inline]
                #[must_use = "operator returns a new vector without mutating the inputs"]
                // TODO: only useful for int Div::div, but we hope that this
                // will essentially always get inlined anyway.
                #[track_caller]
                fn $call(self, rhs: Self) -> Self::Output {
                    $macro_impl!(self, rhs, $inner, $scalar)
                }
            }
        )*
    }
}

// A "TokenTree muncher": takes a set of scalar types `T = (...)`;
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
    (
        T = $types:tt;
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ident;
        impl $op:ident::$call:ident
            $inner:tt
        $($rest:tt)*
    ) => {
        for_base_types! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            impl $op::$call
                $inner
        }
        for_base_ops! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            $($rest)*
        }
    };
    ($($done:tt)*) => {
        // Done.
    }
}
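
// Added explanatory sketch: one munching step, roughly. An invocation such as
//
//     for_base_ops! {
//         T = (i32, u32);
//         type Lhs = Simd<T, N>;
//         type Rhs = Simd<T, N>;
//         type Output = Self;
//         impl Add::add {
//             unsafe_base { simd_add }
//         }
//         /* ...more ops... */
//     }
//
// peels off the `Add::add` block and hands it to `for_base_types!`, which emits
// `impl Add for Simd<i32, N>` and `impl Add for Simd<u32, N>` whose bodies expand
// to `unsafe_base!(self, rhs, { simd_add }, i32)` and so on; `for_base_ops!` then
// recurses on the remaining op blocks until none are left.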

// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl BitAnd::bitand {
        unsafe_base { simd_and }
    }

    impl BitOr::bitor {
        unsafe_base { simd_or }
    }

    impl BitXor::bitxor {
        unsafe_base { simd_xor }
    }

    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
            simd_div
        }
    }

    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
            simd_rem
        }
    }

    // The only question is how to handle shifts >= <Int>::BITS?
    // Our current solution uses wrapping logic.
    impl Shl::shl {
        wrap_bitshift { simd_shl }
    }

    impl Shr::shr {
        wrap_bitshift {
            // This automatically monomorphizes to lshr or ashr, depending,
            // so it's fine to use it for both UInts and SInts.
            simd_shr
        }
    }
}
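
// Illustrative sketch (added; hypothetical inline test): per the comment above,
// these element-wise ops go through the wrapping simd_* intrinsics, so integer
// overflow wraps in every build mode instead of panicking like scalar debug math.
#[cfg(test)]
mod wrapping_arith_sketch {
    use crate::simd::Simd;

    #[test]
    fn integer_ops_wrap_on_overflow() {
        let max = Simd::<u8, 8>::splat(u8::MAX);
        assert_eq!(max + Simd::splat(1), Simd::splat(0));
        // i32::MAX * 2 wraps to -2.
        assert_eq!(Simd::<i32, 4>::splat(i32::MAX) * Simd::splat(2), Simd::splat(-2));
    }
}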

// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
for_base_ops! {
    T = (f32, f64);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl Div::div {
        unsafe_base { simd_div }
    }

    impl Rem::rem {
        unsafe_base { simd_rem }
    }
}
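
// Illustrative sketch (added; hypothetical inline test): float division needs no
// guard, since dividing by zero follows IEEE 754 semantics and yields infinities
// or NaN rather than UB or a panic.
#[cfg(test)]
mod float_ops_sketch {
    use crate::simd::Simd;

    #[test]
    fn float_division_by_zero_is_ieee_754() {
        let inf = Simd::<f32, 4>::splat(1.0) / Simd::splat(0.0);
        assert!(inf.to_array().iter().all(|x| x.is_infinite()));
        let nan = Simd::<f32, 4>::splat(0.0) / Simd::splat(0.0);
        assert!(nan.to_array().iter().all(|x| x.is_nan()));
    }
}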