use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount, cmp::SimdPartialEq};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
use core::ops::{Shl, Shr};

mod assign;
mod deref;
mod shift_scalar;
mod unary;

impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    type Output = I::Output;
    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_array()[index]
    }
}

impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.as_mut_array()[index]
    }
}
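
// Illustrative usage, assuming only the `Index`/`IndexMut` impls above (a sketch,
// not part of this module's API surface): a vector can be indexed by a single lane
// or by a range, with the usual slice bounds checks applying.
//
//     let mut v = Simd::<i32, 4>::from_array([1, 2, 3, 4]);
//     assert_eq!(v[2], 3);           // Index<usize> yields a lane
//     v[0] = 10;                     // IndexMut<usize>
//     assert_eq!(&v[1..3], &[2, 3]); // ranges yield slices of the lanes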

macro_rules! unsafe_base {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
        // Safety: $lhs and $rhs are vectors
        unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) }
    };
}
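
// For example (a sketch of the expansion, not additional API): inside the trait
// impls generated below, `unsafe_base!(self, rhs, { simd_add }, i32)` expands to
// `unsafe { core::intrinsics::simd::simd_add(self, rhs) }`; the trailing
// `$($_:tt)*` matcher lets callers pass the scalar type even though this macro
// ignores it.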

/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
/// It handles performing a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr if `rhs >= <Int>::BITS`
/// At worst, this will maybe add another instruction and cycle,
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
        #[allow(clippy::suspicious_arithmetic_impl)]
        // Safety: $lhs and the bitand result are vectors
        unsafe {
            core::intrinsics::simd::$simd_call(
                $lhs,
                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
            )
        }
    };
}
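
// Sketch of the masking this performs (assumed expansion for `u8` lanes):
// `wrap_bitshift!(lhs, rhs, { simd_shl }, u8)` becomes roughly
// `unsafe { core::intrinsics::simd::simd_shl(lhs, rhs.bitand(Simd::splat(7))) }`,
// i.e. the shift amount is reduced modulo `u8::BITS`, matching scalar
// `wrapping_shl`/`wrapping_shr` semantics rather than producing poison.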

/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a Rust value returns instead.
///
/// |                  | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0        | UB   | panic!()
/// | <$int>::MIN / -1 | UB   | <$int>::MIN
/// | <$int>::MIN % -1 | UB   | 0
///
macro_rules! int_divrem_guard {
    (   $lhs:ident,
        $rhs:ident,
        {   const PANIC_ZERO: &'static str = $zero:literal;
            $simd_call:ident, $op:tt
        },
        $int:ident ) => {
        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
            panic!($zero);
        } else {
            // Prevent otherwise-UB overflow on the MIN / -1 case.
            let rhs = if <$int>::MIN != 0 {
                // This should, at worst, optimize to a few branchless logical ops
                // Ideally, this entire conditional should evaporate
                // Fire LLVM and implement those manually if it doesn't get the hint
                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                // type inference can break here, so cut an SInt to size
                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
                .select(Simd::splat(1 as _), $rhs)
            } else {
                // Nice base case to make it easy to const-fold away the other branch.
                $rhs
            };

            // aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two
            // these operations aren't vectorized on aarch64 anyway
            #[cfg(target_arch = "aarch64")]
            {
                let mut out = Simd::splat(0 as _);
                for i in 0..Self::LEN {
                    out[i] = $lhs[i] $op rhs[i];
                }
                out
            }

            #[cfg(not(target_arch = "aarch64"))]
            {
                // Safety: $lhs and rhs are vectors
                unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
            }
        }
    };
}
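
// Worked example of the guarded edge cases (illustrative only, for `i32` lanes):
//
//     let lhs = Simd::from_array([i32::MIN, 8, -4, 7]);
//     let rhs = Simd::from_array([-1, 2, 2, 7]);
//     assert_eq!(lhs / rhs, Simd::from_array([i32::MIN, 4, -2, 1]));
//     assert_eq!(lhs % rhs, Simd::from_array([0, 0, 0, 0]));
//
// Any zero lane in `rhs` would panic instead, matching scalar `/` and `%`.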

macro_rules! for_base_types {
    (   T = ($($scalar:ident),*);
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ty;

        impl $op:ident::$call:ident {
            $macro_impl:ident $inner:tt
        }) => {
            $(
                impl<const N: usize> $op<Self> for Simd<$scalar, N>
                where
                    $scalar: SimdElement,
                    LaneCount<N>: SupportedLaneCount,
                {
                    type Output = $out;

                    #[inline]
                    // TODO: only useful for int Div::div, but we hope that this
                    // will essentially always get inlined anyway.
                    #[track_caller]
                    fn $call(self, rhs: Self) -> Self::Output {
                        $macro_impl!(self, rhs, $inner, $scalar)
                    }
                }
            )*
    }
}
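
// Roughly, one generated impl looks like this (sketch for `Add` on `i32` lanes;
// the real expansion substitutes whichever inner macro was named as the body):
//
//     impl<const N: usize> Add<Self> for Simd<i32, N>
//     where
//         i32: SimdElement,
//         LaneCount<N>: SupportedLaneCount,
//     {
//         type Output = Self;
//         #[inline]
//         #[track_caller]
//         fn add(self, rhs: Self) -> Self {
//             // from `unsafe_base { simd_add }`
//             unsafe { core::intrinsics::simd::simd_add(self, rhs) }
//         }
//     }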

// A "TokenTree muncher": takes a set of scalar types `T = {};`
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
    (
        T = $types:tt;
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ident;
        impl $op:ident::$call:ident
            $inner:tt
        $($rest:tt)*
    ) => {
        for_base_types! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            impl $op::$call
                $inner
        }
        for_base_ops! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            $($rest)*
        }
    };
    ($($done:tt)*) => {
        // Done.
    }
}

// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl BitAnd::bitand {
        unsafe_base { simd_and }
    }

    impl BitOr::bitor {
        unsafe_base { simd_or }
    }

    impl BitXor::bitxor {
        unsafe_base { simd_xor }
    }

    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
            simd_div, /
        }
    }

    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
            simd_rem, %
        }
    }

    // The only question is how to handle shifts >= <Int>::BITS?
    // Our current solution uses wrapping logic.
    impl Shl::shl {
        wrap_bitshift { simd_shl }
    }

    impl Shr::shr {
        wrap_bitshift {
            // This automatically monomorphizes to lshr or ashr, depending,
            // so it's fine to use it for both UInts and SInts.
            simd_shr
        }
    }
}
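
// Consequence of the wrapping-shift choice above (illustrative): the vector
// operators behave like scalar `wrapping_shl`/`wrapping_shr` rather than
// panicking on oversized shift amounts.
//
//     let x = Simd::<u8, 4>::splat(1);
//     assert_eq!(x << Simd::splat(9), Simd::splat(2)); // 9 & (u8::BITS - 1) == 1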

// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
for_base_ops! {
    T = (f32, f64);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl Div::div {
        unsafe_base { simd_div }
    }

    impl Rem::rem {
        unsafe_base { simd_rem }
    }
}
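
// Illustrative only: unlike the integer impls, float division and remainder
// never panic; IEEE-754 supplies the "bad" results instead.
//
//     let x = Simd::<f32, 2>::from_array([1.0, 0.0]);
//     let q = x / Simd::splat(0.0); // [inf, NaN], no panic
//     assert!(q[0].is_infinite() && q[1].is_nan());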