| 1 | #![cfg_attr ( |
| 2 | feature = "as_crate" , |
| 3 | feature(core_intrinsics), |
| 4 | feature(portable_simd), |
| 5 | allow(internal_features) |
| 6 | )] |
| 7 | #[cfg (not(feature = "as_crate" ))] |
| 8 | use core::simd; |
| 9 | #[cfg (feature = "as_crate" )] |
| 10 | use core_simd::simd; |
| 11 | |
| 12 | use core::intrinsics::simd as intrinsics; |
| 13 | |
| 14 | use simd::{LaneCount, Simd, SupportedLaneCount}; |
| 15 | |
// Only built with the `as_crate` feature. In that configuration this module is
// aliased as `sealed` just below, which is what lets `crate::sealed::Sealed`
// resolve to the trait declared here.
#[cfg(feature = "as_crate")]
mod experimental {
    /// Marker supertrait of `StdFloat`.
    ///
    /// The trait is `pub` but lives in a private module; this looks like the
    /// usual "sealed trait" arrangement, intended to keep `StdFloat` from
    /// being implemented outside this crate.
    pub trait Sealed {}
}
| 20 | |
| 21 | #[cfg (feature = "as_crate" )] |
| 22 | use experimental as sealed; |
| 23 | |
| 24 | use crate::sealed::Sealed; |
| 25 | |
| 26 | /// This trait provides a possibly-temporary implementation of float functions |
| 27 | /// that may, in the absence of hardware support, canonicalize to calling an |
| 28 | /// operating system's `math.h` dynamically-loaded library (also known as a |
| 29 | /// shared object). As these conditionally require runtime support, they |
| 30 | /// should only appear in binaries built assuming OS support: `std`. |
| 31 | /// |
| 32 | /// However, there is no reason SIMD types, in general, need OS support, |
| 33 | /// as for many architectures an embedded binary may simply configure that |
| 34 | /// support itself. This means these types must be visible in `core` |
| 35 | /// but have these functions available in `std`. |
| 36 | /// |
| 37 | /// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but |
| 38 | /// due to compiler limitations, it is harder to implement this approach for |
| 39 | /// abstract data types like [`Simd`]. From that need, this trait is born. |
| 40 | /// |
| 41 | /// It is possible this trait will be replaced in some manner in the future, |
| 42 | /// when either the compiler or its supporting runtime functions are improved. |
| 43 | /// For now this trait is available to permit experimentation with SIMD float |
| 44 | /// operations that may lack hardware support, such as `mul_add`. |
| 45 | pub trait StdFloat: Sealed + Sized { |
| 46 | /// Elementwise fused multiply-add. Computes `(self * a) + b` with only one rounding error, |
| 47 | /// yielding a more accurate result than an unfused multiply-add. |
| 48 | /// |
| 49 | /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target |
| 50 | /// architecture has a dedicated `fma` CPU instruction. However, this is not always |
| 51 | /// true, and will be heavily dependent on designing algorithms with specific target |
| 52 | /// hardware in mind. |
| 53 | #[inline ] |
| 54 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 55 | fn mul_add(self, a: Self, b: Self) -> Self { |
| 56 | unsafe { intrinsics::simd_fma(self, a, b) } |
| 57 | } |
| 58 | |
| 59 | /// Produces a vector where every element has the square root value |
| 60 | /// of the equivalently-indexed element in `self` |
| 61 | #[inline ] |
| 62 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 63 | fn sqrt(self) -> Self { |
| 64 | unsafe { intrinsics::simd_fsqrt(self) } |
| 65 | } |
| 66 | |
| 67 | /// Produces a vector where every element has the sine of the value |
| 68 | /// in the equivalently-indexed element in `self`. |
| 69 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 70 | fn sin(self) -> Self; |
| 71 | |
| 72 | /// Produces a vector where every element has the cosine of the value |
| 73 | /// in the equivalently-indexed element in `self`. |
| 74 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 75 | fn cos(self) -> Self; |
| 76 | |
| 77 | /// Produces a vector where every element has the exponential (base e) of the value |
| 78 | /// in the equivalently-indexed element in `self`. |
| 79 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 80 | fn exp(self) -> Self; |
| 81 | |
| 82 | /// Produces a vector where every element has the exponential (base 2) of the value |
| 83 | /// in the equivalently-indexed element in `self`. |
| 84 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 85 | fn exp2(self) -> Self; |
| 86 | |
| 87 | /// Produces a vector where every element has the natural logarithm of the value |
| 88 | /// in the equivalently-indexed element in `self`. |
| 89 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 90 | fn ln(self) -> Self; |
| 91 | |
| 92 | /// Produces a vector where every element has the logarithm with respect to an arbitrary |
| 93 | /// in the equivalently-indexed elements in `self` and `base`. |
| 94 | #[inline ] |
| 95 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 96 | fn log(self, base: Self) -> Self { |
| 97 | unsafe { intrinsics::simd_div(self.ln(), base.ln()) } |
| 98 | } |
| 99 | |
| 100 | /// Produces a vector where every element has the base-2 logarithm of the value |
| 101 | /// in the equivalently-indexed element in `self`. |
| 102 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 103 | fn log2(self) -> Self; |
| 104 | |
| 105 | /// Produces a vector where every element has the base-10 logarithm of the value |
| 106 | /// in the equivalently-indexed element in `self`. |
| 107 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 108 | fn log10(self) -> Self; |
| 109 | |
| 110 | /// Returns the smallest integer greater than or equal to each element. |
| 111 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 112 | #[inline ] |
| 113 | fn ceil(self) -> Self { |
| 114 | unsafe { intrinsics::simd_ceil(self) } |
| 115 | } |
| 116 | |
| 117 | /// Returns the largest integer value less than or equal to each element. |
| 118 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 119 | #[inline ] |
| 120 | fn floor(self) -> Self { |
| 121 | unsafe { intrinsics::simd_floor(self) } |
| 122 | } |
| 123 | |
| 124 | /// Rounds to the nearest integer value. Ties round toward zero. |
| 125 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 126 | #[inline ] |
| 127 | fn round(self) -> Self { |
| 128 | unsafe { intrinsics::simd_round(self) } |
| 129 | } |
| 130 | |
| 131 | /// Returns the floating point's integer value, with its fractional part removed. |
| 132 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 133 | #[inline ] |
| 134 | fn trunc(self) -> Self { |
| 135 | unsafe { intrinsics::simd_trunc(self) } |
| 136 | } |
| 137 | |
| 138 | /// Returns the floating point's fractional value, with its integer part removed. |
| 139 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
| 140 | fn fract(self) -> Self; |
| 141 | } |
| 142 | |
| 143 | impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {} |
| 144 | impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {} |
| 145 | |
// Emits the `StdFloat` impls for `Simd<f32, N>` and `Simd<f64, N>`.
//
// Input is a comma-terminated list of `method: intrinsic` pairs. For each pair a
// unary method `method(self) -> Self` is generated that forwards to
// `intrinsics::intrinsic`. `fract` is written out by hand in both impls as
// `self - self.trunc()`.
macro_rules! impl_float {
    {
        $($method:ident: $op:ident,)*
    } => {
        impl<const N: usize> StdFloat for Simd<f32, N>
        where
            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn fract(self) -> Self {
                let whole = self.trunc();
                self - whole
            }

            $(
                #[inline]
                fn $method(self) -> Self {
                    // SAFETY: `self` is a `Simd<f32, N>`, i.e. a SIMD vector of floats.
                    unsafe { intrinsics::$op(self) }
                }
            )*
        }

        impl<const N: usize> StdFloat for Simd<f64, N>
        where
            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn fract(self) -> Self {
                let whole = self.trunc();
                self - whole
            }

            $(
                #[inline]
                fn $method(self) -> Self {
                    // On aarch64 the vector intrinsic is avoided and the scalar `f64`
                    // method is applied to each lane instead; see
                    // https://github.com/llvm/llvm-project/issues/83729
                    #[cfg(target_arch = "aarch64")]
                    {
                        Self::from_array(self.to_array().map(|lane| lane.$method()))
                    }

                    #[cfg(not(target_arch = "aarch64"))]
                    {
                        // SAFETY: `self` is a `Simd<f64, N>`, i.e. a SIMD vector of floats.
                        unsafe { intrinsics::$op(self) }
                    }
                }
            )*
        }
    }
}
| 198 | |
| 199 | impl_float! { |
| 200 | sin: simd_fsin, |
| 201 | cos: simd_fcos, |
| 202 | exp: simd_fexp, |
| 203 | exp2: simd_fexp2, |
| 204 | ln: simd_flog, |
| 205 | log2: simd_flog2, |
| 206 | log10: simd_flog10, |
| 207 | } |
| 208 | |