1 | #![cfg_attr ( |
2 | feature = "as_crate" , |
3 | feature(core_intrinsics), |
4 | feature(portable_simd), |
5 | allow(internal_features) |
6 | )] |
7 | #[cfg (not(feature = "as_crate" ))] |
8 | use core::simd; |
9 | #[cfg (feature = "as_crate" )] |
10 | use core_simd::simd; |
11 | |
12 | use core::intrinsics::simd as intrinsics; |
13 | |
14 | use simd::{LaneCount, Simd, SupportedLaneCount}; |
15 | |
/// Stand-in home for the [`Sealed`](experimental::Sealed) marker trait when this
/// code is built as a standalone crate (the `as_crate` feature). The module is
/// re-exported under the name `sealed` so that `crate::sealed::Sealed` resolves.
#[cfg(feature = "as_crate")]
mod experimental {
    /// Marker supertrait used to keep downstream code from implementing
    /// traits that list it as a bound.
    pub trait Sealed {}
}
20 | |
21 | #[cfg (feature = "as_crate" )] |
22 | use experimental as sealed; |
23 | |
24 | use crate::sealed::Sealed; |
25 | |
26 | /// This trait provides a possibly-temporary implementation of float functions |
27 | /// that may, in the absence of hardware support, canonicalize to calling an |
28 | /// operating system's `math.h` dynamically-loaded library (also known as a |
29 | /// shared object). As these conditionally require runtime support, they |
30 | /// should only appear in binaries built assuming OS support: `std`. |
31 | /// |
32 | /// However, there is no reason SIMD types, in general, need OS support, |
33 | /// as for many architectures an embedded binary may simply configure that |
34 | /// support itself. This means these types must be visible in `core` |
35 | /// but have these functions available in `std`. |
36 | /// |
37 | /// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but |
38 | /// due to compiler limitations, it is harder to implement this approach for |
39 | /// abstract data types like [`Simd`]. From that need, this trait is born. |
40 | /// |
41 | /// It is possible this trait will be replaced in some manner in the future, |
42 | /// when either the compiler or its supporting runtime functions are improved. |
43 | /// For now this trait is available to permit experimentation with SIMD float |
44 | /// operations that may lack hardware support, such as `mul_add`. |
45 | pub trait StdFloat: Sealed + Sized { |
46 | /// Elementwise fused multiply-add. Computes `(self * a) + b` with only one rounding error, |
47 | /// yielding a more accurate result than an unfused multiply-add. |
48 | /// |
49 | /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target |
50 | /// architecture has a dedicated `fma` CPU instruction. However, this is not always |
51 | /// true, and will be heavily dependent on designing algorithms with specific target |
52 | /// hardware in mind. |
53 | #[inline ] |
54 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
55 | fn mul_add(self, a: Self, b: Self) -> Self { |
56 | unsafe { intrinsics::simd_fma(self, a, b) } |
57 | } |
58 | |
59 | /// Produces a vector where every element has the square root value |
60 | /// of the equivalently-indexed element in `self` |
61 | #[inline ] |
62 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
63 | fn sqrt(self) -> Self { |
64 | unsafe { intrinsics::simd_fsqrt(self) } |
65 | } |
66 | |
67 | /// Produces a vector where every element has the sine of the value |
68 | /// in the equivalently-indexed element in `self`. |
69 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
70 | fn sin(self) -> Self; |
71 | |
72 | /// Produces a vector where every element has the cosine of the value |
73 | /// in the equivalently-indexed element in `self`. |
74 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
75 | fn cos(self) -> Self; |
76 | |
77 | /// Produces a vector where every element has the exponential (base e) of the value |
78 | /// in the equivalently-indexed element in `self`. |
79 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
80 | fn exp(self) -> Self; |
81 | |
82 | /// Produces a vector where every element has the exponential (base 2) of the value |
83 | /// in the equivalently-indexed element in `self`. |
84 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
85 | fn exp2(self) -> Self; |
86 | |
87 | /// Produces a vector where every element has the natural logarithm of the value |
88 | /// in the equivalently-indexed element in `self`. |
89 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
90 | fn ln(self) -> Self; |
91 | |
92 | /// Produces a vector where every element has the logarithm with respect to an arbitrary |
93 | /// in the equivalently-indexed elements in `self` and `base`. |
94 | #[inline ] |
95 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
96 | fn log(self, base: Self) -> Self { |
97 | unsafe { intrinsics::simd_div(self.ln(), base.ln()) } |
98 | } |
99 | |
100 | /// Produces a vector where every element has the base-2 logarithm of the value |
101 | /// in the equivalently-indexed element in `self`. |
102 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
103 | fn log2(self) -> Self; |
104 | |
105 | /// Produces a vector where every element has the base-10 logarithm of the value |
106 | /// in the equivalently-indexed element in `self`. |
107 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
108 | fn log10(self) -> Self; |
109 | |
110 | /// Returns the smallest integer greater than or equal to each element. |
111 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
112 | #[inline ] |
113 | fn ceil(self) -> Self { |
114 | unsafe { intrinsics::simd_ceil(self) } |
115 | } |
116 | |
117 | /// Returns the largest integer value less than or equal to each element. |
118 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
119 | #[inline ] |
120 | fn floor(self) -> Self { |
121 | unsafe { intrinsics::simd_floor(self) } |
122 | } |
123 | |
124 | /// Rounds to the nearest integer value. Ties round toward zero. |
125 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
126 | #[inline ] |
127 | fn round(self) -> Self { |
128 | unsafe { intrinsics::simd_round(self) } |
129 | } |
130 | |
131 | /// Returns the floating point's integer value, with its fractional part removed. |
132 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
133 | #[inline ] |
134 | fn trunc(self) -> Self { |
135 | unsafe { intrinsics::simd_trunc(self) } |
136 | } |
137 | |
138 | /// Returns the floating point's fractional value, with its integer part removed. |
139 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
140 | fn fract(self) -> Self; |
141 | } |
142 | |
143 | impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {} |
144 | impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {} |
145 | |
/// Implements `StdFloat` for `Simd<f32, N>` and `Simd<f64, N>`.
///
/// Takes a comma-terminated list of `method: intrinsic` pairs. For every pair
/// a unary trait method named `method` is generated that forwards to
/// `intrinsics::intrinsic`. `fract` is always emitted as `self - self.trunc()`.
///
/// On `aarch64` the `f64` methods do not call the vector intrinsic; they apply
/// the scalar `f64` method of the same name to each lane instead (see the
/// linked LLVM issue inside the macro body).
macro_rules! impl_float {
    {
        $($method:ident: $simd_op:ident,)*
    } => {
        impl<const N: usize> StdFloat for Simd<f32, N>
        where
            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn fract(self) -> Self {
                let whole = self.trunc();
                self - whole
            }

            $(
                #[inline]
                fn $method(self) -> Self {
                    // SAFETY: `self` is a vector of `f32`, the kind of
                    // operand the float intrinsics expect.
                    unsafe { intrinsics::$simd_op(self) }
                }
            )*
        }

        impl<const N: usize> StdFloat for Simd<f64, N>
        where
            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn fract(self) -> Self {
                let whole = self.trunc();
                self - whole
            }

            $(
                #[inline]
                fn $method(self) -> Self {
                    // Scalar fallback, one lane at a time, to sidestep
                    // https://github.com/llvm/llvm-project/issues/83729
                    #[cfg(target_arch = "aarch64")]
                    {
                        Self::from_array(self.to_array().map(|lane| lane.$method()))
                    }

                    #[cfg(not(target_arch = "aarch64"))]
                    {
                        // SAFETY: `self` is a vector of `f64`, the kind of
                        // operand the float intrinsics expect.
                        unsafe { intrinsics::$simd_op(self) }
                    }
                }
            )*
        }
    }
}
198 | |
199 | impl_float! { |
200 | sin: simd_fsin, |
201 | cos: simd_fcos, |
202 | exp: simd_fexp, |
203 | exp2: simd_fexp2, |
204 | ln: simd_flog, |
205 | log2: simd_flog2, |
206 | log10: simd_flog10, |
207 | } |
208 | |