1#![cfg_attr(
2 feature = "as_crate",
3 feature(core_intrinsics),
4 feature(portable_simd),
5 allow(internal_features)
6)]
7#[cfg(not(feature = "as_crate"))]
8use core::simd;
9#[cfg(feature = "as_crate")]
10use core_simd::simd;
11
12use core::intrinsics::simd as intrinsics;
13
14use simd::{LaneCount, Simd, SupportedLaneCount};
15
// When built as a standalone crate (`as_crate`), the `crate::sealed` module
// that `std` normally provides is unavailable, so define a local stand-in
// `Sealed` trait here; the `use experimental as sealed;` below makes the
// `crate::sealed::Sealed` path resolve either way.
#[cfg(feature = "as_crate")]
mod experimental {
    // Marker trait used to seal `StdFloat` against downstream impls.
    pub trait Sealed {}
}
20
21#[cfg(feature = "as_crate")]
22use experimental as sealed;
23
24use crate::sealed::Sealed;
25
/// This trait provides a possibly-temporary implementation of float functions
/// that may, in the absence of hardware support, canonicalize to calling an
/// operating system's `math.h` dynamically-loaded library (also known as a
/// shared object). As these conditionally require runtime support, they
/// should only appear in binaries built assuming OS support: `std`.
///
/// However, there is no reason SIMD types, in general, need OS support,
/// as for many architectures an embedded binary may simply configure that
/// support itself. This means these types must be visible in `core`
/// but have these functions available in `std`.
///
/// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but
/// due to compiler limitations, it is harder to implement this approach for
/// abstract data types like [`Simd`]. From that need, this trait is born.
///
/// It is possible this trait will be replaced in some manner in the future,
/// when either the compiler or its supporting runtime functions are improved.
/// For now this trait is available to permit experimentation with SIMD float
/// operations that may lack hardware support, such as `mul_add`.
pub trait StdFloat: Sealed + Sized {
    /// Elementwise fused multiply-add. Computes `(self * a) + b` with only one rounding error,
    /// yielding a more accurate result than an unfused multiply-add.
    ///
    /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
    /// architecture has a dedicated `fma` CPU instruction. However, this is not always
    /// true, and will be heavily dependent on designing algorithms with specific target
    /// hardware in mind.
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn mul_add(self, a: Self, b: Self) -> Self {
        unsafe { intrinsics::simd_fma(self, a, b) }
    }

    /// Produces a vector where every element has the square root value
    /// of the equivalently-indexed element in `self`
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn sqrt(self) -> Self {
        unsafe { intrinsics::simd_fsqrt(self) }
    }

    /// Produces a vector where every element has the sine of the value
    /// in the equivalently-indexed element in `self`.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn sin(self) -> Self;

    /// Produces a vector where every element has the cosine of the value
    /// in the equivalently-indexed element in `self`.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn cos(self) -> Self;

    /// Produces a vector where every element has the exponential (base e) of the value
    /// in the equivalently-indexed element in `self`.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn exp(self) -> Self;

    /// Produces a vector where every element has the exponential (base 2) of the value
    /// in the equivalently-indexed element in `self`.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn exp2(self) -> Self;

    /// Produces a vector where every element has the natural logarithm of the value
    /// in the equivalently-indexed element in `self`.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn ln(self) -> Self;

    /// Produces a vector where every element has the logarithm with respect to an arbitrary
    /// base of the equivalently-indexed elements in `self` and `base`.
    ///
    /// Computed elementwise as `self.ln() / base.ln()` (change-of-base identity),
    /// so it inherits the accuracy of the `ln` implementation.
    #[inline]
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn log(self, base: Self) -> Self {
        unsafe { intrinsics::simd_div(self.ln(), base.ln()) }
    }

    /// Produces a vector where every element has the base-2 logarithm of the value
    /// in the equivalently-indexed element in `self`.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn log2(self) -> Self;

    /// Produces a vector where every element has the base-10 logarithm of the value
    /// in the equivalently-indexed element in `self`.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn log10(self) -> Self;

    /// Returns the smallest integer greater than or equal to each element.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn ceil(self) -> Self {
        unsafe { intrinsics::simd_ceil(self) }
    }

    /// Returns the largest integer value less than or equal to each element.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn floor(self) -> Self {
        unsafe { intrinsics::simd_floor(self) }
    }

    /// Rounds to the nearest integer value. Ties round away from zero,
    /// matching the behavior of the scalar `f32::round`/`f64::round`.
    /// (`simd_round` lowers to `llvm.round`, which resolves halfway cases
    /// away from zero.)
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn round(self) -> Self {
        unsafe { intrinsics::simd_round(self) }
    }

    /// Returns the floating point's integer value, with its fractional part removed.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    #[inline]
    fn trunc(self) -> Self {
        unsafe { intrinsics::simd_trunc(self) }
    }

    /// Returns the floating point's fractional value, with its integer part removed.
    #[must_use = "method returns a new vector and does not mutate the original value"]
    fn fract(self) -> Self;
}
142
// Seal `StdFloat`: only SIMD vectors of the binary floating-point element
// types (`f32`, `f64`) may implement it; downstream crates cannot add impls.
impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
145
// Expands a `method: intrinsic,` list into `StdFloat` impls for `f32` and
// `f64` vectors of every supported lane count. `fract` has no intrinsic and
// is defined directly; every listed method forwards to its vector intrinsic.
macro_rules! impl_float {
    {
        $($name:ident: $intr:ident,)*
    } => {
        impl<const N: usize> StdFloat for Simd<f32, N>
        where
            LaneCount<N>: SupportedLaneCount,
        {
            // fract(x) = x - trunc(x), elementwise.
            #[inline]
            fn fract(self) -> Self {
                self - self.trunc()
            }

            $(
                #[inline]
                fn $name(self) -> Self {
                    unsafe { intrinsics::$intr(self) }
                }
            )*
        }

        impl<const N: usize> StdFloat for Simd<f64, N>
        where
            LaneCount<N>: SupportedLaneCount,
        {
            // fract(x) = x - trunc(x), elementwise.
            #[inline]
            fn fract(self) -> Self {
                self - self.trunc()
            }

            $(
                #[inline]
                fn $name(self) -> Self {
                    // On aarch64 the f64 vector intrinsics miscompile, so fall
                    // back to applying the scalar method lane by lane.
                    // https://github.com/llvm/llvm-project/issues/83729
                    #[cfg(target_arch = "aarch64")]
                    {
                        let mut lanes = self.to_array();
                        for lane in lanes.iter_mut() {
                            *lane = lane.$name();
                        }
                        Self::from_array(lanes)
                    }

                    #[cfg(not(target_arch = "aarch64"))]
                    {
                        unsafe { intrinsics::$intr(self) }
                    }
                }
            )*
        }
    }
}
198
// Wire each trait method to the vector intrinsic implementing it; the `f`
// prefix on the intrinsics marks the floating-point variants, and `ln` maps
// to `simd_flog`, the natural-logarithm intrinsic.
impl_float! {
    sin: simd_fsin,
    cos: simd_fcos,
    exp: simd_fexp,
    exp2: simd_fexp2,
    ln: simd_flog,
    log2: simd_flog2,
    log10: simd_flog10,
}
208