1 | #![cfg_attr (feature = "as_crate" , no_std)] // We are std! |
2 | #![cfg_attr ( |
3 | feature = "as_crate" , |
4 | feature(platform_intrinsics), |
5 | feature(portable_simd), |
6 | allow(internal_features) |
7 | )] |
8 | #[cfg (not(feature = "as_crate" ))] |
9 | use core::simd; |
10 | #[cfg (feature = "as_crate" )] |
11 | use core_simd::simd; |
12 | |
13 | use simd::{LaneCount, Simd, SupportedLaneCount}; |
14 | |
// When built as a standalone crate, the sealing trait is declared locally in
// this private module; it is re-exported under the name `sealed` right below.
#[cfg(feature = "as_crate")]
mod experimental {
    /// Marker supertrait used to seal [`StdFloat`](super::StdFloat): the module
    /// is private, so code outside this crate cannot name this trait and
    /// therefore cannot add further implementations.
    pub trait Sealed {}
}
19 | |
20 | #[cfg (feature = "as_crate" )] |
21 | use experimental as sealed; |
22 | |
23 | use crate::sealed::Sealed; |
24 | |
25 | // "platform intrinsics" are essentially "codegen intrinsics" |
26 | // each of these may be scalarized and lowered to a libm call |
27 | extern "platform-intrinsic" { |
28 | // ceil |
29 | fn simd_ceil<T>(x: T) -> T; |
30 | |
31 | // floor |
32 | fn simd_floor<T>(x: T) -> T; |
33 | |
34 | // round |
35 | fn simd_round<T>(x: T) -> T; |
36 | |
37 | // trunc |
38 | fn simd_trunc<T>(x: T) -> T; |
39 | |
40 | // fsqrt |
41 | fn simd_fsqrt<T>(x: T) -> T; |
42 | |
43 | // fma |
44 | fn simd_fma<T>(x: T, y: T, z: T) -> T; |
45 | } |
46 | |
47 | /// This trait provides a possibly-temporary implementation of float functions |
48 | /// that may, in the absence of hardware support, canonicalize to calling an |
49 | /// operating system's `math.h` dynamically-loaded library (also known as a |
50 | /// shared object). As these conditionally require runtime support, they |
51 | /// should only appear in binaries built assuming OS support: `std`. |
52 | /// |
53 | /// However, there is no reason SIMD types, in general, need OS support, |
54 | /// as for many architectures an embedded binary may simply configure that |
55 | /// support itself. This means these types must be visible in `core` |
56 | /// but have these functions available in `std`. |
57 | /// |
58 | /// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but |
59 | /// due to compiler limitations, it is harder to implement this approach for |
60 | /// abstract data types like [`Simd`]. From that need, this trait is born. |
61 | /// |
62 | /// It is possible this trait will be replaced in some manner in the future, |
63 | /// when either the compiler or its supporting runtime functions are improved. |
64 | /// For now this trait is available to permit experimentation with SIMD float |
65 | /// operations that may lack hardware support, such as `mul_add`. |
66 | pub trait StdFloat: Sealed + Sized { |
67 | /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error, |
68 | /// yielding a more accurate result than an unfused multiply-add. |
69 | /// |
70 | /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target |
71 | /// architecture has a dedicated `fma` CPU instruction. However, this is not always |
72 | /// true, and will be heavily dependent on designing algorithms with specific target |
73 | /// hardware in mind. |
74 | #[inline ] |
75 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
76 | fn mul_add(self, a: Self, b: Self) -> Self { |
77 | unsafe { simd_fma(self, a, b) } |
78 | } |
79 | |
80 | /// Produces a vector where every lane has the square root value |
81 | /// of the equivalently-indexed lane in `self` |
82 | #[inline ] |
83 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
84 | fn sqrt(self) -> Self { |
85 | unsafe { simd_fsqrt(self) } |
86 | } |
87 | |
88 | /// Returns the smallest integer greater than or equal to each lane. |
89 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
90 | #[inline ] |
91 | fn ceil(self) -> Self { |
92 | unsafe { simd_ceil(self) } |
93 | } |
94 | |
95 | /// Returns the largest integer value less than or equal to each lane. |
96 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
97 | #[inline ] |
98 | fn floor(self) -> Self { |
99 | unsafe { simd_floor(self) } |
100 | } |
101 | |
102 | /// Rounds to the nearest integer value. Ties round toward zero. |
103 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
104 | #[inline ] |
105 | fn round(self) -> Self { |
106 | unsafe { simd_round(self) } |
107 | } |
108 | |
109 | /// Returns the floating point's integer value, with its fractional part removed. |
110 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
111 | #[inline ] |
112 | fn trunc(self) -> Self { |
113 | unsafe { simd_trunc(self) } |
114 | } |
115 | |
116 | /// Returns the floating point's fractional value, with its integer part removed. |
117 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
118 | fn fract(self) -> Self; |
119 | } |
120 | |
121 | impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {} |
122 | impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {} |
123 | |
124 | // We can safely just use all the defaults. |
125 | impl<const N: usize> StdFloat for Simd<f32, N> |
126 | where |
127 | LaneCount<N>: SupportedLaneCount, |
128 | { |
129 | /// Returns the floating point's fractional value, with its integer part removed. |
130 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
131 | #[inline ] |
132 | fn fract(self) -> Self { |
133 | self - self.trunc() |
134 | } |
135 | } |
136 | |
137 | impl<const N: usize> StdFloat for Simd<f64, N> |
138 | where |
139 | LaneCount<N>: SupportedLaneCount, |
140 | { |
141 | /// Returns the floating point's fractional value, with its integer part removed. |
142 | #[must_use = "method returns a new vector and does not mutate the original value" ] |
143 | #[inline ] |
144 | fn fract(self) -> Self { |
145 | self - self.trunc() |
146 | } |
147 | } |
148 | |
#[cfg(test)]
mod tests {
    use super::*;
    use simd::prelude::*;

    /// Exercises every `StdFloat` method and checks the lane-wise results.
    ///
    /// All expected values are exactly representable in `f32` (or are the
    /// unmodified input lanes), so exact equality is safe here.
    #[test]
    fn everything_works() {
        let x = f32x4::from_array([0.1, 0.5, 0.6, -1.5]);

        // Rounding family.
        assert_eq!(x.ceil(), f32x4::from_array([1.0, 1.0, 1.0, -1.0]));
        assert_eq!(x.floor(), f32x4::from_array([0.0, 0.0, 0.0, -2.0]));
        // Ties (0.5 and -1.5) round away from zero.
        assert_eq!(x.round(), f32x4::from_array([0.0, 1.0, 1.0, -2.0]));
        assert_eq!(x.trunc(), f32x4::from_array([0.0, 0.0, 0.0, -1.0]));
        // `fract` is `self - self.trunc()`, so it keeps the sign of the input.
        assert_eq!(x.fract(), f32x4::from_array([0.1, 0.5, 0.6, -0.5]));

        // Fused multiply-add on inputs whose results are exact in `f32`.
        let y = f32x4::from_array([1.0, 2.0, 3.0, -1.5]);
        assert_eq!(y.mul_add(y, y), f32x4::from_array([2.0, 6.0, 12.0, 0.75]));

        // Square roots of perfect squares are exact.
        let squares = f32x4::from_array([0.0, 1.0, 4.0, 9.0]);
        assert_eq!(squares.sqrt(), f32x4::from_array([0.0, 1.0, 2.0, 3.0]));

        // The square root of a negative lane is NaN, which never compares
        // equal, so inspect that lane directly.
        assert!(x.sqrt().to_array()[3].is_nan());

        // Smoke checks kept from the original test: these only need to
        // compile and run alongside the `StdFloat` methods.
        let _xfma = x.mul_add(x, x);
        let x2 = x + x;
        let _ = x2.abs() * x2;
    }
}
167 | |