1use core::{fmt, mem, ops};
2
3use super::int_traits::{CastFrom, Int, MinInt};
4
5/// Trait for some basic operations on floats
6// #[allow(dead_code)]
7pub trait Float:
8 Copy
9 + fmt::Debug
10 + PartialEq
11 + PartialOrd
12 + ops::AddAssign
13 + ops::MulAssign
14 + ops::Add<Output = Self>
15 + ops::Sub<Output = Self>
16 + ops::Mul<Output = Self>
17 + ops::Div<Output = Self>
18 + ops::Rem<Output = Self>
19 + ops::Neg<Output = Self>
20 + 'static
21{
22 /// A uint of the same width as the float
23 type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
24
25 /// A int of the same width as the float
26 type SignedInt: Int
27 + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
28 + ops::Neg<Output = Self::SignedInt>;
29
30 const ZERO: Self;
31 const NEG_ZERO: Self;
32 const ONE: Self;
33 const NEG_ONE: Self;
34 const INFINITY: Self;
35 const NEG_INFINITY: Self;
36 const NAN: Self;
37 const MAX: Self;
38 const MIN: Self;
39 const EPSILON: Self;
40 const PI: Self;
41 const NEG_PI: Self;
42 const FRAC_PI_2: Self;
43
44 const MIN_POSITIVE_NORMAL: Self;
45
46 /// The bitwidth of the float type
47 const BITS: u32;
48
49 /// The bitwidth of the significand
50 const SIG_BITS: u32;
51
52 /// The bitwidth of the exponent
53 const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
54
55 /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
56 /// representation.
57 ///
58 /// This shifted fully right, use `EXP_MASK` for the shifted value.
59 const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
60
61 /// The exponent bias value
62 const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
63
64 /// Maximum unbiased exponent value.
65 const EXP_MAX: i32 = Self::EXP_BIAS as i32;
66
67 /// Minimum *NORMAL* unbiased exponent value.
68 const EXP_MIN: i32 = -(Self::EXP_MAX - 1);
69
70 /// Minimum subnormal exponent value.
71 const EXP_MIN_SUBNORM: i32 = Self::EXP_MIN - Self::SIG_BITS as i32;
72
73 /// A mask for the sign bit
74 const SIGN_MASK: Self::Int;
75
76 /// A mask for the significand
77 const SIG_MASK: Self::Int;
78
79 /// A mask for the exponent
80 const EXP_MASK: Self::Int;
81
82 /// The implicit bit of the float format
83 const IMPLICIT_BIT: Self::Int;
84
85 /// Returns `self` transmuted to `Self::Int`
86 fn to_bits(self) -> Self::Int;
87
88 /// Returns `self` transmuted to `Self::SignedInt`
89 #[allow(dead_code)]
90 fn to_bits_signed(self) -> Self::SignedInt {
91 self.to_bits().signed()
92 }
93
94 /// Check bitwise equality.
95 #[allow(dead_code)]
96 fn biteq(self, rhs: Self) -> bool {
97 self.to_bits() == rhs.to_bits()
98 }
99
100 /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
101 /// represented in multiple different ways.
102 ///
103 /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead
104 /// if `NaN` should not be treated separately.
105 #[allow(dead_code)]
106 fn eq_repr(self, rhs: Self) -> bool {
107 if self.is_nan() && rhs.is_nan() { true } else { self.biteq(rhs) }
108 }
109
110 /// Returns true if the value is NaN.
111 fn is_nan(self) -> bool;
112
113 /// Returns true if the value is +inf or -inf.
114 fn is_infinite(self) -> bool;
115
116 /// Returns true if the sign is negative. Extracts the sign bit regardless of zero or NaN.
117 fn is_sign_negative(self) -> bool;
118
119 /// Returns true if the sign is positive. Extracts the sign bit regardless of zero or NaN.
120 fn is_sign_positive(self) -> bool {
121 !self.is_sign_negative()
122 }
123
124 /// Returns if `self` is subnormal.
125 #[allow(dead_code)]
126 fn is_subnormal(self) -> bool {
127 (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
128 }
129
130 /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
131 fn ex(self) -> u32 {
132 u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_SAT
133 }
134
135 /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.
136 fn exp_unbiased(self) -> i32 {
137 self.ex().signed() - (Self::EXP_BIAS as i32)
138 }
139
140 /// Returns the significand with no implicit bit (or the "fractional" part)
141 #[allow(dead_code)]
142 fn frac(self) -> Self::Int {
143 self.to_bits() & Self::SIG_MASK
144 }
145
146 /// Returns a `Self::Int` transmuted back to `Self`
147 fn from_bits(a: Self::Int) -> Self;
148
149 /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
150 fn from_parts(negative: bool, exponent: u32, significand: Self::Int) -> Self {
151 let sign = if negative { Self::Int::ONE } else { Self::Int::ZERO };
152 Self::from_bits(
153 (sign << (Self::BITS - 1))
154 | (Self::Int::cast_from(exponent & Self::EXP_SAT) << Self::SIG_BITS)
155 | (significand & Self::SIG_MASK),
156 )
157 }
158
159 #[allow(dead_code)]
160 fn abs(self) -> Self;
161
162 /// Returns a number composed of the magnitude of self and the sign of sign.
163 fn copysign(self, other: Self) -> Self;
164
165 /// Fused multiply add, rounding once.
166 fn fma(self, y: Self, z: Self) -> Self;
167
168 /// Returns (normalized exponent, normalized significand)
169 #[allow(dead_code)]
170 fn normalize(significand: Self::Int) -> (i32, Self::Int);
171
172 /// Returns a number that represents the sign of self.
173 #[allow(dead_code)]
174 fn signum(self) -> Self {
175 if self.is_nan() { self } else { Self::ONE.copysign(self) }
176 }
177}
178
/// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
///
/// `IntTy<F>` is shorthand for `<F as Float>::Int`.
pub type IntTy<F> = <F as Float>::Int;
181
/// Implements [`Float`] for a primitive float type.
///
/// Arguments, in order:
///
/// - `$ty`: the float type the impl is for
/// - `$ity`: the unsigned integer type used as `Float::Int`
/// - `$sity`: the signed integer type used as `Float::SignedInt`
/// - `$bits`: total bit width, becomes `Float::BITS`
/// - `$significand_bits`: significand width, becomes `Float::SIG_BITS`
/// - `$from_bits`: path to a bits-to-float function; it is called in constant initializers, so it
///   has to be usable in const context
/// - `$fma_fn`: name of the `super::super` function used for `fma` when `intrinsics_enabled` is
///   not set
/// - `$fma_intrinsic`: name of the `core::intrinsics` function used for `fma` when
///   `intrinsics_enabled` is set
macro_rules! float_impl {
    (
        $ty:ident,
        $ity:ident,
        $sity:ident,
        $bits:expr,
        $significand_bits:expr,
        $from_bits:path,
        $fma_fn:ident,
        $fma_intrinsic:ident
    ) => {
        impl Float for $ty {
            type Int = $ity;
            type SignedInt = $sity;

            const ZERO: Self = 0.0;
            const NEG_ZERO: Self = -0.0;
            const ONE: Self = 1.0;
            const NEG_ONE: Self = -1.0;
            // The right-hand sides below name the primitive type's own inherent constants, which
            // take precedence over the trait constants being defined here.
            const INFINITY: Self = Self::INFINITY;
            const NEG_INFINITY: Self = Self::NEG_INFINITY;
            const NAN: Self = Self::NAN;
            const MAX: Self = -Self::MIN;
            // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed
            const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS));
            const EPSILON: Self = <$ty>::EPSILON;

            // Exponent is a 1 in the LSB
            const MIN_POSITIVE_NORMAL: Self = $from_bits(1 << Self::SIG_BITS);

            const PI: Self = core::$ty::consts::PI;
            const NEG_PI: Self = -Self::PI;
            const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2;

            const BITS: u32 = $bits;
            const SIG_BITS: u32 = $significand_bits;

            // Top bit only.
            const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
            // Low `SIG_BITS` bits.
            const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
            // Everything that is neither sign nor significand.
            const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);
            // The bit directly above the stored significand.
            const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;

            // The method calls in the next few bodies resolve to the primitive type's inherent
            // methods (inherent methods win over trait methods), so they forward rather than
            // recurse.
            fn to_bits(self) -> Self::Int {
                self.to_bits()
            }
            fn is_nan(self) -> bool {
                self.is_nan()
            }
            fn is_infinite(self) -> bool {
                self.is_infinite()
            }
            fn is_sign_negative(self) -> bool {
                self.is_sign_negative()
            }
            fn from_bits(a: Self::Int) -> Self {
                Self::from_bits(a)
            }
            fn abs(self) -> Self {
                cfg_if! {
                    // FIXME(msrv): `abs` is available in `core` starting with 1.85.
                    if #[cfg(intrinsics_enabled)] {
                        self.abs()
                    } else {
                        super::super::generic::fabs(self)
                    }
                }
            }
            fn copysign(self, other: Self) -> Self {
                cfg_if! {
                    // FIXME(msrv): `copysign` is available in `core` starting with 1.85.
                    if #[cfg(intrinsics_enabled)] {
                        self.copysign(other)
                    } else {
                        super::super::generic::copysign(self, other)
                    }
                }
            }
            fn fma(self, y: Self, z: Self) -> Self {
                cfg_if! {
                    // fma is not yet available in `core`
                    if #[cfg(intrinsics_enabled)] {
                        // NOTE(review): presumably the intrinsic has no preconditions beyond
                        // receiving float arguments; confirm and record as a `SAFETY:` comment.
                        unsafe{ core::intrinsics::$fma_intrinsic(self, y, z) }
                    } else {
                        super::super::$fma_fn(self, y, z)
                    }
                }
            }
            fn normalize(significand: Self::Int) -> (i32, Self::Int) {
                // Shifting left by `leading_zeros - EXP_BITS` moves the highest set bit to bit
                // `SIG_BITS`, the position of the implicit bit. The subtraction wraps when there
                // are fewer leading zeros than exponent bits.
                let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
                // The returned exponent is `1 - shift`.
                (1i32.wrapping_sub(shift as i32), significand << shift as Self::Int)
            }
        }
    };
}
276
// Argument order: float type, unsigned int, signed int, total bits, significand bits,
// bits-to-float function, fallback fma function, fma intrinsic.
#[cfg(f16_enabled)]
float_impl!(f16, u16, i16, 16, 10, f16::from_bits, fmaf16, fmaf16);
// `f32` and `f64` pass this file's vendored `f32_from_bits` / `f64_from_bits` const fns rather
// than the inherent `from_bits`.
float_impl!(f32, u32, i32, 32, 23, f32_from_bits, fmaf, fmaf32);
float_impl!(f64, u64, i64, 64, 52, f64_from_bits, fma, fmaf64);
#[cfg(f128_enabled)]
float_impl!(f128, u128, i128, 128, 112, f128::from_bits, fmaf128, fmaf128);
283
284/* FIXME(msrv): vendor some things that are not const stable at our MSRV */
285
/// `f32::from_bits`
///
/// Reinterprets `bits` as an `f32` without changing any bit. Written as a `const fn` so it can
/// be used in constant initializers.
pub const fn f32_from_bits(bits: u32) -> f32 {
    // SAFETY: POD cast with no preconditions
    unsafe { mem::transmute::<u32, f32>(bits) }
}
291
/// `f64::from_bits`
///
/// Reinterprets `bits` as an `f64` without changing any bit. Written as a `const fn` so it can
/// be used in constant initializers.
pub const fn f64_from_bits(bits: u64) -> f64 {
    // SAFETY: POD cast with no preconditions
    unsafe { mem::transmute::<u64, f64>(bits) }
}
297
/// Trait for floats twice the bit width of another float.
///
/// Paired with [`HFloat`]: the half-width type `H` must name `Self` as its double-width type.
pub trait DFloat: Float {
    /// Float that is half the bit width of the float this trait is implemented for.
    type H: HFloat<D = Self>;

    /// Narrow the float type, converting `self` to the half-width type.
    fn narrow(self) -> Self::H;
}
306
/// Trait for floats half the bit width of another float.
///
/// Paired with [`DFloat`]: the double-width type `D` must name `Self` as its half-width type.
pub trait HFloat: Float {
    /// Float that is double the bit width of the float this trait is implemented for.
    type D: DFloat<H = Self>;

    /// Widen the float type, converting `self` to the double-width type.
    fn widen(self) -> Self::D;
}
315
/// Implements `DFloat` for the wider type of each `narrow wide` pair.
///
/// Pairs are comma separated. The first type of a pair becomes the associated half-width type
/// `H` and `narrow` is a plain `as` cast to it.
macro_rules! impl_d_float {
    ($($half:ident $double:ident),*) => {
        $(
            impl DFloat for $double {
                type H = $half;

                fn narrow(self) -> $half {
                    self as $half
                }
            }
        )*
    };
}
329
/// Implements `HFloat` for the narrower type of each `narrow wide` pair.
///
/// Pairs are comma separated. The second type of a pair becomes the associated double-width type
/// `D` and `widen` is a plain `as` cast to it.
macro_rules! impl_h_float {
    ($($half:ident $double:ident),*) => {
        $(
            impl HFloat for $half {
                type D = $double;

                fn widen(self) -> $double {
                    self as $double
                }
            }
        )*
    };
}
343
// Each pair is written `narrow wide`: `impl_d_float!` implements `DFloat` for the wide type and
// `impl_h_float!` implements `HFloat` for the narrow type. Pairs involving `f16` or `f128` exist
// only when the matching cfg is set.
impl_d_float!(f32 f64);
#[cfg(f16_enabled)]
impl_d_float!(f16 f32);
#[cfg(f128_enabled)]
impl_d_float!(f64 f128);

impl_h_float!(f32 f64);
#[cfg(f16_enabled)]
impl_h_float!(f16 f32);
#[cfg(f128_enabled)]
impl_h_float!(f64 f128);
355
// Per-type checks of the derived exponent constants, `exp_unbiased`, and `from_parts`.
//
// `assert_biteq!`, `hf32!` and `hf64!` are not defined in this file; presumably they are
// crate-level test helper macros (verify against the crate root).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    #[cfg(f16_enabled)]
    fn check_f16() {
        // Constants
        // 16 total bits - 10 significand bits - 1 sign bit leaves 5 exponent bits.
        assert_eq!(f16::EXP_SAT, 0b11111);
        assert_eq!(f16::EXP_BIAS, 15);
        assert_eq!(f16::EXP_MAX, 15);
        assert_eq!(f16::EXP_MIN, -14);
        // EXP_MIN - SIG_BITS
        assert_eq!(f16::EXP_MIN_SUBNORM, -24);

        // `exp_unbiased`
        assert_eq!(f16::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!((1.0f16 / 2.0).exp_unbiased(), -1);
        assert_eq!(f16::MAX.exp_unbiased(), 15);
        assert_eq!(f16::MIN.exp_unbiased(), 15);
        assert_eq!(f16::MIN_POSITIVE.exp_unbiased(), -14);
        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
        // results for zero and subnormals.
        assert_eq!(f16::ZERO.exp_unbiased(), -15);
        assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15);
        // The trait constant must agree with the primitive type's own `MIN_POSITIVE`.
        assert_eq!(f16::MIN_POSITIVE, f16::MIN_POSITIVE_NORMAL);

        // `from_parts`
        assert_biteq!(f16::from_parts(true, f16::EXP_BIAS, 0), -1.0f16);
        assert_biteq!(f16::from_parts(false, 0, 1), f16::from_bits(0x1));
    }

    #[test]
    fn check_f32() {
        // Constants
        // 32 total bits - 23 significand bits - 1 sign bit leaves 8 exponent bits.
        assert_eq!(f32::EXP_SAT, 0b11111111);
        assert_eq!(f32::EXP_BIAS, 127);
        assert_eq!(f32::EXP_MAX, 127);
        assert_eq!(f32::EXP_MIN, -126);
        // EXP_MIN - SIG_BITS
        assert_eq!(f32::EXP_MIN_SUBNORM, -149);

        // `exp_unbiased`
        assert_eq!(f32::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!((1.0f32 / 2.0).exp_unbiased(), -1);
        assert_eq!(f32::MAX.exp_unbiased(), 127);
        assert_eq!(f32::MIN.exp_unbiased(), 127);
        assert_eq!(f32::MIN_POSITIVE.exp_unbiased(), -126);
        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
        // results for zero and subnormals.
        assert_eq!(f32::ZERO.exp_unbiased(), -127);
        assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127);
        // The trait constant must agree with the primitive type's own `MIN_POSITIVE`.
        assert_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE_NORMAL);

        // `from_parts`
        assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32);
        assert_biteq!(f32::from_parts(false, 10 + f32::EXP_BIAS, 0), hf32!("0x1p10"));
        assert_biteq!(f32::from_parts(false, 0, 1), f32::from_bits(0x1));
    }

    #[test]
    fn check_f64() {
        // Constants
        // 64 total bits - 52 significand bits - 1 sign bit leaves 11 exponent bits.
        assert_eq!(f64::EXP_SAT, 0b11111111111);
        assert_eq!(f64::EXP_BIAS, 1023);
        assert_eq!(f64::EXP_MAX, 1023);
        assert_eq!(f64::EXP_MIN, -1022);
        // EXP_MIN - SIG_BITS
        assert_eq!(f64::EXP_MIN_SUBNORM, -1074);

        // `exp_unbiased`
        assert_eq!(f64::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!((1.0f64 / 2.0).exp_unbiased(), -1);
        assert_eq!(f64::MAX.exp_unbiased(), 1023);
        assert_eq!(f64::MIN.exp_unbiased(), 1023);
        assert_eq!(f64::MIN_POSITIVE.exp_unbiased(), -1022);
        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
        // results for zero and subnormals.
        assert_eq!(f64::ZERO.exp_unbiased(), -1023);
        assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023);
        // The trait constant must agree with the primitive type's own `MIN_POSITIVE`.
        assert_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE_NORMAL);

        // `from_parts`
        assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64);
        assert_biteq!(f64::from_parts(false, 10 + f64::EXP_BIAS, 0), hf64!("0x1p10"));
        assert_biteq!(f64::from_parts(false, 0, 1), f64::from_bits(0x1));
    }

    #[test]
    #[cfg(f128_enabled)]
    fn check_f128() {
        // Constants
        // 128 total bits - 112 significand bits - 1 sign bit leaves 15 exponent bits.
        assert_eq!(f128::EXP_SAT, 0b111111111111111);
        assert_eq!(f128::EXP_BIAS, 16383);
        assert_eq!(f128::EXP_MAX, 16383);
        assert_eq!(f128::EXP_MIN, -16382);
        // EXP_MIN - SIG_BITS
        assert_eq!(f128::EXP_MIN_SUBNORM, -16494);

        // `exp_unbiased`
        assert_eq!(f128::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!((1.0f128 / 2.0).exp_unbiased(), -1);
        assert_eq!(f128::MAX.exp_unbiased(), 16383);
        assert_eq!(f128::MIN.exp_unbiased(), 16383);
        assert_eq!(f128::MIN_POSITIVE.exp_unbiased(), -16382);
        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
        // results for zero and subnormals.
        assert_eq!(f128::ZERO.exp_unbiased(), -16383);
        assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383);
        // The trait constant must agree with the primitive type's own `MIN_POSITIVE`.
        assert_eq!(f128::MIN_POSITIVE, f128::MIN_POSITIVE_NORMAL);

        // `from_parts`
        assert_biteq!(f128::from_parts(true, f128::EXP_BIAS, 0), -1.0f128);
        assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1));
    }
}
468