#![allow(unknown_lints)] // FIXME(msrv) we shouldn't need this

use core::{fmt, mem, ops};

use super::int_traits::{CastFrom, Int, MinInt};

7/// Trait for some basic operations on floats
8// #[allow(dead_code)]
9#[allow(dead_code)] // Some constants are only used with tests
10pub trait Float:
11 Copy
12 + fmt::Debug
13 + PartialEq
14 + PartialOrd
15 + ops::AddAssign
16 + ops::MulAssign
17 + ops::Add<Output = Self>
18 + ops::Sub<Output = Self>
19 + ops::Mul<Output = Self>
20 + ops::Div<Output = Self>
21 + ops::Rem<Output = Self>
22 + ops::Neg<Output = Self>
23 + 'static
24{
25 /// A uint of the same width as the float
26 type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
27
28 /// A int of the same width as the float
29 type SignedInt: Int
30 + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
31 + ops::Neg<Output = Self::SignedInt>;
32
33 const ZERO: Self;
34 const NEG_ZERO: Self;
35 const ONE: Self;
36 const NEG_ONE: Self;
37 const INFINITY: Self;
38 const NEG_INFINITY: Self;
39 const NAN: Self;
40 const NEG_NAN: Self;
41 const MAX: Self;
42 const MIN: Self;
43 const EPSILON: Self;
44 const PI: Self;
45 const NEG_PI: Self;
46 const FRAC_PI_2: Self;
47
48 const MIN_POSITIVE_NORMAL: Self;
49
50 /// The bitwidth of the float type
51 const BITS: u32;
52
53 /// The bitwidth of the significand
54 const SIG_BITS: u32;
55
56 /// The bitwidth of the exponent
57 const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
58
59 /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
60 /// representation.
61 ///
62 /// This shifted fully right, use `EXP_MASK` for the shifted value.
63 const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
64
65 /// The exponent bias value
66 const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
67
68 /// Maximum unbiased exponent value.
69 const EXP_MAX: i32 = Self::EXP_BIAS as i32;
70
71 /// Minimum *NORMAL* unbiased exponent value.
72 const EXP_MIN: i32 = -(Self::EXP_MAX - 1);
73
74 /// Minimum subnormal exponent value.
75 const EXP_MIN_SUBNORM: i32 = Self::EXP_MIN - Self::SIG_BITS as i32;
76
77 /// A mask for the sign bit
78 const SIGN_MASK: Self::Int;
79
80 /// A mask for the significand
81 const SIG_MASK: Self::Int;
82
83 /// A mask for the exponent
84 const EXP_MASK: Self::Int;
85
86 /// The implicit bit of the float format
87 const IMPLICIT_BIT: Self::Int;
88
89 /// Returns `self` transmuted to `Self::Int`
90 fn to_bits(self) -> Self::Int;
91
92 /// Returns `self` transmuted to `Self::SignedInt`
93 #[allow(dead_code)]
94 fn to_bits_signed(self) -> Self::SignedInt {
95 self.to_bits().signed()
96 }
97
98 /// Check bitwise equality.
99 #[allow(dead_code)]
100 fn biteq(self, rhs: Self) -> bool {
101 self.to_bits() == rhs.to_bits()
102 }
103
104 /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
105 /// represented in multiple different ways.
106 ///
107 /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead
108 /// if `NaN` should not be treated separately.
109 #[allow(dead_code)]
110 fn eq_repr(self, rhs: Self) -> bool {
111 if self.is_nan() && rhs.is_nan() {
112 true
113 } else {
114 self.biteq(rhs)
115 }
116 }
117
118 /// Returns true if the value is NaN.
119 fn is_nan(self) -> bool;
120
121 /// Returns true if the value is +inf or -inf.
122 fn is_infinite(self) -> bool;
123
124 /// Returns true if the sign is negative. Extracts the sign bit regardless of zero or NaN.
125 fn is_sign_negative(self) -> bool;
126
127 /// Returns true if the sign is positive. Extracts the sign bit regardless of zero or NaN.
128 fn is_sign_positive(self) -> bool {
129 !self.is_sign_negative()
130 }
131
132 /// Returns if `self` is subnormal.
133 #[allow(dead_code)]
134 fn is_subnormal(self) -> bool {
135 (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
136 }
137
138 /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
139 fn ex(self) -> u32 {
140 u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_SAT
141 }
142
143 /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.
144 fn exp_unbiased(self) -> i32 {
145 self.ex().signed() - (Self::EXP_BIAS as i32)
146 }
147
148 /// Returns the significand with no implicit bit (or the "fractional" part)
149 #[allow(dead_code)]
150 fn frac(self) -> Self::Int {
151 self.to_bits() & Self::SIG_MASK
152 }
153
154 /// Returns a `Self::Int` transmuted back to `Self`
155 fn from_bits(a: Self::Int) -> Self;
156
157 /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
158 fn from_parts(negative: bool, exponent: u32, significand: Self::Int) -> Self {
159 let sign = if negative {
160 Self::Int::ONE
161 } else {
162 Self::Int::ZERO
163 };
164 Self::from_bits(
165 (sign << (Self::BITS - 1))
166 | (Self::Int::cast_from(exponent & Self::EXP_SAT) << Self::SIG_BITS)
167 | (significand & Self::SIG_MASK),
168 )
169 }
170
171 #[allow(dead_code)]
172 fn abs(self) -> Self;
173
174 /// Returns a number composed of the magnitude of self and the sign of sign.
175 fn copysign(self, other: Self) -> Self;
176
177 /// Fused multiply add, rounding once.
178 fn fma(self, y: Self, z: Self) -> Self;
179
180 /// Returns (normalized exponent, normalized significand)
181 #[allow(dead_code)]
182 fn normalize(significand: Self::Int) -> (i32, Self::Int);
183
184 /// Returns a number that represents the sign of self.
185 #[allow(dead_code)]
186 fn signum(self) -> Self {
187 if self.is_nan() {
188 self
189 } else {
190 Self::ONE.copysign(self)
191 }
192 }
193}
194
195/// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
196pub type IntTy<F> = <F as Float>::Int;
197
/// Implements [`Float`] for a primitive float type.
///
/// Arguments, in order:
/// - `$ty`: the float type,
/// - `$ity` / `$sity`: the unsigned and signed integer types used as `Int` / `SignedInt`,
/// - `$bits`: total bit width,
/// - `$significand_bits`: significand width (becomes `SIG_BITS`),
/// - `$from_bits` / `$to_bits`: paths callable in `const` context, used to build constants,
/// - `$fma_fn`: fallback fma function, looked up in `super::super`,
/// - `$fma_intrinsic`: fma intrinsic, looked up in `core::intrinsics`.
macro_rules! float_impl {
    (
        $ty:ident,
        $ity:ident,
        $sity:ident,
        $bits:expr,
        $significand_bits:expr,
        $from_bits:path,
        $to_bits:path,
        $fma_fn:ident,
        $fma_intrinsic:ident
    ) => {
        impl Float for $ty {
            type Int = $ity;
            type SignedInt = $sity;

            const ZERO: Self = 0.0;
            const NEG_ZERO: Self = -0.0;
            const ONE: Self = 1.0;
            const NEG_ONE: Self = -1.0;
            const INFINITY: Self = Self::INFINITY;
            const NEG_INFINITY: Self = Self::NEG_INFINITY;
            const NAN: Self = Self::NAN;
            // NAN isn't guaranteed to be positive but it usually is. We only use this for
            // tests.
            const NEG_NAN: Self = $from_bits($to_bits(Self::NAN) | Self::SIGN_MASK);
            const MAX: Self = -Self::MIN;
            // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed
            const MIN: Self = $from_bits(Self::Int::MAX & !(1 << Self::SIG_BITS));
            const EPSILON: Self = <$ty>::EPSILON;

            // Exponent is a 1 in the LSB
            const MIN_POSITIVE_NORMAL: Self = $from_bits(1 << Self::SIG_BITS);

            const PI: Self = core::$ty::consts::PI;
            const NEG_PI: Self = -Self::PI;
            const FRAC_PI_2: Self = core::$ty::consts::FRAC_PI_2;

            const BITS: u32 = $bits;
            const SIG_BITS: u32 = $significand_bits;

            const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
            const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
            // Everything that is neither the sign bit nor the significand
            const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);
            const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;

            fn to_bits(self) -> Self::Int {
                self.to_bits()
            }
            fn is_nan(self) -> bool {
                self.is_nan()
            }
            fn is_infinite(self) -> bool {
                self.is_infinite()
            }
            fn is_sign_negative(self) -> bool {
                self.is_sign_negative()
            }
            fn from_bits(a: Self::Int) -> Self {
                Self::from_bits(a)
            }
            fn abs(self) -> Self {
                cfg_if! {
                    // FIXME(msrv): `abs` is available in `core` starting with 1.85.
                    if #[cfg(intrinsics_enabled)] {
                        self.abs()
                    } else {
                        super::super::generic::fabs(self)
                    }
                }
            }
            fn copysign(self, other: Self) -> Self {
                cfg_if! {
                    // FIXME(msrv): `copysign` is available in `core` starting with 1.85.
                    if #[cfg(intrinsics_enabled)] {
                        self.copysign(other)
                    } else {
                        super::super::generic::copysign(self, other)
                    }
                }
            }
            fn fma(self, y: Self, z: Self) -> Self {
                cfg_if! {
                    // fma is not yet available in `core`
                    if #[cfg(intrinsics_enabled)] {
                        // NOTE(review): no SAFETY rationale was recorded for this call;
                        // presumably the intrinsic has no preconditions for plain float
                        // arguments -- confirm.
                        unsafe { core::intrinsics::$fma_intrinsic(self, y, z) }
                    } else {
                        super::super::$fma_fn(self, y, z)
                    }
                }
            }
            fn normalize(significand: Self::Int) -> (i32, Self::Int) {
                // Shift left until the highest set bit reaches the implicit-bit position;
                // the returned exponent is reduced by the same amount.
                let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
                (
                    1i32.wrapping_sub(shift as i32),
                    significand << shift as Self::Int,
                )
            }
        }
    };
}

300#[cfg(f16_enabled)]
301float_impl!(
302 f16,
303 u16,
304 i16,
305 16,
306 10,
307 f16::from_bits,
308 f16::to_bits,
309 fmaf16,
310 fmaf16
311);
312float_impl!(
313 f32,
314 u32,
315 i32,
316 32,
317 23,
318 f32_from_bits,
319 f32_to_bits,
320 fmaf,
321 fmaf32
322);
323float_impl!(
324 f64,
325 u64,
326 i64,
327 64,
328 52,
329 f64_from_bits,
330 f64_to_bits,
331 fma,
332 fmaf64
333);
334#[cfg(f128_enabled)]
335float_impl!(
336 f128,
337 u128,
338 i128,
339 128,
340 112,
341 f128::from_bits,
342 f128::to_bits,
343 fmaf128,
344 fmaf128
345);
346
/* FIXME(msrv): the helpers below vendor some things that are not const stable at our MSRV */

/// `f32::from_bits`
///
/// Reinterprets the raw bit pattern `bits` as an `f32`. Usable in `const` context.
#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
pub const fn f32_from_bits(bits: u32) -> f32 {
    // SAFETY: POD cast with no preconditions
    unsafe { mem::transmute::<u32, f32>(bits) }
}

/// `f32::to_bits`
///
/// Returns the raw bit pattern of `x` as a `u32`. Usable in `const` context.
#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
pub const fn f32_to_bits(x: f32) -> u32 {
    // SAFETY: POD cast with no preconditions
    unsafe { mem::transmute::<f32, u32>(x) }
}

/// `f64::from_bits`
///
/// Reinterprets the raw bit pattern `bits` as an `f64`. Usable in `const` context.
#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
pub const fn f64_from_bits(bits: u64) -> f64 {
    // SAFETY: POD cast with no preconditions
    unsafe { mem::transmute::<u64, f64>(bits) }
}

/// `f64::to_bits`
///
/// Returns the raw bit pattern of `x` as a `u64`. Usable in `const` context.
#[allow(unnecessary_transmutes)] // lint appears in newer versions of Rust
pub const fn f64_to_bits(x: f64) -> u64 {
    // SAFETY: POD cast with no preconditions
    unsafe { mem::transmute::<f64, u64>(x) }
}

377/// Trait for floats twice the bit width of another integer.
378pub trait DFloat: Float {
379 /// Float that is half the bit width of the floatthis trait is implemented for.
380 type H: HFloat<D = Self>;
381
382 /// Narrow the float type.
383 fn narrow(self) -> Self::H;
384}
385
386/// Trait for floats half the bit width of another float.
387pub trait HFloat: Float {
388 /// Float that is double the bit width of the float this trait is implemented for.
389 type D: DFloat<H = Self>;
390
391 /// Widen the float type.
392 fn widen(self) -> Self::D;
393}
394
/// Implements [`DFloat`] for the second type of each `half double` pair, using the first
/// type as its half-width counterpart. Pairs are separated by commas.
macro_rules! impl_d_float {
    ($($H:ident $D:ident),*) => {
        $(
            impl DFloat for $D {
                type H = $H;

                fn narrow(self) -> Self::H {
                    // Plain `as` cast from the wider to the narrower float type.
                    self as $H
                }
            }
        )*
    };
}

/// Implements [`HFloat`] for the first type of each `half double` pair, using the second
/// type as its double-width counterpart. Pairs are separated by commas.
macro_rules! impl_h_float {
    ($($H:ident $D:ident),*) => {
        $(
            impl HFloat for $H {
                type D = $D;

                fn widen(self) -> Self::D {
                    // Plain `as` cast from the narrower to the wider float type.
                    self as $D
                }
            }
        )*
    };
}

423impl_d_float!(f32 f64);
424#[cfg(f16_enabled)]
425impl_d_float!(f16 f32);
426#[cfg(f128_enabled)]
427impl_d_float!(f64 f128);
428
429impl_h_float!(f32 f64);
430#[cfg(f16_enabled)]
431impl_h_float!(f16 f32);
432#[cfg(f128_enabled)]
433impl_h_float!(f64 f128);
434
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    #[cfg(f16_enabled)]
    fn check_f16() {
        // Constants
        assert_eq!(f16::EXP_SAT, 0b11111);
        assert_eq!(f16::EXP_BIAS, 15);
        assert_eq!(f16::EXP_MAX, 15);
        assert_eq!(f16::EXP_MIN, -14);
        assert_eq!(f16::EXP_MIN_SUBNORM, -24);

        // `exp_unbiased`
        assert_eq!(f16::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!((1.0f16 / 2.0).exp_unbiased(), -1);
        assert_eq!(f16::MAX.exp_unbiased(), 15);
        assert_eq!(f16::MIN.exp_unbiased(), 15);
        assert_eq!(f16::MIN_POSITIVE.exp_unbiased(), -14);
        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
        // results for zero and subnormals.
        assert_eq!(f16::ZERO.exp_unbiased(), -15);
        assert_eq!(f16::from_bits(0x1).exp_unbiased(), -15);
        assert_eq!(f16::MIN_POSITIVE, f16::MIN_POSITIVE_NORMAL);

        // `from_parts`
        assert_biteq!(f16::from_parts(true, f16::EXP_BIAS, 0), -1.0f16);
        assert_biteq!(f16::from_parts(false, 0, 1), f16::from_bits(0x1));
    }

    #[test]
    fn check_f32() {
        // Constants
        assert_eq!(f32::EXP_SAT, 0b11111111);
        assert_eq!(f32::EXP_BIAS, 127);
        assert_eq!(f32::EXP_MAX, 127);
        assert_eq!(f32::EXP_MIN, -126);
        assert_eq!(f32::EXP_MIN_SUBNORM, -149);

        // `exp_unbiased`
        assert_eq!(f32::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!((1.0f32 / 2.0).exp_unbiased(), -1);
        assert_eq!(f32::MAX.exp_unbiased(), 127);
        assert_eq!(f32::MIN.exp_unbiased(), 127);
        assert_eq!(f32::MIN_POSITIVE.exp_unbiased(), -126);
        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
        // results for zero and subnormals.
        assert_eq!(f32::ZERO.exp_unbiased(), -127);
        assert_eq!(f32::from_bits(0x1).exp_unbiased(), -127);
        assert_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE_NORMAL);

        // `from_parts`
        assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32);
        assert_biteq!(
            f32::from_parts(false, 10 + f32::EXP_BIAS, 0),
            hf32!("0x1p10")
        );
        assert_biteq!(f32::from_parts(false, 0, 1), f32::from_bits(0x1));
    }

    #[test]
    fn check_f64() {
        // Constants
        assert_eq!(f64::EXP_SAT, 0b11111111111);
        assert_eq!(f64::EXP_BIAS, 1023);
        assert_eq!(f64::EXP_MAX, 1023);
        assert_eq!(f64::EXP_MIN, -1022);
        assert_eq!(f64::EXP_MIN_SUBNORM, -1074);

        // `exp_unbiased`
        assert_eq!(f64::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!((1.0f64 / 2.0).exp_unbiased(), -1);
        assert_eq!(f64::MAX.exp_unbiased(), 1023);
        assert_eq!(f64::MIN.exp_unbiased(), 1023);
        assert_eq!(f64::MIN_POSITIVE.exp_unbiased(), -1022);
        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
        // results for zero and subnormals.
        assert_eq!(f64::ZERO.exp_unbiased(), -1023);
        assert_eq!(f64::from_bits(0x1).exp_unbiased(), -1023);
        assert_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE_NORMAL);

        // `from_parts`
        assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64);
        assert_biteq!(
            f64::from_parts(false, 10 + f64::EXP_BIAS, 0),
            hf64!("0x1p10")
        );
        assert_biteq!(f64::from_parts(false, 0, 1), f64::from_bits(0x1));
    }

    #[test]
    #[cfg(f128_enabled)]
    fn check_f128() {
        // Constants
        assert_eq!(f128::EXP_SAT, 0b111111111111111);
        assert_eq!(f128::EXP_BIAS, 16383);
        assert_eq!(f128::EXP_MAX, 16383);
        assert_eq!(f128::EXP_MIN, -16382);
        assert_eq!(f128::EXP_MIN_SUBNORM, -16494);

        // `exp_unbiased`
        assert_eq!(f128::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!((1.0f128 / 2.0).exp_unbiased(), -1);
        assert_eq!(f128::MAX.exp_unbiased(), 16383);
        assert_eq!(f128::MIN.exp_unbiased(), 16383);
        assert_eq!(f128::MIN_POSITIVE.exp_unbiased(), -16382);
        // This is a convenience method and not ldexp, `exp_unbiased` does not return correct
        // results for zero and subnormals.
        assert_eq!(f128::ZERO.exp_unbiased(), -16383);
        assert_eq!(f128::from_bits(0x1).exp_unbiased(), -16383);
        assert_eq!(f128::MIN_POSITIVE, f128::MIN_POSITIVE_NORMAL);

        // `from_parts`
        assert_biteq!(f128::from_parts(true, f128::EXP_BIAS, 0), -1.0f128);
        assert_biteq!(f128::from_parts(false, 0, 1), f128::from_bits(0x1));
    }
}