1use core::{fmt, mem, ops};
2
3use super::int_traits::{CastFrom, Int, MinInt};
4
5/// Trait for some basic operations on floats
6// #[allow(dead_code)]
7pub trait Float:
8 Copy
9 + fmt::Debug
10 + PartialEq
11 + PartialOrd
12 + ops::AddAssign
13 + ops::MulAssign
14 + ops::Add<Output = Self>
15 + ops::Sub<Output = Self>
16 + ops::Mul<Output = Self>
17 + ops::Div<Output = Self>
18 + ops::Rem<Output = Self>
19 + ops::Neg<Output = Self>
20 + 'static
21{
22 /// A uint of the same width as the float
23 type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
24
25 /// A int of the same width as the float
26 type SignedInt: Int
27 + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>
28 + ops::Neg<Output = Self::SignedInt>;
29
30 const ZERO: Self;
31 const NEG_ZERO: Self;
32 const ONE: Self;
33 const NEG_ONE: Self;
34 const INFINITY: Self;
35 const NEG_INFINITY: Self;
36 const NAN: Self;
37 const NEG_NAN: Self;
38 const MAX: Self;
39 const MIN: Self;
40 const EPSILON: Self;
41 const PI: Self;
42 const NEG_PI: Self;
43 const FRAC_PI_2: Self;
44
45 const MIN_POSITIVE_NORMAL: Self;
46
47 /// The bitwidth of the float type
48 const BITS: u32;
49
50 /// The bitwidth of the significand
51 const SIG_BITS: u32;
52
53 /// The bitwidth of the exponent
54 const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
55
56 /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
57 /// representation.
58 ///
59 /// This shifted fully right, use `EXP_MASK` for the shifted value.
60 const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
61
62 /// The exponent bias value
63 const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
64
65 /// Maximum unbiased exponent value.
66 const EXP_MAX: i32 = Self::EXP_BIAS as i32;
67
68 /// Minimum *NORMAL* unbiased exponent value.
69 const EXP_MIN: i32 = -(Self::EXP_MAX - 1);
70
71 /// Minimum subnormal exponent value.
72 const EXP_MIN_SUBNORM: i32 = Self::EXP_MIN - Self::SIG_BITS as i32;
73
74 /// A mask for the sign bit
75 const SIGN_MASK: Self::Int;
76
77 /// A mask for the significand
78 const SIG_MASK: Self::Int;
79
80 /// A mask for the exponent
81 const EXP_MASK: Self::Int;
82
83 /// The implicit bit of the float format
84 const IMPLICIT_BIT: Self::Int;
85
86 /// Returns `self` transmuted to `Self::Int`
87 fn to_bits(self) -> Self::Int;
88
89 /// Returns `self` transmuted to `Self::SignedInt`
90 #[allow(dead_code)]
91 fn to_bits_signed(self) -> Self::SignedInt {
92 self.to_bits().signed()
93 }
94
95 /// Check bitwise equality.
96 #[allow(dead_code)]
97 fn biteq(self, rhs: Self) -> bool {
98 self.to_bits() == rhs.to_bits()
99 }
100
101 /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
102 /// represented in multiple different ways.
103 ///
104 /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead
105 /// if `NaN` should not be treated separately.
106 #[allow(dead_code)]
107 fn eq_repr(self, rhs: Self) -> bool {
108 if self.is_nan() && rhs.is_nan() {
109 true
110 } else {
111 self.biteq(rhs)
112 }
113 }
114
115 /// Returns true if the value is NaN.
116 fn is_nan(self) -> bool;
117
118 /// Returns true if the value is +inf or -inf.
119 fn is_infinite(self) -> bool;
120
121 /// Returns true if the sign is negative. Extracts the sign bit regardless of zero or NaN.
122 fn is_sign_negative(self) -> bool;
123
124 /// Returns true if the sign is positive. Extracts the sign bit regardless of zero or NaN.
125 fn is_sign_positive(self) -> bool {
126 !self.is_sign_negative()
127 }
128
129 /// Returns if `self` is subnormal.
130 #[allow(dead_code)]
131 fn is_subnormal(self) -> bool {
132 (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
133 }
134
135 /// Returns the exponent, not adjusting for bias, not accounting for subnormals or zero.
136 fn ex(self) -> u32 {
137 u32::cast_from(self.to_bits() >> Self::SIG_BITS) & Self::EXP_SAT
138 }
139
140 /// Extract the exponent and adjust it for bias, not accounting for subnormals or zero.
141 fn exp_unbiased(self) -> i32 {
142 self.ex().signed() - (Self::EXP_BIAS as i32)
143 }
144
145 /// Returns the significand with no implicit bit (or the "fractional" part)
146 #[allow(dead_code)]
147 fn frac(self) -> Self::Int {
148 self.to_bits() & Self::SIG_MASK
149 }
150
151 /// Returns a `Self::Int` transmuted back to `Self`
152 fn from_bits(a: Self::Int) -> Self;
153
154 /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
155 fn from_parts(negative: bool, exponent: u32, significand: Self::Int) -> Self {
156 let sign = if negative {
157 Self::Int::ONE
158 } else {
159 Self::Int::ZERO
160 };
161 Self::from_bits(
162 (sign << (Self::BITS - 1))
163 | (Self::Int::cast_from(exponent & Self::EXP_SAT) << Self::SIG_BITS)
164 | (significand & Self::SIG_MASK),
165 )
166 }
167
168 #[allow(dead_code)]
169 fn abs(self) -> Self;
170
171 /// Returns a number composed of the magnitude of self and the sign of sign.
172 fn copysign(self, other: Self) -> Self;
173
174 /// Fused multiply add, rounding once.
175 fn fma(self, y: Self, z: Self) -> Self;
176
177 /// Returns (normalized exponent, normalized significand)
178 #[allow(dead_code)]
179 fn normalize(significand: Self::Int) -> (i32, Self::Int);
180
181 /// Returns a number that represents the sign of self.
182 #[allow(dead_code)]
183 fn signum(self) -> Self {
184 if self.is_nan() {
185 self
186 } else {
187 Self::ONE.copysign(self)
188 }
189 }
190}
191
192/// Access the associated `Int` type from a float (helper to avoid ambiguous associated types).
193pub type IntTy<F> = <F as Float>::Int;
194
// Implements `Float` for one primitive float type.
//
// Arguments, in order:
//   1. the float type,
//   2. the unsigned integer of the same width,
//   3. the signed integer of the same width,
//   4. the total bit width,
//   5. the stored significand bit width,
//   6. a path to a `from_bits` function usable in constant expressions,
//   7. a path to a `to_bits` function usable in constant expressions,
//   8. the name of the FMA routine found two modules up (`super::super`),
//   9. the name of the matching `core::intrinsics` FMA intrinsic.
macro_rules! float_impl {
    (
        $fty:ident,
        $uty:ident,
        $sty:ident,
        $width:expr,
        $sig_width:expr,
        $bits_to_float:path,
        $float_to_bits:path,
        $soft_fma:ident,
        $fma_intrinsic:ident
    ) => {
        impl Float for $fty {
            type Int = $uty;
            type SignedInt = $sty;

            // Layout.
            const BITS: u32 = $width;
            const SIG_BITS: u32 = $sig_width;

            // Field masks.
            const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
            const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
            const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);
            const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;

            // Values.
            const ZERO: Self = 0.0;
            const NEG_ZERO: Self = -0.0;
            const ONE: Self = 1.0;
            const NEG_ONE: Self = -1.0;
            const INFINITY: Self = <$fty>::INFINITY;
            const NEG_INFINITY: Self = <$fty>::NEG_INFINITY;
            const NAN: Self = <$fty>::NAN;
            // NAN isn't guaranteed to be positive but it usually is. We only use this for
            // tests.
            const NEG_NAN: Self = $bits_to_float($float_to_bits(<$fty>::NAN) | Self::SIGN_MASK);
            const MAX: Self = -Self::MIN;
            // Sign bit set, saturated mantissa, saturated exponent with last bit zeroed
            const MIN: Self = $bits_to_float(Self::Int::MAX & !(1 << Self::SIG_BITS));
            const EPSILON: Self = <$fty>::EPSILON;

            // Only the lowest exponent bit is set; the significand field is zero.
            const MIN_POSITIVE_NORMAL: Self = $bits_to_float(1 << Self::SIG_BITS);

            const PI: Self = core::$fty::consts::PI;
            const NEG_PI: Self = -Self::PI;
            const FRAC_PI_2: Self = core::$fty::consts::FRAC_PI_2;

            // The next five methods forward to the primitive's inherent method of the same
            // name.
            fn to_bits(self) -> Self::Int {
                <$fty>::to_bits(self)
            }
            fn is_nan(self) -> bool {
                <$fty>::is_nan(self)
            }
            fn is_infinite(self) -> bool {
                <$fty>::is_infinite(self)
            }
            fn is_sign_negative(self) -> bool {
                <$fty>::is_sign_negative(self)
            }
            fn from_bits(a: Self::Int) -> Self {
                <$fty>::from_bits(a)
            }
            fn abs(self) -> Self {
                cfg_if! {
                    // FIXME(msrv): `abs` is available in `core` starting with 1.85.
                    if #[cfg(intrinsics_enabled)] {
                        <$fty>::abs(self)
                    } else {
                        super::super::generic::fabs(self)
                    }
                }
            }
            fn copysign(self, other: Self) -> Self {
                cfg_if! {
                    // FIXME(msrv): `copysign` is available in `core` starting with 1.85.
                    if #[cfg(intrinsics_enabled)] {
                        <$fty>::copysign(self, other)
                    } else {
                        super::super::generic::copysign(self, other)
                    }
                }
            }
            fn fma(self, y: Self, z: Self) -> Self {
                cfg_if! {
                    // fma is not yet available in `core`
                    if #[cfg(intrinsics_enabled)] {
                        // SAFETY: presumably sound for every input, since this is a plain
                        // arithmetic intrinsic operating on by-value floats -- TODO confirm
                        // against the `core::intrinsics` documentation.
                        unsafe { core::intrinsics::$fma_intrinsic(self, y, z) }
                    } else {
                        super::super::$soft_fma(self, y, z)
                    }
                }
            }
            fn normalize(significand: Self::Int) -> (i32, Self::Int) {
                // Number of positions the significand must move left so that its highest set
                // bit ends up directly below the sign and exponent fields.
                let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
                let exponent = 1i32.wrapping_sub(shift as i32);
                let normalized = significand << (shift as Self::Int);
                (exponent, normalized)
            }
        }
    };
}
296
297#[cfg(f16_enabled)]
298float_impl!(
299 f16,
300 u16,
301 i16,
302 16,
303 10,
304 f16::from_bits,
305 f16::to_bits,
306 fmaf16,
307 fmaf16
308);
309float_impl!(
310 f32,
311 u32,
312 i32,
313 32,
314 23,
315 f32_from_bits,
316 f32_to_bits,
317 fmaf,
318 fmaf32
319);
320float_impl!(
321 f64,
322 u64,
323 i64,
324 64,
325 52,
326 f64_from_bits,
327 f64_to_bits,
328 fma,
329 fmaf64
330);
331#[cfg(f128_enabled)]
332float_impl!(
333 f128,
334 u128,
335 i128,
336 128,
337 112,
338 f128::from_bits,
339 f128::to_bits,
340 fmaf128,
341 fmaf128
342);
343
344/* FIXME(msrv): vendor some things that are not const stable at our MSRV */
345
/// `f32::from_bits`, vendored so it can be called in `const` contexts.
///
/// Reinterprets `bits` as an `f32`; no bit is changed by the conversion.
pub const fn f32_from_bits(bits: u32) -> f32 {
    // SAFETY: POD cast with no preconditions; `u32` and `f32` have the same size and every
    // bit pattern is a valid `f32`.
    unsafe { mem::transmute::<u32, f32>(bits) }
}
351
/// `f32::to_bits`, vendored so it can be called in `const` contexts.
///
/// Reinterprets `x` as a `u32`; no bit is changed by the conversion.
pub const fn f32_to_bits(x: f32) -> u32 {
    // SAFETY: POD cast with no preconditions; `f32` and `u32` have the same size and every
    // bit pattern is a valid `u32`.
    unsafe { mem::transmute::<f32, u32>(x) }
}
357
/// `f64::from_bits`, vendored so it can be called in `const` contexts.
///
/// Reinterprets `bits` as an `f64`; no bit is changed by the conversion.
pub const fn f64_from_bits(bits: u64) -> f64 {
    // SAFETY: POD cast with no preconditions; `u64` and `f64` have the same size and every
    // bit pattern is a valid `f64`.
    unsafe { mem::transmute::<u64, f64>(bits) }
}
363
/// `f64::to_bits`, vendored so it can be called in `const` contexts.
///
/// Reinterprets `x` as a `u64`; no bit is changed by the conversion.
pub const fn f64_to_bits(x: f64) -> u64 {
    // SAFETY: POD cast with no preconditions; `f64` and `u64` have the same size and every
    // bit pattern is a valid `u64`.
    unsafe { mem::transmute::<f64, u64>(x) }
}
369
370/// Trait for floats twice the bit width of another integer.
371pub trait DFloat: Float {
372 /// Float that is half the bit width of the floatthis trait is implemented for.
373 type H: HFloat<D = Self>;
374
375 /// Narrow the float type.
376 fn narrow(self) -> Self::H;
377}
378
379/// Trait for floats half the bit width of another float.
380pub trait HFloat: Float {
381 /// Float that is double the bit width of the float this trait is implemented for.
382 type D: DFloat<H = Self>;
383
384 /// Widen the float type.
385 fn widen(self) -> Self::D;
386}
387
// Implements `DFloat` for the wider type of each listed pair.
//
// Each comma-separated entry is written `<half-width type> <double-width type>`, for
// example `f32 f64`. Narrowing is a plain `as` cast.
macro_rules! impl_d_float {
    ($($half:ident $double:ident),*) => {
        $(
            impl DFloat for $double {
                type H = $half;

                fn narrow(self) -> $half {
                    self as $half
                }
            }
        )*
    };
}
401
// Implements `HFloat` for the narrower type of each listed pair.
//
// Each comma-separated entry is written `<half-width type> <double-width type>`, for
// example `f32 f64`. Widening is a plain `as` cast.
macro_rules! impl_h_float {
    ($($half:ident $double:ident),*) => {
        $(
            impl HFloat for $half {
                type D = $double;

                fn widen(self) -> $double {
                    self as $double
                }
            }
        )*
    };
}
415
416impl_d_float!(f32 f64);
417#[cfg(f16_enabled)]
418impl_d_float!(f16 f32);
419#[cfg(f128_enabled)]
420impl_d_float!(f64 f128);
421
422impl_h_float!(f32 f64);
423#[cfg(f16_enabled)]
424impl_h_float!(f16 f32);
425#[cfg(f128_enabled)]
426impl_h_float!(f64 f128);
427
// Sanity checks for the derived layout constants and the field helpers of each float type.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    #[cfg(f16_enabled)]
    fn check_f16() {
        let smallest_subnormal = f16::from_bits(0x1);

        // Constants
        assert_eq!(f16::EXP_SAT, 0x1f);
        assert_eq!(f16::EXP_BIAS, 15);
        assert_eq!(f16::EXP_MAX, 15);
        assert_eq!(f16::EXP_MIN, -14);
        assert_eq!(f16::EXP_MIN_SUBNORM, -24);
        assert_eq!(f16::MIN_POSITIVE, f16::MIN_POSITIVE_NORMAL);

        // `exp_unbiased` on normal values
        assert_eq!(f16::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!(0.5f16.exp_unbiased(), -1);
        assert_eq!(f16::MAX.exp_unbiased(), 15);
        assert_eq!(f16::MIN.exp_unbiased(), 15);
        assert_eq!(f16::MIN_POSITIVE.exp_unbiased(), -14);

        // `exp_unbiased` is a convenience method and not ldexp: zero and subnormals simply
        // report the all-zero exponent field minus the bias.
        assert_eq!(f16::ZERO.exp_unbiased(), -15);
        assert_eq!(smallest_subnormal.exp_unbiased(), -15);

        // `from_parts`
        assert_biteq!(f16::from_parts(true, f16::EXP_BIAS, 0), -1.0f16);
        assert_biteq!(f16::from_parts(false, 0, 1), smallest_subnormal);
    }

    #[test]
    fn check_f32() {
        let smallest_subnormal = f32::from_bits(0x1);

        // Constants
        assert_eq!(f32::EXP_SAT, 0xff);
        assert_eq!(f32::EXP_BIAS, 127);
        assert_eq!(f32::EXP_MAX, 127);
        assert_eq!(f32::EXP_MIN, -126);
        assert_eq!(f32::EXP_MIN_SUBNORM, -149);
        assert_eq!(f32::MIN_POSITIVE, f32::MIN_POSITIVE_NORMAL);

        // `exp_unbiased` on normal values
        assert_eq!(f32::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!(0.5f32.exp_unbiased(), -1);
        assert_eq!(f32::MAX.exp_unbiased(), 127);
        assert_eq!(f32::MIN.exp_unbiased(), 127);
        assert_eq!(f32::MIN_POSITIVE.exp_unbiased(), -126);

        // `exp_unbiased` is a convenience method and not ldexp: zero and subnormals simply
        // report the all-zero exponent field minus the bias.
        assert_eq!(f32::ZERO.exp_unbiased(), -127);
        assert_eq!(smallest_subnormal.exp_unbiased(), -127);

        // `from_parts`
        assert_biteq!(f32::from_parts(true, f32::EXP_BIAS, 0), -1.0f32);
        assert_biteq!(
            f32::from_parts(false, f32::EXP_BIAS + 10, 0),
            hf32!("0x1p10")
        );
        assert_biteq!(f32::from_parts(false, 0, 1), smallest_subnormal);
    }

    #[test]
    fn check_f64() {
        let smallest_subnormal = f64::from_bits(0x1);

        // Constants
        assert_eq!(f64::EXP_SAT, 0x7ff);
        assert_eq!(f64::EXP_BIAS, 1023);
        assert_eq!(f64::EXP_MAX, 1023);
        assert_eq!(f64::EXP_MIN, -1022);
        assert_eq!(f64::EXP_MIN_SUBNORM, -1074);
        assert_eq!(f64::MIN_POSITIVE, f64::MIN_POSITIVE_NORMAL);

        // `exp_unbiased` on normal values
        assert_eq!(f64::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!(0.5f64.exp_unbiased(), -1);
        assert_eq!(f64::MAX.exp_unbiased(), 1023);
        assert_eq!(f64::MIN.exp_unbiased(), 1023);
        assert_eq!(f64::MIN_POSITIVE.exp_unbiased(), -1022);

        // `exp_unbiased` is a convenience method and not ldexp: zero and subnormals simply
        // report the all-zero exponent field minus the bias.
        assert_eq!(f64::ZERO.exp_unbiased(), -1023);
        assert_eq!(smallest_subnormal.exp_unbiased(), -1023);

        // `from_parts`
        assert_biteq!(f64::from_parts(true, f64::EXP_BIAS, 0), -1.0f64);
        assert_biteq!(
            f64::from_parts(false, f64::EXP_BIAS + 10, 0),
            hf64!("0x1p10")
        );
        assert_biteq!(f64::from_parts(false, 0, 1), smallest_subnormal);
    }

    #[test]
    #[cfg(f128_enabled)]
    fn check_f128() {
        let smallest_subnormal = f128::from_bits(0x1);

        // Constants
        assert_eq!(f128::EXP_SAT, 0x7fff);
        assert_eq!(f128::EXP_BIAS, 16383);
        assert_eq!(f128::EXP_MAX, 16383);
        assert_eq!(f128::EXP_MIN, -16382);
        assert_eq!(f128::EXP_MIN_SUBNORM, -16494);
        assert_eq!(f128::MIN_POSITIVE, f128::MIN_POSITIVE_NORMAL);

        // `exp_unbiased` on normal values
        assert_eq!(f128::FRAC_PI_2.exp_unbiased(), 0);
        assert_eq!(0.5f128.exp_unbiased(), -1);
        assert_eq!(f128::MAX.exp_unbiased(), 16383);
        assert_eq!(f128::MIN.exp_unbiased(), 16383);
        assert_eq!(f128::MIN_POSITIVE.exp_unbiased(), -16382);

        // `exp_unbiased` is a convenience method and not ldexp: zero and subnormals simply
        // report the all-zero exponent field minus the bias.
        assert_eq!(f128::ZERO.exp_unbiased(), -16383);
        assert_eq!(smallest_subnormal.exp_unbiased(), -16383);

        // `from_parts`
        assert_biteq!(f128::from_parts(true, f128::EXP_BIAS, 0), -1.0f128);
        assert_biteq!(f128::from_parts(false, 0, 1), smallest_subnormal);
    }
}
546