// Adapted from https://github.com/Alexhuszagh/rust-lexical.

// FLOAT TYPE

use super::num::*;
use super::rounding::*;
use super::shift::*;

/// Extended precision floating-point type.
///
/// Private implementation, exposed only for testing purposes.
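///
/// The value represented is `mant * 2^exp`; as a sketch, `1.0` can be
/// encoded, after normalization, as:
///
/// ```ignore
/// let one = ExtendedFloat { mant: 1 << 63, exp: -63 }; // 2^63 * 2^-63 == 1.0
/// ```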
#[doc(hidden)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct ExtendedFloat {
    /// Mantissa for the extended-precision float.
    pub mant: u64,
    /// Binary exponent for the extended-precision float.
    pub exp: i32,
}

impl ExtendedFloat {
    // PROPERTIES

    // OPERATIONS

    /// Multiply two normalized extended-precision floats, as if by `a*b`.
    ///
    /// The precision is maximal when the numbers are normalized; however,
    /// decent precision will occur as long as both values have their high
    /// bits set. The result is not normalized.
    ///
    /// Algorithm:
    /// 1. Unsigned multiplication of mantissas (requires 2x as many bits as input).
    /// 2. Normalization of the result (not done here).
    /// 3. Addition of exponents.
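    ///
    /// A sketch of the intended behavior, assuming `u64::HALF == 32` and
    /// `u64::FULL == 64` (a 64-bit mantissa split into 32-bit halves):
    ///
    /// ```ignore
    /// let a = ExtendedFloat { mant: 1 << 63, exp: -63 }; // 1.0
    /// let b = ExtendedFloat { mant: 1 << 63, exp: -62 }; // 2.0
    /// // The 128-bit mantissa product is 1 << 126; the high 64 bits are
    /// // kept, and the 64 dropped low bits are compensated for by adding
    /// // `u64::FULL` to the summed exponents.
    /// assert_eq!(a.mul(&b), ExtendedFloat { mant: 1 << 62, exp: -61 });
    /// ```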
    pub(crate) fn mul(&self, b: &ExtendedFloat) -> ExtendedFloat {
        // Logic check: values must be decently normalized prior to multiplication.
        debug_assert!((self.mant & u64::HIMASK != 0) && (b.mant & u64::HIMASK != 0));

        // Extract high-and-low masks.
        let ah = self.mant >> u64::HALF;
        let al = self.mant & u64::LOMASK;
        let bh = b.mant >> u64::HALF;
        let bl = b.mant & u64::LOMASK;

        // Get our partial products.
        let ah_bl = ah * bl;
        let al_bh = al * bh;
        let al_bl = al * bl;
        let ah_bh = ah * bh;

        // Sum the middle terms with the carry out of the low product.
        let mut tmp = (ah_bl & u64::LOMASK) + (al_bh & u64::LOMASK) + (al_bl >> u64::HALF);
        // Round up: add the high bit of the half about to be discarded.
        tmp += 1 << (u64::HALF - 1);

        ExtendedFloat {
            mant: ah_bh + (ah_bl >> u64::HALF) + (al_bh >> u64::HALF) + (tmp >> u64::HALF),
            exp: self.exp + b.exp + u64::FULL,
        }
    }

    /// Multiply in-place, as if by `a*b`.
    ///
    /// The result is not normalized.
    #[inline]
    pub(crate) fn imul(&mut self, b: &ExtendedFloat) {
        *self = self.mul(b);
    }

    // NORMALIZE

    /// Normalize floating-point number.
    ///
    /// Shift the mantissa so the number of leading zeros is 0, or the value
    /// itself is 0.
    ///
    /// Get the number of bits shifted.
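    ///
    /// A sketch, assuming `shl` from `super::shift` decrements `exp` by the
    /// shift amount so the represented value is unchanged:
    ///
    /// ```ignore
    /// let mut fp = ExtendedFloat { mant: 1, exp: 0 };
    /// assert_eq!(fp.normalize(), 63);
    /// assert_eq!(fp, ExtendedFloat { mant: 1 << 63, exp: -63 });
    /// ```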
    #[inline]
    pub(crate) fn normalize(&mut self) -> u32 {
        // Note:
        // Using the ctlz intrinsic via leading_zeros is way faster (~10x)
        // than shifting 1 bit at a time via a while loop, and also way
        // faster (~2x) than an unrolled loop that checks at 32, 16, 4,
        // 2, and 1 bit.
        //
        // Using a modulus of pow2 (which will get optimized to a bitwise
        // and with 0x3F or faster) is slightly slower than an if/then,
        // however, removing the if/then will likely optimize more branched
        // code as it removes conditional logic.

        // Calculate the number of leading zeros, and then zero-out
        // any overflowing bits, to avoid shl overflow when self.mant == 0.
        let shift = if self.mant == 0 {
            0
        } else {
            self.mant.leading_zeros()
        };
        shl(self, shift as i32);
        shift
    }

    // ROUND

    /// Lossy round floating-point number to native mantissa boundaries.
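    ///
    /// A sketch for `f64` (53 effective mantissa bits), assuming
    /// `round_nearest_tie_even` from `super::rounding`: a normalized
    /// mantissa is shifted right by 11 bits, with the exponent adjusted
    /// to compensate:
    ///
    /// ```ignore
    /// let mut fp = ExtendedFloat { mant: 1 << 63, exp: -63 }; // 1.0
    /// fp.round_to_native::<f64, _>(round_nearest_tie_even);
    /// assert_eq!(fp, ExtendedFloat { mant: 1 << 52, exp: -52 });
    /// ```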
    #[inline]
    pub(crate) fn round_to_native<F, Algorithm>(&mut self, algorithm: Algorithm)
    where
        F: Float,
        Algorithm: FnOnce(&mut ExtendedFloat, i32),
    {
        round_to_native::<F, _>(self, algorithm);
    }

    // FROM

    /// Create extended float from native float.
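    ///
    /// A sketch: `1.0f64` has mantissa `1 << 52` (hidden bit included) and
    /// unbiased exponent `-52`:
    ///
    /// ```ignore
    /// let fp = ExtendedFloat::from_float(1.0f64);
    /// assert_eq!(fp, ExtendedFloat { mant: 1 << 52, exp: -52 });
    /// ```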
    #[inline]
    pub fn from_float<F: Float>(f: F) -> ExtendedFloat {
        from_float(f)
    }

    // INTO

    /// Convert into default-rounded, lower-precision native float.
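    ///
    /// A sketch, rounding a normalized value back down to `f64`:
    ///
    /// ```ignore
    /// let fp = ExtendedFloat { mant: 1 << 63, exp: -63 }; // 1.0, normalized
    /// assert_eq!(fp.into_float::<f64>(), 1.0);
    /// ```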
    #[inline]
    pub(crate) fn into_float<F: Float>(mut self) -> F {
        self.round_to_native::<F, _>(round_nearest_tie_even);
        into_float(self)
    }

    /// Convert into downward-rounded, lower-precision native float.
    #[inline]
    pub(crate) fn into_downward_float<F: Float>(mut self) -> F {
        self.round_to_native::<F, _>(round_downward);
        into_float(self)
    }
}

// FROM FLOAT

// Import ExtendedFloat from native float.
#[inline]
pub(crate) fn from_float<F>(f: F) -> ExtendedFloat
where
    F: Float,
{
    ExtendedFloat {
        mant: u64::as_cast(f.mantissa()),
        exp: f.exponent(),
    }
}

// INTO FLOAT

// Export extended-precision float to native float.
//
// The extended-precision float must be in native float representation,
// with overflow/underflow appropriately handled.
#[inline]
pub(crate) fn into_float<F>(fp: ExtendedFloat) -> F
where
    F: Float,
{
    // Export floating-point number.
    if fp.mant == 0 || fp.exp < F::DENORMAL_EXPONENT {
        // Sub-denormal: underflow to zero.
        F::ZERO
    } else if fp.exp >= F::MAX_EXPONENT {
        // Overflow to infinity.
        F::from_bits(F::INFINITY_BITS)
    } else {
        // Calculate the exp and fraction bits, and return a float from bits.
        let exp = if (fp.exp == F::DENORMAL_EXPONENT)
            && (fp.mant & F::HIDDEN_BIT_MASK.as_u64()) == 0
        {
            0
        } else {
            (fp.exp + F::EXPONENT_BIAS) as u64
        };
        let exp = exp << F::MANTISSA_SIZE;
        let mant = fp.mant & F::MANTISSA_MASK.as_u64();
        F::from_bits(F::Unsigned::as_cast(mant | exp))
    }
}
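
// A minimal test sketch of the conversions above, assuming the `Float`
// impl for `f64` from `super::num`. The module name and test cases are
// illustrative, not part of the adapted upstream code.
#[cfg(test)]
mod extended_float_tests {
    use super::*;

    // Native -> extended -> native should be lossless for normal floats,
    // since a native mantissa always fits in the 64-bit `mant`.
    #[test]
    fn native_round_trip() {
        for &x in &[1.0f64, 0.1, 2.5e10, 1e300, 1e-300] {
            let fp = ExtendedFloat::from_float(x);
            assert_eq!(fp.into_float::<f64>(), x);
        }
    }

    // `into_float` clamps out-of-range exponents to zero or infinity.
    #[test]
    fn clamps_overflow_and_underflow() {
        let tiny = ExtendedFloat { mant: 1 << 63, exp: -10_000 };
        assert_eq!(tiny.into_float::<f64>(), 0.0);
        let huge = ExtendedFloat { mant: 1 << 63, exp: 10_000 };
        assert_eq!(huge.into_float::<f64>(), f64::INFINITY);
    }
}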