1 | use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO}; |
2 | use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd}; |
3 | |
4 | use core::cmp::{self, Ordering}; |
5 | use core::convert::TryFrom; |
6 | use core::fmt::{self, Write}; |
7 | use core::marker::PhantomData; |
8 | use core::mem; |
9 | use core::ops::Neg; |
10 | |
11 | #[must_use ] |
12 | pub struct IeeeFloat<S> { |
13 | /// Absolute significand value (including the integer bit). |
14 | sig: [Limb; 1], |
15 | |
16 | /// The signed unbiased exponent of the value. |
17 | exp: ExpInt, |
18 | |
19 | /// What kind of floating point number this is. |
20 | // |
21 | // HACK(eddyb) because mutating this without accounting for `exp`/`sig` |
22 | // can break some subtle edge cases, it should be only read through the |
23 | // `.category()` method, and only set during initialization, either for |
24 | // one of the special value constants, or for conversion from bits. |
25 | read_only_category_do_not_mutate: Category, |
26 | |
27 | /// Sign bit of the number. |
28 | // |
29 | // HACK(eddyb) because mutating this without accounting for `category` |
30 | // can break some subtle edge cases, it should be only read through the |
31 | // `.is_negative()` method, and only set through negation (which can be |
32 | // more easily used through e.g. `copy_sign`/`negate_if`/`with_sign`). |
33 | read_only_sign_do_not_mutate: bool, |
34 | |
35 | marker: PhantomData<S>, |
36 | } |
37 | |
38 | /// Fundamental unit of big integer arithmetic, but also |
39 | /// large to store the largest significands by itself. |
40 | type Limb = u128; |
41 | const LIMB_BITS: usize = 128; |
42 | #[inline (always)] |
43 | fn limbs_for_bits(bits: usize) -> usize { |
44 | (bits + LIMB_BITS - 1) / LIMB_BITS |
45 | } |
46 | |
47 | /// Growable `[Limb]` (i.e. heap-allocated and typically `Vec`/`SmallVec`/etc.), |
48 | /// used only by algorithms that may require dynamically arbitrary precision, |
49 | /// i.e. conversions from/to decimal strings. |
50 | /// |
51 | /// Note: the specific type was chosen by starting with `SmallVec<[_; 1]>` and |
52 | /// increasing the inline length as long as benchmarks were showing improvements, |
53 | /// or at least the `Double::from_str` ones, which roughly had these behaviors: |
54 | /// * `Vec<_>` -> `SmallVec<[_; 1]>`: ~15% speedup, but only for shorter inputs |
55 | /// * `SmallVec<[_; 1]>` -> `SmallVec<[_; 2]>`: ~10% speedup for longer inputs |
56 | /// * `SmallVec<[_; 2]>` -> `SmallVec<[_; 3]>`: noise and/or diminishing returns |
57 | /// |
58 | /// Note: the choice of type described above, and the factors in its decision, |
59 | /// are specific to `Limb` being `u128`, so if `Limb` changes, this should too. |
60 | type DynPrecisionLimbVec = smallvec::SmallVec<[Limb; 2]>; |
61 | |
62 | /// Enum that represents what fraction of the LSB truncated bits of an fp number |
63 | /// represent. |
64 | /// |
65 | /// This essentially combines the roles of guard and sticky bits. |
66 | #[must_use ] |
67 | #[derive (Copy, Clone, PartialEq, Eq, Debug)] |
68 | enum Loss { |
69 | // Example of truncated bits: |
70 | ExactlyZero, // 000000 |
71 | LessThanHalf, // 0xxxxx x's not all zero |
72 | ExactlyHalf, // 100000 |
73 | MoreThanHalf, // 1xxxxx x's not all zero |
74 | } |
75 | |
76 | /// How the nonfinite values Inf and NaN are represented. |
77 | #[derive (Copy, Clone, PartialEq, Eq)] |
78 | pub enum NonfiniteBehavior { |
79 | /// Represents standard IEEE 754 behavior. A value is nonfinite if the |
80 | /// exponent field is all 1s. In such cases, a value is Inf if the |
81 | /// significand bits are all zero, and NaN otherwise |
82 | IEEE754, |
83 | |
84 | /// Only the Float8E5M2 has this behavior. There is no Inf representation. A |
85 | /// value is NaN if the exponent field and the mantissa field are all 1s. |
86 | /// This behavior matches the FP8 E4M3 type described in |
87 | /// https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs |
88 | /// as non-signalling, although the paper does not state whether the NaN |
89 | /// values are signalling or not. |
90 | NanOnly, |
91 | } |
92 | |
93 | // HACK(eddyb) extension method flipping/changing the sign based on `bool`s. |
94 | trait NegExt: Neg<Output = Self> + Sized { |
95 | fn negate_if(self, negate: bool) -> Self { |
96 | if negate { |
97 | -self |
98 | } else { |
99 | self |
100 | } |
101 | } |
102 | |
103 | fn with_sign(self, sign: bool) -> Self |
104 | where |
105 | Self: Float, |
106 | { |
107 | self.negate_if(self.is_negative() != sign) |
108 | } |
109 | } |
110 | impl<T: Neg<Output = Self>> NegExt for T {} |
111 | |
112 | /// Represents floating point arithmetic semantics. |
113 | pub trait Semantics: Sized { |
114 | /// Total number of bits in the interchange format. |
115 | const BITS: usize; |
116 | |
117 | /// Number of exponent bits in the interchange format. |
118 | const EXP_BITS: usize; |
119 | |
120 | /// Number of bits in the significand. This includes the integer bit. |
121 | const PRECISION: usize = (Self::BITS - 1 - Self::EXP_BITS) + 1; |
122 | |
123 | /// How the nonfinite values Inf and NaN are represented. |
124 | const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::IEEE754; |
125 | |
126 | /// The largest E such that 2^E is representable; this matches the |
127 | /// definition of IEEE 754. |
128 | const MAX_EXP: ExpInt = { |
129 | let ieee_inf_and_nan_replaced_with_extra_normals = match Self::NONFINITE_BEHAVIOR { |
130 | NonfiniteBehavior::IEEE754 => false, |
131 | NonfiniteBehavior::NanOnly => true, |
132 | }; |
133 | Self::IEEE_MAX_EXP |
134 | + (Self::MIN_EXP - Self::IEEE_MIN_EXP) |
135 | + (ieee_inf_and_nan_replaced_with_extra_normals as ExpInt) |
136 | }; |
137 | const IEEE_MAX_EXP: ExpInt = -Self::IEEE_MIN_EXP + 1; |
138 | |
139 | /// The smallest E such that 2^E is a normalized number; this |
140 | /// matches the definition of IEEE 754. |
141 | const MIN_EXP: ExpInt = Self::IEEE_MIN_EXP; |
142 | const IEEE_MIN_EXP: ExpInt = -(1 << (Self::EXP_BITS - 1)) + 2; |
143 | |
144 | /// The base significand bitpattern of NaNs, i.e. the bits that must always |
145 | /// be set in all NaNs, with other significand bits being either used for |
146 | /// payload bits (if `NAN_PAYLOAD_MASK` covers them) or always unset. |
147 | const NAN_SIGNIFICAND_BASE: Limb = match Self::NONFINITE_BEHAVIOR { |
148 | NonfiniteBehavior::IEEE754 => 0, |
149 | NonfiniteBehavior::NanOnly => (1 << (Self::PRECISION - 1)) - 1, |
150 | }; |
151 | |
152 | /// The significand bitmask for the payload of a NaN (if supported), |
153 | /// including the "quiet bit" (telling QNaNs apart from SNaNs). |
154 | const NAN_PAYLOAD_MASK: Limb = match Self::NONFINITE_BEHAVIOR { |
155 | NonfiniteBehavior::IEEE754 => (1 << (Self::PRECISION - 1)) - 1, |
156 | NonfiniteBehavior::NanOnly => 0, |
157 | }; |
158 | |
159 | /// The significand bitpattern to mark a NaN as quiet (if supported). |
160 | /// |
161 | /// NOTE: for X87DoubleExtended we need to set two bits instead of one. |
162 | /// |
163 | /// NOTE: all NaNs are quiet if unsupported (see `NonfiniteBehavior::NanOnly`). |
164 | const QNAN_SIGNIFICAND: Limb = match Self::NONFINITE_BEHAVIOR { |
165 | NonfiniteBehavior::IEEE754 => 1 << (Self::PRECISION - 2), |
166 | NonfiniteBehavior::NanOnly => 0, |
167 | }; |
168 | |
169 | fn from_bits(bits: u128) -> IeeeFloat<Self> { |
170 | assert!(Self::BITS > Self::PRECISION); |
171 | |
172 | let sign = bits & (1 << (Self::BITS - 1)); |
173 | let exponent = ((bits & !sign) >> (Self::BITS - 1 - Self::EXP_BITS)) & ((1 << Self::EXP_BITS) - 1); |
174 | let mut r = IeeeFloat { |
175 | sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)], |
176 | // Convert the exponent from its bias representation to a signed integer. |
177 | exp: (exponent as ExpInt) + (Self::MIN_EXP - 1), |
178 | read_only_category_do_not_mutate: Category::Zero, |
179 | read_only_sign_do_not_mutate: sign != 0, |
180 | marker: PhantomData, |
181 | }; |
182 | |
183 | // NOTE(eddyb) unlike the original C++ code, this doesn't check for |
184 | // specific exponent/significand combinations, but instead relies on |
185 | // being able to construct known-good special values to compare to. |
186 | let try_category_from_special = |mut special: IeeeFloat<Self>| { |
187 | special = special.copy_sign(r); |
188 | |
189 | // Ignore NaN payload to avoid needing a separate NaN check. |
190 | let sig_mask = if special.is_nan() { !Self::NAN_PAYLOAD_MASK } else { !0 }; |
191 | |
192 | if special.is_negative() == r.is_negative() |
193 | && special.exp == r.exp |
194 | && special.sig[0] & sig_mask == r.sig[0] & sig_mask |
195 | { |
196 | Some(special.category()) |
197 | } else { |
198 | None |
199 | } |
200 | }; |
201 | |
202 | // NOTE(eddyb) the order here matters, i.e. `NAN` needs to be last, as |
203 | // its relaxed check (see above) overlaps `INFINITY`, for IEEE NaNs. |
204 | let specials = [ |
205 | IeeeFloat::<Self>::ZERO, |
206 | IeeeFloat::<Self>::INFINITY, |
207 | IeeeFloat::<Self>::NAN, |
208 | ]; |
209 | |
210 | let category = specials |
211 | .into_iter() |
212 | .find_map(try_category_from_special) |
213 | .unwrap_or_else(|| { |
214 | if r.exp == Self::MIN_EXP - 1 { |
215 | // Denormal. |
216 | r.exp = Self::MIN_EXP; |
217 | } else { |
218 | // Set integer bit. |
219 | sig::set_bit(&mut r.sig, Self::PRECISION - 1); |
220 | } |
221 | Category::Normal |
222 | }); |
223 | |
224 | r.read_only_category_do_not_mutate = category; |
225 | |
226 | r |
227 | } |
228 | |
229 | fn to_bits(x: IeeeFloat<Self>) -> u128 { |
230 | assert!(Self::BITS > Self::PRECISION); |
231 | |
232 | // Split integer bit from significand. |
233 | let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1); |
234 | let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1); |
235 | let mut exponent = x.exp; |
236 | match x.category() { |
237 | Category::Normal => { |
238 | if exponent == Self::MIN_EXP && !integer_bit { |
239 | // Denormal. |
240 | exponent -= 1; |
241 | } |
242 | } |
243 | Category::Zero => { |
244 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? |
245 | IeeeFloat::<Self> { |
246 | sig: [significand], |
247 | exp: exponent, |
248 | .. |
249 | } = Float::ZERO; |
250 | } |
251 | Category::Infinity => { |
252 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? |
253 | IeeeFloat::<Self> { |
254 | sig: [significand], |
255 | exp: exponent, |
256 | .. |
257 | } = Float::INFINITY; |
258 | } |
259 | Category::NaN => { |
260 | IeeeFloat::<Self> { exp: exponent, .. } = Float::NAN; |
261 | } |
262 | } |
263 | |
264 | // Convert the exponent from a signed integer to its bias representation. |
265 | let exponent = (exponent - (Self::MIN_EXP - 1)) as u128; |
266 | |
267 | ((x.is_negative() as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand |
268 | } |
269 | } |
270 | |
271 | impl<S> Copy for IeeeFloat<S> {} |
272 | impl<S> Clone for IeeeFloat<S> { |
273 | fn clone(&self) -> Self { |
274 | *self |
275 | } |
276 | } |
277 | |
278 | macro_rules! ieee_semantics { |
279 | ($($name:ident = $sem:ident($bits:tt : $exp_bits:tt) $({ $($extra:tt)* })?),* $(,)?) => { |
280 | $(pub struct $sem;)* |
281 | $(pub type $name = IeeeFloat<$sem>;)* |
282 | $(impl Semantics for $sem { |
283 | const BITS: usize = $bits; |
284 | const EXP_BITS: usize = $exp_bits; |
285 | |
286 | $($($extra)*)? |
287 | })* |
288 | } |
289 | } |
290 | |
291 | ieee_semantics! { |
292 | Half = HalfS(16:5), |
293 | Single = SingleS(32:8), |
294 | Double = DoubleS(64:11), |
295 | Quad = QuadS(128:15), |
296 | |
297 | // Non-standard IEEE-like semantics: |
298 | |
299 | // FIXME(eddyb) document this as "Brain Float 16" (C++ didn't have docs). |
300 | BFloat = BFloatS(16:8), |
301 | |
302 | // 8-bit floating point number following IEEE-754 conventions with bit |
303 | // layout S1E5M2 as described in https://arxiv.org/abs/2209.05433. |
304 | Float8E5M2 = Float8E5M2S(8:5), |
305 | |
306 | // 8-bit floating point number mostly following IEEE-754 conventions with |
307 | // bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433. |
308 | // Unlike IEEE-754 types, there are no infinity values, and NaN is |
309 | // represented with the exponent and mantissa bits set to all 1s. |
310 | Float8E4M3FN = Float8E4M3FNS(8:4) { |
311 | const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::NanOnly; |
312 | }, |
313 | } |
314 | |
315 | // FIXME(eddyb) consider moving X87-specific logic to a "has explicit integer bit" |
316 | // associated `const` on `Semantics` itself. |
317 | pub struct X87DoubleExtendedS; |
318 | pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>; |
319 | impl Semantics for X87DoubleExtendedS { |
320 | const BITS: usize = 80; |
321 | const EXP_BITS: usize = 15; |
322 | |
323 | // HACK(eddyb) overwriting `EXP_BITS` because its default is incorrect. |
324 | // FIMXE(eddyb) get rid of this by allowing `Semantics` to generically have |
325 | // the concept of "explicitly included integer bit", which is the main way |
326 | // in which the 80-bit X87 format differs from standard IEEE encodings. |
327 | const PRECISION: usize = 64; |
328 | |
329 | /// For x87 extended precision, we want to make a NaN, not a |
330 | /// pseudo-NaN. Maybe we should expose the ability to make |
331 | /// pseudo-NaNs? |
332 | const QNAN_SIGNIFICAND: Limb = 0b11 << (Self::PRECISION - 2); |
333 | |
334 | /// Integer bit is explicit in this format. Intel hardware (387 and later) |
335 | /// does not support these bit patterns: |
336 | /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") |
337 | /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") |
338 | /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") |
339 | /// exponent = 0, integer bit 1 ("pseudodenormal") |
340 | /// At the moment, the first three are treated as NaNs, the last one as Normal. |
341 | #[inline ] |
342 | fn from_bits(bits: u128) -> IeeeFloat<Self> { |
343 | let sign = bits & (1 << (Self::BITS - 1)); |
344 | let exponent = ((bits & !sign) >> (Self::BITS - 1 - Self::EXP_BITS)) & ((1 << Self::EXP_BITS) - 1); |
345 | let mut r = IeeeFloat { |
346 | sig: [bits & ((1 << Self::PRECISION) - 1)], |
347 | // Convert the exponent from its bias representation to a signed integer. |
348 | exp: (exponent as ExpInt) + (Self::MIN_EXP - 1), |
349 | read_only_category_do_not_mutate: Category::Zero, |
350 | read_only_sign_do_not_mutate: sign != 0, |
351 | marker: PhantomData, |
352 | }; |
353 | |
354 | let integer_bit = r.sig[0] >> (Self::PRECISION - 1); |
355 | |
356 | let category = if r.exp == Self::MIN_EXP - 1 && r.sig == [0] { |
357 | Category::Zero |
358 | } else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] { |
359 | Category::Infinity |
360 | } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)] |
361 | || r.exp != Self::MAX_EXP + 1 && r.exp != Self::MIN_EXP - 1 && integer_bit == 0 |
362 | { |
363 | r.exp = Self::MAX_EXP + 1; |
364 | Category::NaN |
365 | } else { |
366 | if r.exp == Self::MIN_EXP - 1 { |
367 | // Denormal. |
368 | r.exp = Self::MIN_EXP; |
369 | } |
370 | Category::Normal |
371 | }; |
372 | |
373 | r.read_only_category_do_not_mutate = category; |
374 | |
375 | r |
376 | } |
377 | |
378 | #[inline ] |
379 | fn to_bits(x: IeeeFloat<Self>) -> u128 { |
380 | // Get integer bit from significand. |
381 | let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1); |
382 | let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1); |
383 | let exponent = match x.category() { |
384 | Category::Normal => { |
385 | if x.exp == Self::MIN_EXP && !integer_bit { |
386 | // Denormal. |
387 | Self::MIN_EXP - 1 |
388 | } else { |
389 | x.exp |
390 | } |
391 | } |
392 | Category::Zero => { |
393 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? |
394 | significand = 0; |
395 | Self::MIN_EXP - 1 |
396 | } |
397 | Category::Infinity => { |
398 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? |
399 | significand = 1 << (Self::PRECISION - 1); |
400 | Self::MAX_EXP + 1 |
401 | } |
402 | Category::NaN => Self::MAX_EXP + 1, |
403 | }; |
404 | |
405 | // Convert the exponent from a signed integer to its bias representation. |
406 | let exponent = (exponent - (Self::MIN_EXP - 1)) as u128; |
407 | |
408 | ((x.is_negative() as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand |
409 | } |
410 | } |
411 | |
412 | float_common_impls!(IeeeFloat<S>); |
413 | |
414 | impl<S: Semantics> PartialEq for IeeeFloat<S> { |
415 | fn eq(&self, rhs: &Self) -> bool { |
416 | self.partial_cmp(rhs) == Some(Ordering::Equal) |
417 | } |
418 | } |
419 | |
420 | impl<S: Semantics> PartialOrd for IeeeFloat<S> { |
421 | fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { |
422 | match (self.category(), rhs.category()) { |
423 | (Category::NaN, _) | (_, Category::NaN) => None, |
424 | |
425 | (Category::Infinity, Category::Infinity) => Some((!self.is_negative()).cmp(&(!rhs.is_negative()))), |
426 | |
427 | (Category::Zero, Category::Zero) => Some(Ordering::Equal), |
428 | |
429 | (Category::Infinity, _) | (Category::Normal, Category::Zero) => { |
430 | Some((!self.is_negative()).cmp(&self.is_negative())) |
431 | } |
432 | |
433 | (_, Category::Infinity) | (Category::Zero, Category::Normal) => { |
434 | Some(rhs.is_negative().cmp(&(!rhs.is_negative()))) |
435 | } |
436 | |
437 | (Category::Normal, Category::Normal) => { |
438 | // Two normal numbers. Do they have the same sign? |
439 | Some((!self.is_negative()).cmp(&(!rhs.is_negative())).then_with(|| { |
440 | // Compare absolute values; invert result if negative. |
441 | let result = self.cmp_abs_normal(*rhs); |
442 | |
443 | if self.is_negative() { |
444 | result.reverse() |
445 | } else { |
446 | result |
447 | } |
448 | })) |
449 | } |
450 | } |
451 | } |
452 | } |
453 | |
454 | impl<S: Semantics> Neg for IeeeFloat<S> { |
455 | type Output = Self; |
456 | fn neg(mut self) -> Self { |
457 | self.read_only_sign_do_not_mutate = !self.is_negative(); |
458 | self |
459 | } |
460 | } |
461 | |
462 | /// Prints this value as a decimal string. |
463 | /// |
464 | /// \param precision The maximum number of digits of |
465 | /// precision to output. If there are fewer digits available, |
466 | /// zero padding will not be used unless the value is |
467 | /// integral and small enough to be expressed in |
468 | /// precision digits. 0 means to use the natural |
469 | /// precision of the number. |
470 | /// \param width The maximum number of zeros to |
471 | /// consider inserting before falling back to scientific |
472 | /// notation. 0 means to always use scientific notation. |
473 | /// |
474 | /// \param alternate Indicate whether to remove the trailing zero in |
475 | /// fraction part or not. Also setting this parameter to true forces |
476 | /// producing of output more similar to default printf behavior. |
477 | /// Specifically the lower e is used as exponent delimiter and exponent |
478 | /// always contains no less than two digits. |
479 | /// |
480 | /// Number precision width Result |
481 | /// ------ --------- ----- ------ |
482 | /// 1.01E+4 5 2 10100 |
483 | /// 1.01E+4 4 2 1.01E+4 |
484 | /// 1.01E+4 5 1 1.01E+4 |
485 | /// 1.01E-2 5 2 0.0101 |
486 | /// 1.01E-2 4 2 0.0101 |
487 | /// 1.01E-2 4 1 1.01E-2 |
488 | impl<S: Semantics> fmt::Display for IeeeFloat<S> { |
489 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
490 | let width = f.width().unwrap_or(3); |
491 | let alternate = f.alternate(); |
492 | |
493 | match self.category() { |
494 | Category::Infinity => { |
495 | if self.is_negative() { |
496 | return f.write_str("-Inf" ); |
497 | } else { |
498 | return f.write_str("+Inf" ); |
499 | } |
500 | } |
501 | |
502 | Category::NaN => return f.write_str("NaN" ), |
503 | |
504 | Category::Zero => { |
505 | if self.is_negative() { |
506 | f.write_char('-' )?; |
507 | } |
508 | |
509 | if width == 0 { |
510 | if alternate { |
511 | f.write_str("0.0" )?; |
512 | if let Some(n) = f.precision() { |
513 | for _ in 1..n { |
514 | f.write_char('0' )?; |
515 | } |
516 | } |
517 | f.write_str("e+00" )?; |
518 | } else { |
519 | f.write_str("0.0E+0" )?; |
520 | } |
521 | } else { |
522 | f.write_char('0' )?; |
523 | } |
524 | return Ok(()); |
525 | } |
526 | |
527 | Category::Normal => {} |
528 | } |
529 | |
530 | if self.is_negative() { |
531 | f.write_char('-' )?; |
532 | } |
533 | |
534 | // We use enough digits so the number can be round-tripped back to an |
535 | // APFloat. The formula comes from "How to Print Floating-Point Numbers |
536 | // Accurately" by Steele and White. |
537 | // FIXME: Using a formula based purely on the precision is conservative; |
538 | // we can print fewer digits depending on the actual value being printed. |
539 | |
540 | // precision = 2 + floor(S::PRECISION / lg_2(10)) |
541 | let precision = f.precision().unwrap_or(2 + S::PRECISION * 59 / 196); |
542 | |
543 | // Decompose the number into an APInt and an exponent. |
544 | let mut exp = self.exp - (S::PRECISION as ExpInt - 1); |
545 | let mut sig: DynPrecisionLimbVec = [self.sig[0]].into_iter().collect(); |
546 | |
547 | // Ignore trailing binary zeros. |
548 | let trailing_zeros = sig[0].trailing_zeros(); |
549 | let _: Loss = sig::shift_right(&mut sig, &mut exp, trailing_zeros as usize); |
550 | |
551 | // Change the exponent from 2^e to 10^e. |
552 | if exp == 0 { |
553 | // Nothing to do. |
554 | } else if exp > 0 { |
555 | // Just shift left. |
556 | let shift = exp as usize; |
557 | sig.resize(limbs_for_bits(S::PRECISION + shift), 0); |
558 | sig::shift_left(&mut sig, &mut exp, shift); |
559 | } else { |
560 | // exp < 0 |
561 | let mut texp = -exp as usize; |
562 | |
563 | // We transform this using the identity: |
564 | // (N)(2^-e) == (N)(5^e)(10^-e) |
565 | |
566 | // Multiply significand by 5^e. |
567 | // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) |
568 | let mut sig_scratch = DynPrecisionLimbVec::new(); |
569 | let mut p5 = DynPrecisionLimbVec::new(); |
570 | let mut p5_scratch = DynPrecisionLimbVec::new(); |
571 | while texp != 0 { |
572 | if p5.is_empty() { |
573 | p5.push(5); |
574 | } else { |
575 | p5_scratch.resize(p5.len() * 2, 0); |
576 | let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS); |
577 | while p5_scratch.last() == Some(&0) { |
578 | p5_scratch.pop(); |
579 | } |
580 | mem::swap(&mut p5, &mut p5_scratch); |
581 | } |
582 | if texp & 1 != 0 { |
583 | sig_scratch.resize(sig.len() + p5.len(), 0); |
584 | let _: Loss = sig::mul(&mut sig_scratch, &mut 0, &sig, &p5, (sig.len() + p5.len()) * LIMB_BITS); |
585 | while sig_scratch.last() == Some(&0) { |
586 | sig_scratch.pop(); |
587 | } |
588 | mem::swap(&mut sig, &mut sig_scratch); |
589 | } |
590 | texp >>= 1; |
591 | } |
592 | } |
593 | |
594 | // Fill the buffer. |
595 | let mut buffer = smallvec::SmallVec::<[u8; 64]>::new(); |
596 | |
597 | // Ignore digits from the significand until it is no more |
598 | // precise than is required for the desired precision. |
599 | // 196/59 is a very slight overestimate of lg_2(10). |
600 | let required = (precision * 196 + 58) / 59; |
601 | let mut discard_digits = sig::omsb(&sig).saturating_sub(required) * 59 / 196; |
602 | let mut in_trail = true; |
603 | while !sig.is_empty() { |
604 | // Perform short division by 10 to extract the rightmost digit. |
605 | // rem <- sig % 10 |
606 | // sig <- sig / 10 |
607 | let mut rem = 0; |
608 | |
609 | // Use 64-bit division and remainder, with 32-bit chunks from sig. |
610 | sig::each_chunk(&mut sig, 32, |chunk| { |
611 | let chunk = chunk as u32; |
612 | let combined = ((rem as u64) << 32) | (chunk as u64); |
613 | rem = (combined % 10) as u8; |
614 | (combined / 10) as u32 as Limb |
615 | }); |
616 | |
617 | // Reduce the sigificand to avoid wasting time dividing 0's. |
618 | while sig.last() == Some(&0) { |
619 | sig.pop(); |
620 | } |
621 | |
622 | let digit = rem; |
623 | |
624 | // Ignore digits we don't need. |
625 | if discard_digits > 0 { |
626 | discard_digits -= 1; |
627 | exp += 1; |
628 | continue; |
629 | } |
630 | |
631 | // Drop trailing zeros. |
632 | if in_trail && digit == 0 { |
633 | exp += 1; |
634 | } else { |
635 | in_trail = false; |
636 | buffer.push(b'0' + digit); |
637 | } |
638 | } |
639 | |
640 | assert!(!buffer.is_empty(), "no characters in buffer!" ); |
641 | |
642 | // Drop down to precision. |
643 | // FIXME: don't do more precise calculations above than are required. |
644 | if buffer.len() > precision { |
645 | // The most significant figures are the last ones in the buffer. |
646 | let mut first_sig = buffer.len() - precision; |
647 | |
648 | // Round. |
649 | // FIXME: this probably shouldn't use 'round half up'. |
650 | |
651 | // Rounding down is just a truncation, except we also want to drop |
652 | // trailing zeros from the new result. |
653 | if buffer[first_sig - 1] < b'5' { |
654 | while first_sig < buffer.len() && buffer[first_sig] == b'0' { |
655 | first_sig += 1; |
656 | } |
657 | } else { |
658 | // Rounding up requires a decimal add-with-carry. If we continue |
659 | // the carry, the newly-introduced zeros will just be truncated. |
660 | for x in &mut buffer[first_sig..] { |
661 | if *x == b'9' { |
662 | first_sig += 1; |
663 | } else { |
664 | *x += 1; |
665 | break; |
666 | } |
667 | } |
668 | } |
669 | |
670 | exp += first_sig as ExpInt; |
671 | buffer.drain(..first_sig); |
672 | |
673 | // If we carried through, we have exactly one digit of precision. |
674 | if buffer.is_empty() { |
675 | buffer.push(b'1' ); |
676 | } |
677 | } |
678 | |
679 | let digits = buffer.len(); |
680 | |
681 | // Check whether we should use scientific notation. |
682 | let scientific = if width == 0 { |
683 | true |
684 | } else { |
685 | if exp >= 0 { |
686 | // 765e3 --> 765000 |
687 | // ^^^ |
688 | // But we shouldn't make the number look more precise than it is. |
689 | exp as usize > width || digits + exp as usize > precision |
690 | } else { |
691 | // Power of the most significant digit. |
692 | let msd = exp + (digits - 1) as ExpInt; |
693 | if msd >= 0 { |
694 | // 765e-2 == 7.65 |
695 | false |
696 | } else { |
697 | // 765e-5 == 0.00765 |
698 | // ^ ^^ |
699 | -msd as usize > width |
700 | } |
701 | } |
702 | }; |
703 | |
704 | // Scientific formatting is pretty straightforward. |
705 | if scientific { |
706 | exp += digits as ExpInt - 1; |
707 | |
708 | f.write_char(buffer[digits - 1] as char)?; |
709 | f.write_char('.' )?; |
710 | let truncate_zero = !alternate; |
711 | if digits == 1 && truncate_zero { |
712 | f.write_char('0' )?; |
713 | } else { |
714 | for &d in buffer[..digits - 1].iter().rev() { |
715 | f.write_char(d as char)?; |
716 | } |
717 | } |
718 | // Fill with zeros up to precision. |
719 | if !truncate_zero && precision > digits - 1 { |
720 | for _ in 0..precision - digits + 1 { |
721 | f.write_char('0' )?; |
722 | } |
723 | } |
724 | // For alternate we use lower 'e'. |
725 | f.write_char(if alternate { 'e' } else { 'E' })?; |
726 | |
727 | // Exponent always at least two digits if we do not truncate zeros. |
728 | if truncate_zero { |
729 | write!(f, " {:+}" , exp)?; |
730 | } else { |
731 | write!(f, " {:+03}" , exp)?; |
732 | } |
733 | |
734 | return Ok(()); |
735 | } |
736 | |
737 | // Non-scientific, positive exponents. |
738 | if exp >= 0 { |
739 | for &d in buffer.iter().rev() { |
740 | f.write_char(d as char)?; |
741 | } |
742 | for _ in 0..exp { |
743 | f.write_char('0' )?; |
744 | } |
745 | return Ok(()); |
746 | } |
747 | |
748 | // Non-scientific, negative exponents. |
749 | let unit_place = -exp as usize; |
750 | if unit_place < digits { |
751 | for &d in buffer[unit_place..].iter().rev() { |
752 | f.write_char(d as char)?; |
753 | } |
754 | f.write_char('.' )?; |
755 | for &d in buffer[..unit_place].iter().rev() { |
756 | f.write_char(d as char)?; |
757 | } |
758 | } else { |
759 | f.write_str("0." )?; |
760 | for _ in digits..unit_place { |
761 | f.write_char('0' )?; |
762 | } |
763 | for &d in buffer.iter().rev() { |
764 | f.write_char(d as char)?; |
765 | } |
766 | } |
767 | |
768 | Ok(()) |
769 | } |
770 | } |
771 | |
772 | impl<S: Semantics> fmt::Debug for IeeeFloat<S> { |
773 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
774 | write!( |
775 | f, |
776 | " {}( {:?} | {}{:?} * 2^ {})" , |
777 | self, |
778 | self.category(), |
779 | if self.is_negative() { "-" } else { "+" }, |
780 | self.sig, |
781 | self.exp |
782 | ) |
783 | } |
784 | } |
785 | |
786 | // HACK(eddyb) this logic is duplicated throughout the original C++ code, |
787 | // but it's a bit too long to keep repeating in the Rust port for all ops. |
788 | // FIXME(eddyb) find a better name/organization for all of this functionality |
789 | // (`IeeeDefaultExceptionHandling` doesn't have a counterpart in the C++ code). |
790 | struct IeeeDefaultExceptionHandling; |
791 | impl IeeeDefaultExceptionHandling { |
792 | fn result_from_nan<S: Semantics>(mut r: IeeeFloat<S>) -> StatusAnd<IeeeFloat<S>> { |
793 | assert!(r.is_nan()); |
794 | |
795 | let status = if r.is_signaling() { |
796 | // [IEEE Std 754-2008 6.2]: |
797 | // Under default exception handling, any operation signaling an invalid |
798 | // operation exception and for which a floating-point result is to be |
799 | // delivered shall deliver a quiet NaN. |
800 | let [sig] = &mut r.sig; |
801 | *sig |= if S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND { |
802 | // HACK(eddyb) remain bug-compatible with the original C++ code |
803 | // which doesn't appear to attempt avoiding creating pseudo-NaNs |
804 | // (see https://github.com/llvm/llvm-project/issues/63938). |
805 | S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK |
806 | } else { |
807 | S::QNAN_SIGNIFICAND |
808 | }; |
809 | |
810 | // [IEEE Std 754-2008 6.2]: |
811 | // Signaling NaNs shall be reserved operands that, under default exception |
812 | // handling, signal the invalid operation exception(see 7.2) for every |
813 | // general-computational and signaling-computational operation except for |
814 | // the conversions described in 5.12. |
815 | Status::INVALID_OP |
816 | } else { |
817 | // [IEEE Std 754-2008 6.2]: |
818 | // For an operation with quiet NaN inputs, other than maximum and minimum |
819 | // operations, if a floating-point result is to be delivered the result |
820 | // shall be a quiet NaN which should be one of the input NaNs. |
821 | // ... |
822 | // Every general-computational and quiet-computational operation involving |
823 | // one or more input NaNs, none of them signaling, shall signal no |
824 | // exception, except fusedMultiplyAdd might signal the invalid operation |
825 | // exception(see 7.2). |
826 | Status::OK |
827 | }; |
828 | status.and(r) |
829 | } |
830 | |
831 | fn binop_result_from_either_nan<S: Semantics>(a: IeeeFloat<S>, b: IeeeFloat<S>) -> StatusAnd<IeeeFloat<S>> { |
832 | let r = match (a.category(), b.category()) { |
833 | (Category::NaN, _) => a, |
834 | (_, Category::NaN) => b, |
835 | _ => unreachable!(), |
836 | }; |
837 | let mut status_and_r = Self::result_from_nan(r); |
838 | if b.is_signaling() { |
839 | status_and_r.status |= Status::INVALID_OP; |
840 | } |
841 | status_and_r |
842 | } |
843 | } |
844 | |
845 | impl<S: Semantics> IeeeFloat<S> { |
846 | // HACK(eddyb) allow `Self::qnan` to be used from `IeeeFloat::NAN`. |
847 | // FIXME(eddyb) move back to the trait impl when that can be `const fn`. |
848 | const fn qnan(payload: Option<u128>) -> Self { |
849 | let sig = [S::NAN_SIGNIFICAND_BASE |
850 | | S::QNAN_SIGNIFICAND |
851 | | match payload { |
852 | // Zero out the excess bits of the significand. |
853 | Some(payload) => payload & S::NAN_PAYLOAD_MASK, |
854 | None => 0, |
855 | }]; |
856 | |
857 | let exp = match S::NONFINITE_BEHAVIOR { |
858 | NonfiniteBehavior::IEEE754 => S::MAX_EXP + 1, |
859 | NonfiniteBehavior::NanOnly => S::MAX_EXP, |
860 | }; |
861 | |
862 | IeeeFloat { |
863 | sig, |
864 | exp, |
865 | read_only_category_do_not_mutate: Category::NaN, |
866 | read_only_sign_do_not_mutate: false, |
867 | marker: PhantomData, |
868 | } |
869 | } |
870 | } |
871 | |
872 | impl<S: Semantics> Float for IeeeFloat<S> { |
873 | const BITS: usize = S::BITS; |
874 | const PRECISION: usize = S::PRECISION; |
875 | const MAX_EXP: ExpInt = S::MAX_EXP; |
876 | const MIN_EXP: ExpInt = S::MIN_EXP; |
877 | |
878 | const ZERO: Self = IeeeFloat { |
879 | sig: [0], |
880 | exp: S::MIN_EXP - 1, |
881 | read_only_category_do_not_mutate: Category::Zero, |
882 | read_only_sign_do_not_mutate: false, |
883 | marker: PhantomData, |
884 | }; |
885 | |
886 | const INFINITY: Self = match S::NONFINITE_BEHAVIOR { |
887 | NonfiniteBehavior::IEEE754 => IeeeFloat { |
888 | sig: [0], |
889 | exp: S::MAX_EXP + 1, |
890 | read_only_category_do_not_mutate: Category::Infinity, |
891 | read_only_sign_do_not_mutate: false, |
892 | marker: PhantomData, |
893 | }, |
894 | |
895 | // There is no Inf, so make NaN instead. |
896 | NonfiniteBehavior::NanOnly => Self::NAN, |
897 | }; |
898 | |
899 | const NAN: Self = Self::qnan(None); |
900 | |
901 | fn qnan(payload: Option<u128>) -> Self { |
902 | // NOTE(eddyb) this is not a recursive self-call, but rather it calls |
903 | // the `const fn` inherent mehtod (see above). |
904 | Self::qnan(payload) |
905 | } |
906 | |
907 | fn snan(payload: Option<u128>) -> Self { |
908 | let mut snan = Self::qnan(payload); |
909 | |
910 | let [sig] = &mut snan.sig; |
911 | |
912 | // We always have to clear the QNaN bit to make it an SNaN. |
913 | *sig &= !(S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK); |
914 | |
915 | // If there are no bits set in the payload, we have to set |
916 | // *something* to make it a NaN instead of an infinity; |
917 | // conventionally, this is the next bit down from the QNaN bit. |
918 | if *sig & S::NAN_PAYLOAD_MASK == 0 { |
919 | *sig |= (S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK) >> 1; |
920 | } |
921 | |
922 | snan |
923 | } |
924 | |
925 | fn largest() -> Self { |
926 | // We want (in interchange format): |
927 | // exponent = 1..10 |
928 | // significand = 1..1 |
929 | IeeeFloat { |
930 | sig: [((1 << S::PRECISION) - 1) |
931 | & match S::NONFINITE_BEHAVIOR { |
932 | // The largest number by magnitude in our format will be the floating point |
933 | // number with maximum exponent and with significand that is all ones. |
934 | NonfiniteBehavior::IEEE754 => !0, |
935 | |
936 | // The largest number by magnitude in our format will be the floating point |
937 | // number with maximum exponent and with significand that is all ones except |
938 | // the LSB. |
939 | NonfiniteBehavior::NanOnly => !1, |
940 | }], |
941 | exp: S::MAX_EXP, |
942 | read_only_category_do_not_mutate: Category::Normal, |
943 | read_only_sign_do_not_mutate: false, |
944 | marker: PhantomData, |
945 | } |
946 | } |
947 | |
948 | // We want (in interchange format): |
949 | // exponent = 0..0 |
950 | // significand = 0..01 |
951 | const SMALLEST: Self = IeeeFloat { |
952 | sig: [1], |
953 | exp: S::MIN_EXP, |
954 | read_only_category_do_not_mutate: Category::Normal, |
955 | read_only_sign_do_not_mutate: false, |
956 | marker: PhantomData, |
957 | }; |
958 | |
959 | fn smallest_normalized() -> Self { |
960 | // We want (in interchange format): |
961 | // exponent = 0..0 |
962 | // significand = 10..0 |
963 | IeeeFloat { |
964 | sig: [1 << (S::PRECISION - 1)], |
965 | exp: S::MIN_EXP, |
966 | read_only_category_do_not_mutate: Category::Normal, |
967 | read_only_sign_do_not_mutate: false, |
968 | marker: PhantomData, |
969 | } |
970 | } |
971 | |
972 | fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
973 | let status = match (self.category(), rhs.category()) { |
974 | (Category::NaN, _) | (_, Category::NaN) => { |
975 | return IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs); |
976 | } |
977 | |
978 | (Category::Infinity, Category::Infinity) => { |
979 | // Differently signed infinities can only be validly |
980 | // subtracted. |
981 | if self.is_negative() != rhs.is_negative() { |
982 | self = Self::NAN; |
983 | Status::INVALID_OP |
984 | } else { |
985 | Status::OK |
986 | } |
987 | } |
988 | |
989 | // Sign may depend on rounding mode; handled below. |
990 | (_, Category::Zero) | (Category::Infinity, Category::Normal) => Status::OK, |
991 | |
992 | (Category::Zero, _) | (_, Category::Infinity) => { |
993 | self = rhs; |
994 | Status::OK |
995 | } |
996 | |
997 | (Category::Normal, Category::Normal) => { |
998 | let mut sign = self.is_negative(); |
999 | let loss = sig::add_or_sub( |
1000 | &mut self.sig, |
1001 | &mut self.exp, |
1002 | &mut sign, |
1003 | &mut [rhs.sig[0]], |
1004 | rhs.exp, |
1005 | rhs.is_negative(), |
1006 | ); |
1007 | self = self.with_sign(sign); |
1008 | |
1009 | let status; |
1010 | self = unpack!(status=, self.normalize(round, loss)); |
1011 | |
1012 | // Can only be zero if we lost no fraction. |
1013 | assert!(self.category() != Category::Zero || loss == Loss::ExactlyZero); |
1014 | |
1015 | status |
1016 | } |
1017 | }; |
1018 | |
1019 | // If two numbers add (exactly) to zero, IEEE 754 decrees it is a |
1020 | // positive zero unless rounding to minus infinity, except that |
1021 | // adding two like-signed zeroes gives that zero. |
1022 | if self.category() == Category::Zero |
1023 | && (rhs.category() != Category::Zero || self.is_negative() != rhs.is_negative()) |
1024 | { |
1025 | self = self.with_sign(round == Round::TowardNegative); |
1026 | } |
1027 | |
1028 | status.and(self) |
1029 | } |
1030 | |
1031 | // NOTE(eddyb) we can't rely on the `sub_r` method default implementation |
1032 | // because NaN handling needs the original `rhs` (i.e. without negation). |
1033 | fn sub_r(self, rhs: Self, round: Round) -> StatusAnd<Self> { |
1034 | match (self.category(), rhs.category()) { |
1035 | (Category::NaN, _) | (_, Category::NaN) => { |
1036 | IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) |
1037 | } |
1038 | |
1039 | _ => self.add_r(-rhs, round), |
1040 | } |
1041 | } |
1042 | |
1043 | fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
1044 | self = self.negate_if(rhs.is_negative()); |
1045 | |
1046 | match (self.category(), rhs.category()) { |
1047 | (Category::NaN, _) | (_, Category::NaN) => { |
1048 | self = self.negate_if(rhs.is_negative()); // restore the original sign |
1049 | |
1050 | IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) |
1051 | } |
1052 | |
1053 | (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => { |
1054 | Status::INVALID_OP.and(Self::NAN) |
1055 | } |
1056 | |
1057 | (Category::Infinity, _) | (_, Category::Infinity) => Status::OK.and(Self::INFINITY.copy_sign(self)), |
1058 | |
1059 | (Category::Zero, _) | (_, Category::Zero) => Status::OK.and(Self::ZERO.copy_sign(self)), |
1060 | |
1061 | (Category::Normal, Category::Normal) => { |
1062 | self.exp += rhs.exp; |
1063 | let mut wide_sig = [0; 2]; |
1064 | let loss = sig::mul(&mut wide_sig, &mut self.exp, &self.sig, &rhs.sig, S::PRECISION); |
1065 | self.sig = [wide_sig[0]]; |
1066 | let mut status; |
1067 | self = unpack!(status=, self.normalize(round, loss)); |
1068 | if loss != Loss::ExactlyZero { |
1069 | status |= Status::INEXACT; |
1070 | } |
1071 | status.and(self) |
1072 | } |
1073 | } |
1074 | } |
1075 | |
1076 | fn mul_add_r(mut self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> { |
1077 | // If and only if all arguments are normal do we need to do an |
1078 | // extended-precision calculation. |
1079 | if !self.is_finite_non_zero() || !multiplicand.is_finite_non_zero() || !addend.is_finite() { |
1080 | let mut status; |
1081 | if self.is_finite_non_zero() && multiplicand.is_finite_non_zero() { |
1082 | // HACK(eddyb) this corresponds to the case where the C++ code |
1083 | // (`multiplySpecials`) is effectively a noop: no multiplication |
1084 | // is actually performed, because we're really only here to handle |
1085 | // the "infinite/NaN `addend`" special-case, which needs to ignore |
1086 | // the "finite * finite" multiplication entirely, instead of letting |
1087 | // it e.g. overflow into infinity (and trample over `addend`). |
1088 | assert!(!addend.is_finite()); |
1089 | status = Status::OK; |
1090 | } else { |
1091 | self = unpack!(status=, self.mul_r(multiplicand, round)); |
1092 | } |
1093 | |
1094 | // FS can only be Status::OK or Status::INVALID_OP. There is no more work |
1095 | // to do in the latter case. The IEEE-754R standard says it is |
1096 | // implementation-defined in this case whether, if ADDEND is a |
1097 | // quiet NaN, we raise invalid op; this implementation does so. |
1098 | // |
1099 | // If we need to do the addition we can do so with normal |
1100 | // precision. |
1101 | if status == Status::OK { |
1102 | self = unpack!(status=, self.add_r(addend, round)); |
1103 | } |
1104 | return status.and(self); |
1105 | } |
1106 | |
1107 | // Post-multiplication sign, before addition. |
1108 | self = self.negate_if(multiplicand.is_negative()); |
1109 | |
1110 | // Allocate space for twice as many bits as the original significand, plus one |
1111 | // extra bit for the addition to overflow into. |
1112 | assert!(limbs_for_bits(S::PRECISION * 2 + 1) <= 2); |
1113 | let mut wide_sig = sig::widening_mul(self.sig[0], multiplicand.sig[0]); |
1114 | |
1115 | let mut loss = Loss::ExactlyZero; |
1116 | let mut omsb = sig::omsb(&wide_sig); |
1117 | self.exp += multiplicand.exp; |
1118 | |
1119 | // Assume the operands involved in the multiplication are single-precision |
1120 | // FP, and the two multiplicants are: |
1121 | // lhs = a23 . a22 ... a0 * 2^e1 |
1122 | // rhs = b23 . b22 ... b0 * 2^e2 |
1123 | // the result of multiplication is: |
1124 | // lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) |
1125 | // Note that there are three significant bits at the left-hand side of the |
1126 | // radix point: two for the multiplication, and an overflow bit for the |
1127 | // addition (that will always be zero at this point). Move the radix point |
1128 | // toward left by two bits, and adjust exponent accordingly. |
1129 | self.exp += 2; |
1130 | |
1131 | if addend.is_non_zero() { |
1132 | // Normalize our MSB to one below the top bit to allow for overflow. |
1133 | let ext_precision = 2 * S::PRECISION + 1; |
1134 | if omsb != ext_precision - 1 { |
1135 | assert!(ext_precision > omsb); |
1136 | sig::shift_left(&mut wide_sig, &mut self.exp, (ext_precision - 1) - omsb); |
1137 | } |
1138 | |
1139 | // The intermediate result of the multiplication has "2 * S::PRECISION" |
1140 | // signicant bit; adjust the addend to be consistent with mul result. |
1141 | let mut ext_addend_sig = [addend.sig[0], 0]; |
1142 | |
1143 | // Extend the addend significand to ext_precision - 1. This guarantees |
1144 | // that the high bit of the significand is zero (same as wide_sig), |
1145 | // so the addition will overflow (if it does overflow at all) into the top bit. |
1146 | sig::shift_left(&mut ext_addend_sig, &mut 0, ext_precision - 1 - S::PRECISION); |
1147 | |
1148 | let mut sign = self.is_negative(); |
1149 | loss = sig::add_or_sub( |
1150 | &mut wide_sig, |
1151 | &mut self.exp, |
1152 | &mut sign, |
1153 | &mut ext_addend_sig, |
1154 | addend.exp + 1, |
1155 | addend.is_negative(), |
1156 | ); |
1157 | self = self.with_sign(sign); |
1158 | |
1159 | omsb = sig::omsb(&wide_sig); |
1160 | } |
1161 | |
1162 | // Convert the result having "2 * S::PRECISION" significant-bits back to the one |
1163 | // having "S::PRECISION" significant-bits. First, move the radix point from |
1164 | // poision "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be |
1165 | // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION". |
1166 | self.exp -= S::PRECISION as ExpInt + 1; |
1167 | |
1168 | // In case MSB resides at the left-hand side of radix point, shift the |
1169 | // mantissa right by some amount to make sure the MSB reside right before |
1170 | // the radix point (i.e. "MSB . rest-significant-bits"). |
1171 | if omsb > S::PRECISION { |
1172 | let bits = omsb - S::PRECISION; |
1173 | loss = sig::shift_right(&mut wide_sig, &mut self.exp, bits).combine(loss); |
1174 | } |
1175 | |
1176 | self.sig[0] = wide_sig[0]; |
1177 | |
1178 | let mut status; |
1179 | self = unpack!(status=, self.normalize(round, loss)); |
1180 | if loss != Loss::ExactlyZero { |
1181 | status |= Status::INEXACT; |
1182 | } |
1183 | |
1184 | // If two numbers add (exactly) to zero, IEEE 754 decrees it is a |
1185 | // positive zero unless rounding to minus infinity, except that |
1186 | // adding two like-signed zeroes gives that zero. |
1187 | if self.category() == Category::Zero |
1188 | && !status.intersects(Status::UNDERFLOW) |
1189 | && self.is_negative() != addend.is_negative() |
1190 | { |
1191 | self = self.with_sign(round == Round::TowardNegative); |
1192 | } |
1193 | |
1194 | status.and(self) |
1195 | } |
1196 | |
1197 | fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
1198 | self = self.negate_if(rhs.is_negative()); |
1199 | |
1200 | match (self.category(), rhs.category()) { |
1201 | (Category::NaN, _) | (_, Category::NaN) => { |
1202 | self = self.negate_if(rhs.is_negative()); // restore the original sign |
1203 | |
1204 | IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) |
1205 | } |
1206 | |
1207 | (Category::Infinity, Category::Infinity) | (Category::Zero, Category::Zero) => { |
1208 | Status::INVALID_OP.and(Self::NAN) |
1209 | } |
1210 | |
1211 | (Category::Infinity | Category::Zero, _) => Status::OK.and(self), |
1212 | |
1213 | (_, Category::Infinity) => Status::OK.and(Self::ZERO.copy_sign(self)), |
1214 | |
1215 | (_, Category::Zero) => Status::DIV_BY_ZERO.and(Self::INFINITY.copy_sign(self)), |
1216 | |
1217 | (Category::Normal, Category::Normal) => { |
1218 | self.exp -= rhs.exp; |
1219 | let dividend = self.sig[0]; |
1220 | let loss = sig::div(&mut self.sig, &mut self.exp, &mut [dividend], &mut [rhs.sig[0]], S::PRECISION); |
1221 | let mut status; |
1222 | self = unpack!(status=, self.normalize(round, loss)); |
1223 | if loss != Loss::ExactlyZero { |
1224 | status |= Status::INEXACT; |
1225 | } |
1226 | status.and(self) |
1227 | } |
1228 | } |
1229 | } |
1230 | |
1231 | fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> { |
1232 | match (self.category(), rhs.category()) { |
1233 | (Category::NaN, _) | (_, Category::NaN) => { |
1234 | IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) |
1235 | } |
1236 | |
1237 | (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN), |
1238 | |
1239 | (Category::Zero, _) | (_, Category::Infinity) => Status::OK.and(self), |
1240 | |
1241 | (Category::Normal, Category::Normal) => { |
1242 | let mut status; |
1243 | |
1244 | let mut x = self; |
1245 | let mut p = rhs; |
1246 | |
1247 | // Make sure the current value is less than twice the denom. If the addition |
1248 | // did not succeed (an overflow has happened), which means that the finite |
1249 | // value we currently posses must be less than twice the denom (as we are |
1250 | // using the same semantics). |
1251 | let p2 = unpack!(status=, p + p); |
1252 | if status == Status::OK { |
1253 | x = unpack!(status=, x.c_fmod(p2)); |
1254 | assert_eq!(status, Status::OK); |
1255 | } |
1256 | |
1257 | // Lets work with absolute numbers. |
1258 | p = p.abs(); |
1259 | x = x.abs(); |
1260 | |
1261 | // |
1262 | // To calculate the remainder we use the following scheme. |
1263 | // |
1264 | // The remainder is defained as follows: |
1265 | // |
1266 | // remainder = numer - rquot * denom = x - r * p |
1267 | // |
1268 | // Where r is the result of: x/p, rounded toward the nearest integral value |
1269 | // (with halfway cases rounded toward the even number). |
1270 | // |
1271 | // Currently, (after x mod 2p): |
1272 | // r is the number of 2p's present inside x, which is inherently, an even |
1273 | // number of p's. |
1274 | // |
1275 | // We may split the remaining calculation into 4 options: |
1276 | // - if x < 0.5p then we round to the nearest number with is 0, and are done. |
1277 | // - if x == 0.5p then we round to the nearest even number which is 0, and we |
1278 | // are done as well. |
1279 | // - if 0.5p < x < p then we round to nearest number which is 1, and we have |
1280 | // to subtract 1p at least once. |
1281 | // - if x >= p then we must subtract p at least once, as x must be a |
1282 | // remainder. |
1283 | // |
1284 | // By now, we were done, or we added 1 to r, which in turn, now an odd number. |
1285 | // |
1286 | // We can now split the remaining calculation to the following 3 options: |
1287 | // - if x < 0.5p then we round to the nearest number with is 0, and are done. |
1288 | // - if x == 0.5p then we round to the nearest even number. As r is odd, we |
1289 | // must round up to the next even number. so we must subtract p once more. |
1290 | // - if x > 0.5p (and inherently x < p) then we must round r up to the next |
1291 | // integral, and subtract p once more. |
1292 | // |
1293 | |
1294 | // Return `x * 2` at no loss of precision (i.e. no overflow). |
1295 | // |
1296 | // HACK(eddyb) this may seem a bit sketchy because it can return |
1297 | // values that `normalize` would've replaced with `overflow_result` |
1298 | // (e.g. overflowing to infinity), but the result is only used for |
1299 | // comparisons, where both sides of such comparison can be seen |
1300 | // as transiently having a larger *effective* exponent range. |
1301 | let lossless_2x = |mut x: Self| { |
1302 | x.exp += 1; |
1303 | |
1304 | if x.exp >= Self::MAX_EXP { |
1305 | // HACK(eddyb) skip lossy `normalize` (see above). |
1306 | } else { |
1307 | let status; |
1308 | x = unpack!(status=, x.normalize(Round::NearestTiesToEven, Loss::ExactlyZero)); |
1309 | assert_eq!(status, Status::OK); |
1310 | } |
1311 | |
1312 | x |
1313 | }; |
1314 | |
1315 | if lossless_2x(x) > p { |
1316 | x = unpack!(status=, x - p); |
1317 | assert_eq!(status, Status::OK); |
1318 | |
1319 | if lossless_2x(x) >= p { |
1320 | x = unpack!(status=, x - p); |
1321 | assert_eq!(status, Status::OK); |
1322 | } |
1323 | } |
1324 | |
1325 | if x.is_zero() { |
1326 | Status::OK.and(x.copy_sign(self)) // IEEE754 requires this |
1327 | } else { |
1328 | Status::OK.and(x.negate_if(self.is_negative())) |
1329 | } |
1330 | } |
1331 | } |
1332 | } |
1333 | |
1334 | fn c_fmod(mut self, rhs: Self) -> StatusAnd<Self> { |
1335 | match (self.category(), rhs.category()) { |
1336 | (Category::NaN, _) | (_, Category::NaN) => { |
1337 | IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) |
1338 | } |
1339 | |
1340 | (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN), |
1341 | |
1342 | (Category::Zero, _) | (_, Category::Infinity) => Status::OK.and(self), |
1343 | |
1344 | (Category::Normal, Category::Normal) => { |
1345 | let orig = self; |
1346 | |
1347 | while self.is_finite_non_zero() |
1348 | && rhs.is_finite_non_zero() |
1349 | && self.cmp_abs_normal(rhs) != Ordering::Less |
1350 | { |
1351 | let exp = self.ilogb() - rhs.ilogb(); |
1352 | let mut v = rhs.scalbn(exp); |
1353 | // `v` can overflow to NaN with `NonfiniteBehavior::NanOnly`, so explicitly |
1354 | // check for it. |
1355 | if v.is_nan() || self.cmp_abs_normal(v) == Ordering::Less { |
1356 | v = rhs.scalbn(exp - 1); |
1357 | } |
1358 | v = v.copy_sign(self); |
1359 | |
1360 | let status; |
1361 | self = unpack!(status=, self - v); |
1362 | assert_eq!(status, Status::OK); |
1363 | } |
1364 | if self.is_zero() { |
1365 | self = self.copy_sign(orig); |
1366 | } |
1367 | Status::OK.and(self) |
1368 | } |
1369 | } |
1370 | } |
1371 | |
1372 | fn round_to_integral(self, round: Round) -> StatusAnd<Self> { |
1373 | match self.category() { |
1374 | Category::NaN => IeeeDefaultExceptionHandling::result_from_nan(self), |
1375 | |
1376 | // [IEEE Std 754-2008 6.1]: |
1377 | // The behavior of infinity in floating-point arithmetic is derived from the |
1378 | // limiting cases of real arithmetic with operands of arbitrarily |
1379 | // large magnitude, when such a limit exists. |
1380 | // ... |
1381 | // Operations on infinite operands are usually exact and therefore signal no |
1382 | // exceptions ... |
1383 | Category::Infinity => Status::OK.and(self), |
1384 | |
1385 | // [IEEE Std 754-2008 6.3]: |
1386 | // ... the sign of the result of conversions, the quantize operation, the |
1387 | // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is |
1388 | // the sign of the first or only operand. |
1389 | Category::Zero => Status::OK.and(self), |
1390 | |
1391 | Category::Normal => { |
1392 | // If the exponent is large enough, we know that this value is already |
1393 | // integral, and the arithmetic below would potentially cause it to saturate |
1394 | // to +/-Inf. Bail out early instead. |
1395 | if self.exp + 1 >= S::PRECISION as ExpInt { |
1396 | return Status::OK.and(self); |
1397 | } |
1398 | |
1399 | // The algorithm here is quite simple: we add 2^(p-1), where p is the |
1400 | // precision of our format, and then subtract it back off again. The choice |
1401 | // of rounding modes for the addition/subtraction determines the rounding mode |
1402 | // for our integral rounding as well. |
1403 | // NOTE: When the input value is negative, we do subtraction followed by |
1404 | // addition instead. |
1405 | assert!(S::PRECISION <= 128); |
1406 | let mut status; |
1407 | let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1))); |
1408 | assert_eq!(status, Status::OK); |
1409 | let magic_const = magic_const.copy_sign(self); |
1410 | |
1411 | let mut r = self; |
1412 | r = unpack!(status=, r.add_r(magic_const, round)); |
1413 | |
1414 | // Current value and 'MagicConstant' are both integers, so the result of the |
1415 | // subtraction is always exact according to Sterbenz' lemma. |
1416 | r = r.sub_r(magic_const, round).value; |
1417 | |
1418 | // Restore the input sign to handle the case of zero result |
1419 | // correctly. |
1420 | status.and(r.copy_sign(self)) |
1421 | } |
1422 | } |
1423 | } |
1424 | |
1425 | fn next_up(mut self) -> StatusAnd<Self> { |
1426 | // Compute nextUp(x), handling each float category separately. |
1427 | match self.category() { |
1428 | Category::Infinity => { |
1429 | if self.is_negative() { |
1430 | // nextUp(-inf) = -largest |
1431 | Status::OK.and(-Self::largest()) |
1432 | } else { |
1433 | // nextUp(+inf) = +inf |
1434 | Status::OK.and(self) |
1435 | } |
1436 | } |
1437 | Category::NaN => { |
1438 | // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. |
1439 | // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not |
1440 | // change the payload. |
1441 | if self.is_signaling() { |
1442 | // For consistency, propagate the sign of the sNaN to the qNaN. |
1443 | Status::INVALID_OP.and(Self::NAN.copy_sign(self)) |
1444 | } else { |
1445 | Status::OK.and(self) |
1446 | } |
1447 | } |
1448 | Category::Zero => { |
1449 | // nextUp(pm 0) = +smallest |
1450 | Status::OK.and(Self::SMALLEST) |
1451 | } |
1452 | Category::Normal => { |
1453 | // nextUp(-smallest) = -0 |
1454 | if self.is_smallest() && self.is_negative() { |
1455 | return Status::OK.and(-Self::ZERO); |
1456 | } |
1457 | |
1458 | // nextUp(largest) == INFINITY |
1459 | if self.is_largest() && !self.is_negative() { |
1460 | return Status::OK.and(Self::INFINITY); |
1461 | } |
1462 | |
1463 | // Excluding the integral bit. This allows us to test for binade boundaries. |
1464 | let sig_mask = (1 << (S::PRECISION - 1)) - 1; |
1465 | |
1466 | // nextUp(normal) == normal + inc. |
1467 | if self.is_negative() { |
1468 | // If we are negative, we need to decrement the significand. |
1469 | |
1470 | // We only cross a binade boundary that requires adjusting the exponent |
1471 | // if: |
1472 | // 1. exponent != S::MIN_EXP. This implies we are not in the |
1473 | // smallest binade or are dealing with denormals. |
1474 | // 2. Our significand excluding the integral bit is all zeros. |
1475 | let crossing_binade_boundary = self.exp != S::MIN_EXP && self.sig[0] & sig_mask == 0; |
1476 | |
1477 | // Decrement the significand. |
1478 | // |
1479 | // We always do this since: |
1480 | // 1. If we are dealing with a non-binade decrement, by definition we |
1481 | // just decrement the significand. |
1482 | // 2. If we are dealing with a normal -> normal binade decrement, since |
1483 | // we have an explicit integral bit the fact that all bits but the |
1484 | // integral bit are zero implies that subtracting one will yield a |
1485 | // significand with 0 integral bit and 1 in all other spots. Thus we |
1486 | // must just adjust the exponent and set the integral bit to 1. |
1487 | // 3. If we are dealing with a normal -> denormal binade decrement, |
1488 | // since we set the integral bit to 0 when we represent denormals, we |
1489 | // just decrement the significand. |
1490 | sig::decrement(&mut self.sig); |
1491 | |
1492 | if crossing_binade_boundary { |
1493 | // Our result is a normal number. Do the following: |
1494 | // 1. Set the integral bit to 1. |
1495 | // 2. Decrement the exponent. |
1496 | sig::set_bit(&mut self.sig, S::PRECISION - 1); |
1497 | self.exp -= 1; |
1498 | } |
1499 | } else { |
1500 | // If we are positive, we need to increment the significand. |
1501 | |
1502 | // We only cross a binade boundary that requires adjusting the exponent if |
1503 | // the input is not a denormal and all of said input's significand bits |
1504 | // are set. If all of said conditions are true: clear the significand, set |
1505 | // the integral bit to 1, and increment the exponent. If we have a |
1506 | // denormal always increment since moving denormals and the numbers in the |
1507 | // smallest normal binade have the same exponent in our representation. |
1508 | let crossing_binade_boundary = !self.is_denormal() && self.sig[0] & sig_mask == sig_mask; |
1509 | |
1510 | if crossing_binade_boundary { |
1511 | self.sig = [0]; |
1512 | sig::set_bit(&mut self.sig, S::PRECISION - 1); |
1513 | assert_ne!( |
1514 | self.exp, |
1515 | S::MAX_EXP, |
1516 | "We can not increment an exponent beyond the MAX_EXP \ |
1517 | allowed by the given floating point semantics." |
1518 | ); |
1519 | self.exp += 1; |
1520 | } else { |
1521 | sig::increment(&mut self.sig); |
1522 | } |
1523 | } |
1524 | Status::OK.and(self) |
1525 | } |
1526 | } |
1527 | } |
1528 | |
1529 | fn from_bits(input: u128) -> Self { |
1530 | // Dispatch to semantics. |
1531 | S::from_bits(input) |
1532 | } |
1533 | |
1534 | fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> { |
1535 | IeeeFloat { |
1536 | sig: [input], |
1537 | exp: S::PRECISION as ExpInt - 1, |
1538 | read_only_category_do_not_mutate: Category::Normal, |
1539 | read_only_sign_do_not_mutate: false, |
1540 | marker: PhantomData, |
1541 | } |
1542 | .normalize(round, Loss::ExactlyZero) |
1543 | } |
1544 | |
1545 | fn from_str_r(s: &str, mut round: Round) -> Result<StatusAnd<Self>, ParseError> { |
1546 | if s.is_empty() { |
1547 | return Err(ParseError("Invalid string length" )); |
1548 | } |
1549 | |
1550 | // Handle a leading minus sign. |
1551 | let (minus, s) = s.strip_prefix("-" ).map(|s| (true, s)).unwrap_or((false, s)); |
1552 | let from_abs = |r: Self| r.negate_if(minus); |
1553 | |
1554 | // Handle a leading plus sign (mutually exclusive with minus). |
1555 | let (explicit_plus, s) = s |
1556 | .strip_prefix("+" ) |
1557 | .filter(|_| !minus) |
1558 | .map(|s| (true, s)) |
1559 | .unwrap_or((false, s)); |
1560 | |
1561 | // Handle special cases. |
1562 | let special = match s { |
1563 | "Inf" if minus || explicit_plus => Some(Self::INFINITY), |
1564 | |
1565 | "inf" | "INFINITY" if !explicit_plus => Some(Self::INFINITY), |
1566 | |
1567 | _ if !explicit_plus => { |
1568 | // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. |
1569 | let (is_signaling, s) = s.strip_prefix(['s' , 'S' ]).map_or((false, s), |s| (true, s)); |
1570 | |
1571 | s.strip_prefix("nan" ).or_else(|| s.strip_prefix("NaN" )).and_then(|s| { |
1572 | // Allow the payload to be inside parentheses. |
1573 | let s = s |
1574 | .strip_prefix("(" ) |
1575 | .and_then(|s| { |
1576 | // Parentheses should be balanced (and not empty). |
1577 | s.strip_suffix(")" ).filter(|s| !s.is_empty()) |
1578 | }) |
1579 | .unwrap_or(s); |
1580 | |
1581 | let payload = if s.is_empty() { |
1582 | // A NaN without payload. |
1583 | None |
1584 | } else { |
1585 | // Determine the payload number's radix. |
1586 | let (radix, s) = s |
1587 | .strip_prefix("0" ) |
1588 | .filter(|s| !s.is_empty()) |
1589 | .map(|s| s.strip_prefix(['x' , 'X' ]).map(|s| (16, s)).unwrap_or((8, s))) |
1590 | .unwrap_or((10, s)); |
1591 | |
1592 | // Parse the payload and make the NaN. |
1593 | Some(u128::from_str_radix(s, radix).ok()?) |
1594 | }; |
1595 | |
1596 | Some(if is_signaling { |
1597 | Self::snan(payload) |
1598 | } else { |
1599 | Self::qnan(payload) |
1600 | }) |
1601 | }) |
1602 | } |
1603 | |
1604 | _ => None, |
1605 | }; |
1606 | if let Some(r) = special { |
1607 | return Ok(Status::OK.and(from_abs(r))); |
1608 | } |
1609 | |
1610 | if s.is_empty() { |
1611 | return Err(ParseError("String has no digits" )); |
1612 | } |
1613 | |
1614 | // Adjust the rounding mode for the absolute value below. |
1615 | round = round.negate_if(minus); |
1616 | |
1617 | let (is_hex, s) = s |
1618 | .strip_prefix("0" ) |
1619 | .and_then(|s| s.strip_prefix(['x' , 'X' ])) |
1620 | .map(|s| (true, s)) |
1621 | .unwrap_or((false, s)); |
1622 | |
1623 | let r = if is_hex { |
1624 | if s.is_empty() { |
1625 | return Err(ParseError("Invalid string" )); |
1626 | } |
1627 | Self::from_hexadecimal_string(s, round)? |
1628 | } else { |
1629 | Self::from_decimal_string(s, round)? |
1630 | }; |
1631 | |
1632 | Ok(r.map(from_abs)) |
1633 | } |
1634 | |
1635 | fn to_bits(self) -> u128 { |
1636 | // Dispatch to semantics. |
1637 | S::to_bits(self) |
1638 | } |
1639 | |
1640 | fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> { |
1641 | // The result of trying to convert a number too large. |
1642 | let overflow = if self.is_negative() { |
1643 | // Negative numbers cannot be represented as unsigned. |
1644 | 0 |
1645 | } else { |
1646 | // Largest unsigned integer of the given width. |
1647 | !0 >> (128 - width) |
1648 | }; |
1649 | |
1650 | *is_exact = false; |
1651 | |
1652 | match self.category() { |
1653 | Category::NaN => Status::INVALID_OP.and(0), |
1654 | |
1655 | Category::Infinity => Status::INVALID_OP.and(overflow), |
1656 | |
1657 | Category::Zero => { |
1658 | // Negative zero can't be represented as an int. |
1659 | *is_exact = !self.is_negative(); |
1660 | Status::OK.and(0) |
1661 | } |
1662 | |
1663 | Category::Normal => { |
1664 | let mut r = 0; |
1665 | |
1666 | // Step 1: place our absolute value, with any fraction truncated, in |
1667 | // the destination. |
1668 | let truncated_bits = if self.exp < 0 { |
1669 | // Our absolute value is less than one; truncate everything. |
1670 | // For exponent -1 the integer bit represents .5, look at that. |
1671 | // For smaller exponents leftmost truncated bit is 0. |
1672 | S::PRECISION - 1 + (-self.exp) as usize |
1673 | } else { |
1674 | // We want the most significant (exponent + 1) bits; the rest are |
1675 | // truncated. |
1676 | let bits = self.exp as usize + 1; |
1677 | |
1678 | // Hopelessly large in magnitude? |
1679 | if bits > width { |
1680 | return Status::INVALID_OP.and(overflow); |
1681 | } |
1682 | |
1683 | if bits < S::PRECISION { |
1684 | // We truncate (S::PRECISION - bits) bits. |
1685 | r = self.sig[0] >> (S::PRECISION - bits); |
1686 | S::PRECISION - bits |
1687 | } else { |
1688 | // We want at least as many bits as are available. |
1689 | r = self.sig[0] << (bits - S::PRECISION); |
1690 | 0 |
1691 | } |
1692 | }; |
1693 | |
1694 | // Step 2: work out any lost fraction, and increment the absolute |
1695 | // value if we would round away from zero. |
1696 | let mut loss = Loss::ExactlyZero; |
1697 | if truncated_bits > 0 { |
1698 | loss = Loss::through_truncation(&self.sig, truncated_bits); |
1699 | if loss != Loss::ExactlyZero && self.round_away_from_zero(round, loss, truncated_bits) { |
1700 | r = r.wrapping_add(1); |
1701 | if r == 0 { |
1702 | return Status::INVALID_OP.and(overflow); // Overflow. |
1703 | } |
1704 | } |
1705 | } |
1706 | |
1707 | // Step 3: check if we fit in the destination. |
1708 | if r > overflow { |
1709 | return Status::INVALID_OP.and(overflow); |
1710 | } |
1711 | |
1712 | if loss == Loss::ExactlyZero { |
1713 | *is_exact = true; |
1714 | Status::OK.and(r) |
1715 | } else { |
1716 | Status::INEXACT.and(r) |
1717 | } |
1718 | } |
1719 | } |
1720 | } |
1721 | |
1722 | fn cmp_abs_normal(self, rhs: Self) -> Ordering { |
1723 | assert!(self.is_finite_non_zero()); |
1724 | assert!(rhs.is_finite_non_zero()); |
1725 | |
1726 | // If exponents are equal, do an unsigned comparison of the significands. |
1727 | self.exp.cmp(&rhs.exp).then_with(|| sig::cmp(&self.sig, &rhs.sig)) |
1728 | } |
1729 | |
1730 | fn bitwise_eq(self, rhs: Self) -> bool { |
1731 | if self.category() != rhs.category() || self.is_negative() != rhs.is_negative() { |
1732 | return false; |
1733 | } |
1734 | |
1735 | if self.category() == Category::Zero || self.category() == Category::Infinity { |
1736 | return true; |
1737 | } |
1738 | |
1739 | if self.is_finite_non_zero() && self.exp != rhs.exp { |
1740 | return false; |
1741 | } |
1742 | |
1743 | self.sig == rhs.sig |
1744 | } |
1745 | |
1746 | fn is_negative(self) -> bool { |
1747 | self.read_only_sign_do_not_mutate |
1748 | } |
1749 | |
1750 | fn is_denormal(self) -> bool { |
1751 | self.is_finite_non_zero() && self.exp == S::MIN_EXP && !sig::get_bit(&self.sig, S::PRECISION - 1) |
1752 | } |
1753 | |
1754 | fn is_signaling(self) -> bool { |
1755 | // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the |
1756 | // first bit of the trailing significand being 0. |
1757 | self.is_nan() && self.sig[0] & S::QNAN_SIGNIFICAND != S::QNAN_SIGNIFICAND |
1758 | } |
1759 | |
1760 | fn category(self) -> Category { |
1761 | self.read_only_category_do_not_mutate |
1762 | } |
1763 | |
1764 | fn get_exact_inverse(self) -> Option<Self> { |
1765 | // Special floats and denormals have no exact inverse. |
1766 | if !self.is_finite_non_zero() { |
1767 | return None; |
1768 | } |
1769 | |
1770 | // Check that the number is a power of two by making sure that only the |
1771 | // integer bit is set in the significand. |
1772 | if self.sig != [1 << (S::PRECISION - 1)] { |
1773 | return None; |
1774 | } |
1775 | |
1776 | // Get the inverse. |
1777 | let mut reciprocal = Self::from_u128(1).value; |
1778 | let status; |
1779 | reciprocal = unpack!(status=, reciprocal / self); |
1780 | if status != Status::OK { |
1781 | return None; |
1782 | } |
1783 | |
1784 | // Avoid multiplication with a denormal, it is not safe on all platforms and |
1785 | // may be slower than a normal division. |
1786 | if reciprocal.is_denormal() { |
1787 | return None; |
1788 | } |
1789 | |
1790 | assert!(reciprocal.is_finite_non_zero()); |
1791 | assert_eq!(reciprocal.sig, [1 << (S::PRECISION - 1)]); |
1792 | |
1793 | Some(reciprocal) |
1794 | } |
1795 | |
1796 | fn ilogb(mut self) -> ExpInt { |
1797 | if self.is_nan() { |
1798 | return IEK_NAN; |
1799 | } |
1800 | if self.is_zero() { |
1801 | return IEK_ZERO; |
1802 | } |
1803 | if self.is_infinite() { |
1804 | return IEK_INF; |
1805 | } |
1806 | if !self.is_denormal() { |
1807 | return self.exp; |
1808 | } |
1809 | |
1810 | let sig_bits = (S::PRECISION - 1) as ExpInt; |
1811 | self.exp += sig_bits; |
1812 | self = self.normalize(Round::NearestTiesToEven, Loss::ExactlyZero).value; |
1813 | self.exp - sig_bits |
1814 | } |
1815 | |
1816 | fn scalbn_r(mut self, exp: ExpInt, round: Round) -> Self { |
1817 | // If exp is wildly out-of-scale, simply adding it to self.exp will |
1818 | // overflow; clamp it to a safe range before adding, but ensure that the range |
1819 | // is large enough that the clamp does not change the result. The range we |
1820 | // need to support is the difference between the largest possible exponent and |
1821 | // the normalized exponent of half the smallest denormal. |
1822 | |
1823 | let sig_bits = (S::PRECISION - 1) as i32; |
1824 | let max_change = S::MAX_EXP as i32 - (S::MIN_EXP as i32 - sig_bits) + 1; |
1825 | |
1826 | // Clamp to one past the range ends to let normalize handle overflow. |
1827 | let exp_change = cmp::min(cmp::max(exp as i32, -max_change - 1), max_change); |
1828 | self.exp = self.exp.saturating_add(exp_change as ExpInt); |
1829 | self = self.normalize(round, Loss::ExactlyZero).value; |
1830 | if self.is_nan() { |
1831 | self = IeeeDefaultExceptionHandling::result_from_nan(self).value; |
1832 | } |
1833 | self |
1834 | } |
1835 | |
1836 | fn frexp_r(mut self, exp: &mut ExpInt, round: Round) -> Self { |
1837 | *exp = self.ilogb(); |
1838 | |
1839 | // Quiet signalling nans. |
1840 | if *exp == IEK_NAN { |
1841 | self = IeeeDefaultExceptionHandling::result_from_nan(self).value; |
1842 | return self; |
1843 | } |
1844 | |
1845 | if *exp == IEK_INF { |
1846 | return self; |
1847 | } |
1848 | |
1849 | // 1 is added because frexp is defined to return a normalized fraction in |
1850 | // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). |
1851 | if *exp == IEK_ZERO { |
1852 | *exp = 0; |
1853 | } else { |
1854 | *exp += 1; |
1855 | } |
1856 | self.scalbn_r(-*exp, round) |
1857 | } |
1858 | } |
1859 | |
1860 | impl<S: Semantics, T: Semantics> FloatConvert<IeeeFloat<T>> for IeeeFloat<S> { |
1861 | fn convert_r(mut self, round: Round, loses_info: &mut bool) -> StatusAnd<IeeeFloat<T>> { |
1862 | // FIXME(eddyb) move this into the return result. |
1863 | *loses_info = false; |
1864 | |
1865 | // x86 has some unusual NaNs which cannot be represented in any other |
1866 | // format; note them here. |
1867 | fn is_x87_double_extended<S: Semantics>() -> bool { |
1868 | S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND |
1869 | } |
1870 | let loses_x87_pseudo_nan = is_x87_double_extended::<S>() |
1871 | && !is_x87_double_extended::<T>() |
1872 | && self.category() == Category::NaN |
1873 | && (self.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND; |
1874 | |
1875 | // NOTE(eddyb) this is done early because the target semantics may not |
1876 | // actually support encoding the distinction between SNaN and QNaN. |
1877 | // |
1878 | // Convert of sNaN creates qNaN and raises an exception (invalid op). |
1879 | // This also guarantees that a sNaN does not become Inf on a truncation |
1880 | // that loses all payload bits. |
1881 | let mut status = Status::OK; |
1882 | if self.is_nan() { |
1883 | self = unpack!(status|=, IeeeDefaultExceptionHandling::result_from_nan(self)); |
1884 | } |
1885 | |
1886 | let Self { mut sig, mut exp, .. } = self; |
1887 | |
1888 | // If this is a truncation of a denormal number, and the target semantics |
1889 | // has larger exponent range than the source semantics (this can happen |
1890 | // when truncating from PowerPC double-double to double format), the |
1891 | // right shift could lose result mantissa bits. Adjust exponent instead |
1892 | // of performing excessive shift. |
1893 | // Also do a similar trick in case shifting denormal would produce zero |
1894 | // significand as this case isn't handled correctly by normalize. |
1895 | let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt; |
1896 | if shift < 0 && self.is_finite_non_zero() { |
1897 | let omsb = sig::omsb(&sig) as ExpInt; |
1898 | let mut exp_change = omsb - S::PRECISION as ExpInt; |
1899 | if exp + exp_change < T::MIN_EXP { |
1900 | exp_change = T::MIN_EXP - exp; |
1901 | } |
1902 | if exp_change < shift { |
1903 | exp_change = shift; |
1904 | } |
1905 | if exp_change < 0 { |
1906 | shift -= exp_change; |
1907 | exp += exp_change; |
1908 | } else if omsb <= -shift { |
1909 | exp_change = omsb + shift - 1; // leave at least one bit set |
1910 | shift -= exp_change; |
1911 | exp += exp_change; |
1912 | } |
1913 | } |
1914 | |
1915 | // If this is a truncation, perform the shift. |
1916 | let mut loss = Loss::ExactlyZero; |
1917 | if shift < 0 && (self.is_finite_non_zero() || self.category() == Category::NaN && S::NAN_PAYLOAD_MASK != 0) { |
1918 | loss = sig::shift_right(&mut sig, &mut 0, -shift as usize); |
1919 | } |
1920 | |
1921 | // If this is an extension, perform the shift. |
1922 | if shift > 0 && (self.is_finite_non_zero() || self.category() == Category::NaN) { |
1923 | sig::shift_left(&mut sig, &mut 0, shift as usize); |
1924 | } |
1925 | |
1926 | let r = match self.category() { |
1927 | Category::Normal => { |
1928 | let r = IeeeFloat::<T> { |
1929 | sig, |
1930 | exp, |
1931 | read_only_category_do_not_mutate: self.category(), |
1932 | read_only_sign_do_not_mutate: self.is_negative(), |
1933 | marker: PhantomData, |
1934 | }; |
1935 | unpack!(status|=, r.normalize(round, loss)) |
1936 | } |
1937 | Category::NaN => { |
1938 | *loses_info = loss != Loss::ExactlyZero |
1939 | || loses_x87_pseudo_nan |
1940 | || S::NAN_PAYLOAD_MASK != 0 && T::NAN_PAYLOAD_MASK == 0; |
1941 | |
1942 | IeeeFloat::<T>::qnan(Some(sig[0])).with_sign(self.is_negative()) |
1943 | } |
1944 | Category::Infinity => IeeeFloat::<T>::INFINITY.with_sign(self.is_negative()), |
1945 | Category::Zero => IeeeFloat::<T>::ZERO.with_sign(self.is_negative()), |
1946 | }; |
1947 | |
1948 | // NOTE(eddyb) this catches all cases of e.g. ±Inf turning into NaN, |
1949 | // because of `T::NONFINITE_BEHAVIOR` not being `IEEE754`. |
1950 | if matches!(self.category(), Category::Infinity | Category::Zero) |
1951 | && (r.category() != self.category() || r.is_negative() != self.is_negative()) |
1952 | { |
1953 | status |= Status::INEXACT; |
1954 | } |
1955 | |
1956 | *loses_info |= (status - Status::INVALID_OP) != Status::OK; |
1957 | |
1958 | status.and(r) |
1959 | } |
1960 | } |
1961 | |
1962 | impl<S: Semantics> IeeeFloat<S> { |
1963 | /// Handle positive overflow. We either return infinity or |
1964 | /// the largest finite number. For negative overflow, |
1965 | /// negate the `round` argument before calling. |
1966 | fn overflow_result(round: Round) -> StatusAnd<Self> { |
1967 | match round { |
1968 | // Infinity? |
1969 | Round::NearestTiesToEven | Round::NearestTiesToAway | Round::TowardPositive => { |
1970 | (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY) |
1971 | } |
1972 | // Otherwise we become the largest finite number. |
1973 | Round::TowardNegative | Round::TowardZero => Status::INEXACT.and(Self::largest()), |
1974 | } |
1975 | } |
1976 | |
1977 | /// Returns TRUE if, when truncating the current number, with BIT the |
1978 | /// new LSB, with the given lost fraction and rounding mode, the result |
1979 | /// would need to be rounded away from zero (i.e., by increasing the |
1980 | /// signficand). This routine must work for Category::Zero of both signs, and |
1981 | /// Category::Normal numbers. |
1982 | fn round_away_from_zero(&self, round: Round, loss: Loss, bit: usize) -> bool { |
1983 | // NaNs and infinities should not have lost fractions. |
1984 | assert!(self.is_finite_non_zero() || self.is_zero()); |
1985 | |
1986 | // Current callers never pass this so we don't handle it. |
1987 | assert_ne!(loss, Loss::ExactlyZero); |
1988 | |
1989 | match round { |
1990 | Round::NearestTiesToAway => loss == Loss::ExactlyHalf || loss == Loss::MoreThanHalf, |
1991 | Round::NearestTiesToEven => { |
1992 | if loss == Loss::MoreThanHalf { |
1993 | return true; |
1994 | } |
1995 | |
1996 | // Our zeros don't have a significand to test. |
1997 | if loss == Loss::ExactlyHalf && self.category() != Category::Zero { |
1998 | return sig::get_bit(&self.sig, bit); |
1999 | } |
2000 | |
2001 | false |
2002 | } |
2003 | Round::TowardZero => false, |
2004 | Round::TowardPositive => !self.is_negative(), |
2005 | Round::TowardNegative => self.is_negative(), |
2006 | } |
2007 | } |
2008 | |
2009 | fn normalize(mut self, round: Round, mut loss: Loss) -> StatusAnd<Self> { |
2010 | if !self.is_finite_non_zero() { |
2011 | return Status::OK.and(self); |
2012 | } |
2013 | |
2014 | // Before rounding normalize the exponent of Category::Normal numbers. |
2015 | let mut omsb = sig::omsb(&self.sig); |
2016 | |
2017 | if omsb > 0 { |
2018 | // OMSB is numbered from 1. We want to place it in the integer |
2019 | // bit numbered PRECISION if possible, with a compensating change in |
2020 | // the exponent. |
2021 | let mut final_exp = self.exp.saturating_add(omsb as ExpInt - S::PRECISION as ExpInt); |
2022 | |
2023 | // If the resulting exponent is too high, overflow according to |
2024 | // the rounding mode. |
2025 | if final_exp > S::MAX_EXP { |
2026 | let round = round.negate_if(self.is_negative()); |
2027 | return Self::overflow_result(round).map(|r| r.copy_sign(self)); |
2028 | } |
2029 | |
2030 | // Subnormal numbers have exponent MIN_EXP, and their MSB |
2031 | // is forced based on that. |
2032 | if final_exp < S::MIN_EXP { |
2033 | final_exp = S::MIN_EXP; |
2034 | } |
2035 | |
2036 | // Shifting left is easy as we don't lose precision. |
2037 | if final_exp < self.exp { |
2038 | assert_eq!(loss, Loss::ExactlyZero); |
2039 | |
2040 | let exp_change = (self.exp - final_exp) as usize; |
2041 | sig::shift_left(&mut self.sig, &mut self.exp, exp_change); |
2042 | |
2043 | return Status::OK.and(self); |
2044 | } |
2045 | |
2046 | // Shift right and capture any new lost fraction. |
2047 | if final_exp > self.exp { |
2048 | let exp_change = (final_exp - self.exp) as usize; |
2049 | loss = sig::shift_right(&mut self.sig, &mut self.exp, exp_change).combine(loss); |
2050 | |
2051 | // Keep OMSB up-to-date. |
2052 | omsb = omsb.saturating_sub(exp_change); |
2053 | } |
2054 | } |
2055 | |
2056 | // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up |
2057 | // the largest significand of `MAX_EXP` (which also has normals), though |
2058 | // comparing significands needs to ignore the integer bit `NAN` lacks. |
2059 | if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly |
2060 | && self.exp == Self::NAN.exp |
2061 | && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig |
2062 | { |
2063 | return Self::overflow_result(round).map(|r| r.copy_sign(self)); |
2064 | } |
2065 | |
2066 | // Now round the number according to round given the lost |
2067 | // fraction. |
2068 | |
2069 | // As specified in IEEE 754, since we do not trap we do not report |
2070 | // underflow for exact results. |
2071 | if loss == Loss::ExactlyZero { |
2072 | // Canonicalize zeros. |
2073 | if omsb == 0 { |
2074 | self = Self::ZERO.copy_sign(self); |
2075 | } |
2076 | |
2077 | return Status::OK.and(self); |
2078 | } |
2079 | |
2080 | // Increment the significand if we're rounding away from zero. |
2081 | if self.round_away_from_zero(round, loss, 0) { |
2082 | if omsb == 0 { |
2083 | self.exp = S::MIN_EXP; |
2084 | } |
2085 | |
2086 | // We should never overflow. |
2087 | assert_eq!(sig::increment(&mut self.sig), 0); |
2088 | omsb = sig::omsb(&self.sig); |
2089 | |
2090 | // Did the significand increment overflow? |
2091 | if omsb == S::PRECISION + 1 { |
2092 | // Renormalize by incrementing the exponent and shifting our |
2093 | // significand right one. However if we already have the |
2094 | // maximum exponent we overflow to infinity. |
2095 | if self.exp == S::MAX_EXP { |
2096 | return (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY.copy_sign(self)); |
2097 | } |
2098 | |
2099 | let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1); |
2100 | |
2101 | return Status::INEXACT.and(self); |
2102 | } |
2103 | |
2104 | // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up |
2105 | // the largest significand of `MAX_EXP` (which also has normals), though |
2106 | // comparing significands needs to ignore the integer bit `NAN` lacks. |
2107 | if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly |
2108 | && self.exp == Self::NAN.exp |
2109 | && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig |
2110 | { |
2111 | return Self::overflow_result(round).map(|r| r.copy_sign(self)); |
2112 | } |
2113 | } |
2114 | |
2115 | // The normal case - we were and are not denormal, and any |
2116 | // significand increment above didn't overflow. |
2117 | if omsb == S::PRECISION { |
2118 | return Status::INEXACT.and(self); |
2119 | } |
2120 | |
2121 | // We have a non-zero denormal. |
2122 | assert!(omsb < S::PRECISION); |
2123 | |
2124 | // Canonicalize zeros. |
2125 | if omsb == 0 { |
2126 | self = Self::ZERO.copy_sign(self); |
2127 | } |
2128 | |
2129 | // The Category::Zero case is a denormal that underflowed to zero. |
2130 | (Status::UNDERFLOW | Status::INEXACT).and(self) |
2131 | } |
2132 | |
2133 | fn from_hexadecimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { |
2134 | let mut r = IeeeFloat { |
2135 | sig: [0], |
2136 | exp: 0, |
2137 | read_only_category_do_not_mutate: Category::Normal, |
2138 | read_only_sign_do_not_mutate: false, |
2139 | marker: PhantomData, |
2140 | }; |
2141 | |
2142 | let mut any_digits = false; |
2143 | let mut has_exp = false; |
2144 | let mut bit_pos = LIMB_BITS as isize; |
2145 | let mut loss = None; |
2146 | |
2147 | // Without leading or trailing zeros, irrespective of the dot. |
2148 | let mut first_sig_digit = None; |
2149 | let mut dot = s.len(); |
2150 | |
2151 | for (p, c) in s.char_indices() { |
2152 | // Skip leading zeros and any (hexa)decimal point. |
2153 | if c == '.' { |
2154 | if dot != s.len() { |
2155 | return Err(ParseError("String contains multiple dots" )); |
2156 | } |
2157 | dot = p; |
2158 | } else if let Some(hex_value) = c.to_digit(16) { |
2159 | any_digits = true; |
2160 | |
2161 | if first_sig_digit.is_none() { |
2162 | if hex_value == 0 { |
2163 | continue; |
2164 | } |
2165 | first_sig_digit = Some(p); |
2166 | } |
2167 | |
2168 | // Store the number while we have space. |
2169 | bit_pos -= 4; |
2170 | if bit_pos >= 0 { |
2171 | r.sig[0] |= (hex_value as Limb) << bit_pos; |
2172 | } else { |
2173 | // If zero or one-half (the hexadecimal digit 8) are followed |
2174 | // by non-zero, they're a little more than zero or one-half. |
2175 | if let Some(ref mut loss) = loss { |
2176 | if hex_value != 0 { |
2177 | if *loss == Loss::ExactlyZero { |
2178 | *loss = Loss::LessThanHalf; |
2179 | } |
2180 | if *loss == Loss::ExactlyHalf { |
2181 | *loss = Loss::MoreThanHalf; |
2182 | } |
2183 | } |
2184 | } else { |
2185 | loss = Some(match hex_value { |
2186 | 0 => Loss::ExactlyZero, |
2187 | 1..=7 => Loss::LessThanHalf, |
2188 | 8 => Loss::ExactlyHalf, |
2189 | 9..=15 => Loss::MoreThanHalf, |
2190 | _ => unreachable!(), |
2191 | }); |
2192 | } |
2193 | } |
2194 | } else if c == 'p' || c == 'P' { |
2195 | if !any_digits { |
2196 | return Err(ParseError("Significand has no digits" )); |
2197 | } |
2198 | |
2199 | if dot == s.len() { |
2200 | dot = p; |
2201 | } |
2202 | |
2203 | let mut chars = s[p + 1..].chars().peekable(); |
2204 | |
2205 | // Adjust for the given exponent. |
2206 | let exp_minus = chars.peek() == Some(&'-' ); |
2207 | if exp_minus || chars.peek() == Some(&'+' ) { |
2208 | chars.next(); |
2209 | } |
2210 | |
2211 | for c in chars { |
2212 | if let Some(value) = c.to_digit(10) { |
2213 | has_exp = true; |
2214 | r.exp = r.exp.saturating_mul(10).saturating_add(value as ExpInt); |
2215 | } else { |
2216 | return Err(ParseError("Invalid character in exponent" )); |
2217 | } |
2218 | } |
2219 | if !has_exp { |
2220 | return Err(ParseError("Exponent has no digits" )); |
2221 | } |
2222 | |
2223 | r.exp = r.exp.negate_if(exp_minus); |
2224 | |
2225 | break; |
2226 | } else { |
2227 | return Err(ParseError("Invalid character in significand" )); |
2228 | } |
2229 | } |
2230 | if !any_digits { |
2231 | return Err(ParseError("Significand has no digits" )); |
2232 | } |
2233 | |
2234 | // Hex floats require an exponent but not a hexadecimal point. |
2235 | if !has_exp { |
2236 | return Err(ParseError("Hex strings require an exponent" )); |
2237 | } |
2238 | |
2239 | // Ignore the exponent if we are zero. |
2240 | let first_sig_digit = match first_sig_digit { |
2241 | Some(p) => p, |
2242 | None => return Ok(Status::OK.and(Self::ZERO)), |
2243 | }; |
2244 | |
2245 | // Calculate the exponent adjustment implicit in the number of |
2246 | // significant digits and adjust for writing the significand starting |
2247 | // at the most significant nibble. |
2248 | let exp_adjustment = if dot > first_sig_digit { |
2249 | ExpInt::try_from(dot - first_sig_digit).unwrap() |
2250 | } else { |
2251 | -ExpInt::try_from(first_sig_digit - dot - 1).unwrap() |
2252 | }; |
2253 | let exp_adjustment = exp_adjustment |
2254 | .saturating_mul(4) |
2255 | .saturating_sub(1) |
2256 | .saturating_add(S::PRECISION as ExpInt) |
2257 | .saturating_sub(LIMB_BITS as ExpInt); |
2258 | r.exp = r.exp.saturating_add(exp_adjustment); |
2259 | |
2260 | Ok(r.normalize(round, loss.unwrap_or(Loss::ExactlyZero))) |
2261 | } |
2262 | |
2263 | fn from_decimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { |
2264 | // Given a normal decimal floating point number of the form |
2265 | // |
2266 | // dddd.dddd[eE][+-]ddd |
2267 | // |
2268 | // where the decimal point and exponent are optional, fill out the |
2269 | // variables below. Exponent is appropriate if the significand is |
2270 | // treated as an integer, and normalized_exp if the significand |
2271 | // is taken to have the decimal point after a single leading |
2272 | // non-zero digit. |
2273 | // |
2274 | // If the value is zero, first_sig_digit is None. |
2275 | |
2276 | let mut any_digits = false; |
2277 | let mut dec_exp = 0i32; |
2278 | |
2279 | // Without leading or trailing zeros, irrespective of the dot. |
2280 | let mut first_sig_digit = None; |
2281 | let mut last_sig_digit = 0; |
2282 | let mut dot = s.len(); |
2283 | |
2284 | for (p, c) in s.char_indices() { |
2285 | if c == '.' { |
2286 | if dot != s.len() { |
2287 | return Err(ParseError("String contains multiple dots" )); |
2288 | } |
2289 | dot = p; |
2290 | } else if let Some(dec_value) = c.to_digit(10) { |
2291 | any_digits = true; |
2292 | |
2293 | if dec_value != 0 { |
2294 | if first_sig_digit.is_none() { |
2295 | first_sig_digit = Some(p); |
2296 | } |
2297 | last_sig_digit = p; |
2298 | } |
2299 | } else if c == 'e' || c == 'E' { |
2300 | if !any_digits { |
2301 | return Err(ParseError("Significand has no digits" )); |
2302 | } |
2303 | |
2304 | if dot == s.len() { |
2305 | dot = p; |
2306 | } |
2307 | |
2308 | let mut chars = s[p + 1..].chars().peekable(); |
2309 | |
2310 | // Adjust for the given exponent. |
2311 | let exp_minus = chars.peek() == Some(&'-' ); |
2312 | if exp_minus || chars.peek() == Some(&'+' ) { |
2313 | chars.next(); |
2314 | } |
2315 | |
2316 | let mut any_exp_digits = false; |
2317 | for c in chars { |
2318 | if let Some(value) = c.to_digit(10) { |
2319 | any_exp_digits = true; |
2320 | dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32); |
2321 | } else { |
2322 | return Err(ParseError("Invalid character in exponent" )); |
2323 | } |
2324 | } |
2325 | // Treat no exponent as 0 to match binutils |
2326 | if !any_exp_digits { |
2327 | assert_eq!(dec_exp, 0); |
2328 | } |
2329 | |
2330 | dec_exp = dec_exp.negate_if(exp_minus); |
2331 | |
2332 | break; |
2333 | } else { |
2334 | return Err(ParseError("Invalid character in significand" )); |
2335 | } |
2336 | } |
2337 | if !any_digits { |
2338 | return Err(ParseError("Significand has no digits" )); |
2339 | } |
2340 | |
2341 | // Test if we have a zero number allowing for non-zero exponents. |
2342 | let first_sig_digit = match first_sig_digit { |
2343 | Some(p) => p, |
2344 | None => return Ok(Status::OK.and(Self::ZERO)), |
2345 | }; |
2346 | |
2347 | // Adjust the exponents for any decimal point. |
2348 | if dot > last_sig_digit { |
2349 | dec_exp = dec_exp.saturating_add((dot - last_sig_digit - 1) as i32); |
2350 | } else { |
2351 | dec_exp = dec_exp.saturating_sub((last_sig_digit - dot) as i32); |
2352 | } |
2353 | let significand_digits = |
2354 | last_sig_digit - first_sig_digit + 1 - (dot > first_sig_digit && dot < last_sig_digit) as usize; |
2355 | let normalized_exp = dec_exp.saturating_add(significand_digits as i32 - 1); |
2356 | |
2357 | // Handle the cases where exponents are obviously too large or too |
2358 | // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp |
2359 | // definitely overflows if |
2360 | // |
2361 | // (dec_exp - 1) * L >= MAX_EXP |
2362 | // |
2363 | // and definitely underflows to zero where |
2364 | // |
2365 | // (dec_exp + 1) * L <= MIN_EXP - PRECISION |
2366 | // |
2367 | // With integer arithmetic the tightest bounds for L are |
2368 | // |
2369 | // 93/28 < L < 196/59 [ numerator <= 256 ] |
2370 | // 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] |
2371 | |
2372 | // Check for MAX_EXP. |
2373 | if normalized_exp.saturating_sub(1).saturating_mul(42039) >= 12655 * S::MAX_EXP as i32 { |
2374 | // Overflow and round. |
2375 | return Ok(Self::overflow_result(round)); |
2376 | } |
2377 | |
2378 | // Check for MIN_EXP. |
2379 | if normalized_exp.saturating_add(1).saturating_mul(28738) <= 8651 * (S::MIN_EXP as i32 - S::PRECISION as i32) { |
2380 | // Underflow to zero and round. |
2381 | let r = if round == Round::TowardPositive { |
2382 | IeeeFloat::SMALLEST |
2383 | } else { |
2384 | IeeeFloat::ZERO |
2385 | }; |
2386 | return Ok((Status::UNDERFLOW | Status::INEXACT).and(r)); |
2387 | } |
2388 | |
2389 | // A tight upper bound on number of bits required to hold an |
2390 | // N-digit decimal integer is N * 196 / 59. Allocate enough space |
2391 | // to hold the full significand, and an extra limb required by |
2392 | // tcMultiplyPart. |
2393 | let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59); |
2394 | let mut dec_sig = DynPrecisionLimbVec::with_capacity(max_limbs); |
2395 | |
2396 | // Convert to binary efficiently - we do almost all multiplication |
2397 | // in a Limb. When this would overflow do we do a single |
2398 | // bignum multiplication, and then revert again to multiplication |
2399 | // in a Limb. |
2400 | let mut chars = s[first_sig_digit..last_sig_digit + 1].chars(); |
2401 | loop { |
2402 | let mut val = 0; |
2403 | let mut multiplier = 1; |
2404 | |
2405 | loop { |
2406 | let dec_value = match chars.next() { |
2407 | Some('.' ) => continue, |
2408 | Some(c) => c.to_digit(10).unwrap(), |
2409 | None => break, |
2410 | }; |
2411 | |
2412 | multiplier *= 10; |
2413 | val = val * 10 + dec_value as Limb; |
2414 | |
2415 | // The maximum number that can be multiplied by ten with any |
2416 | // digit added without overflowing a Limb. |
2417 | if multiplier > (!0 - 9) / 10 { |
2418 | break; |
2419 | } |
2420 | } |
2421 | |
2422 | // If we've consumed no digits, we're done. |
2423 | if multiplier == 1 { |
2424 | break; |
2425 | } |
2426 | |
2427 | // Multiply out the current limb. |
2428 | let mut carry = val; |
2429 | for x in &mut dec_sig { |
2430 | let [low, mut high] = sig::widening_mul(*x, multiplier); |
2431 | |
2432 | // Now add carry. |
2433 | let (low, overflow) = low.overflowing_add(carry); |
2434 | high += overflow as Limb; |
2435 | |
2436 | *x = low; |
2437 | carry = high; |
2438 | } |
2439 | |
2440 | // If we had carry, we need another limb (likely but not guaranteed). |
2441 | if carry > 0 { |
2442 | dec_sig.push(carry); |
2443 | } |
2444 | } |
2445 | |
2446 | // Calculate pow(5, abs(dec_exp)) into `pow5_full`. |
2447 | // The *_calc Vec's are reused scratch space, as an optimization. |
2448 | let (pow5_full, mut pow5_calc, mut sig_calc, mut sig_scratch_calc) = { |
2449 | let mut power = dec_exp.abs() as usize; |
2450 | |
2451 | const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125]; |
2452 | |
2453 | let mut p5_scratch = DynPrecisionLimbVec::new(); |
2454 | let mut p5: DynPrecisionLimbVec = [FIRST_EIGHT_POWERS[4]].into_iter().collect(); |
2455 | |
2456 | let mut r_scratch = DynPrecisionLimbVec::new(); |
2457 | let mut r: DynPrecisionLimbVec = [FIRST_EIGHT_POWERS[power & 7]].into_iter().collect(); |
2458 | power >>= 3; |
2459 | |
2460 | while power > 0 { |
2461 | // Calculate pow(5,pow(2,n+3)). |
2462 | p5_scratch.resize(p5.len() * 2, 0); |
2463 | let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS); |
2464 | while p5_scratch.last() == Some(&0) { |
2465 | p5_scratch.pop(); |
2466 | } |
2467 | mem::swap(&mut p5, &mut p5_scratch); |
2468 | |
2469 | if power & 1 != 0 { |
2470 | r_scratch.resize(r.len() + p5.len(), 0); |
2471 | let _: Loss = sig::mul(&mut r_scratch, &mut 0, &r, &p5, (r.len() + p5.len()) * LIMB_BITS); |
2472 | while r_scratch.last() == Some(&0) { |
2473 | r_scratch.pop(); |
2474 | } |
2475 | mem::swap(&mut r, &mut r_scratch); |
2476 | } |
2477 | |
2478 | power >>= 1; |
2479 | } |
2480 | |
2481 | (r, r_scratch, p5, p5_scratch) |
2482 | }; |
2483 | |
2484 | // Attempt dec_sig * 10^dec_exp with increasing precision. |
2485 | let mut attempt = 0; |
2486 | loop { |
2487 | let calc_precision = (LIMB_BITS << attempt) - 1; |
2488 | attempt += 1; |
2489 | |
2490 | let calc_normal_from_limbs = |sig: &mut DynPrecisionLimbVec, limbs: &[Limb]| -> StatusAnd<ExpInt> { |
2491 | sig.resize(limbs_for_bits(calc_precision), 0); |
2492 | let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision); |
2493 | |
2494 | // Before rounding normalize the exponent of Category::Normal numbers. |
2495 | let mut omsb = sig::omsb(sig); |
2496 | |
2497 | assert_ne!(omsb, 0); |
2498 | |
2499 | // OMSB is numbered from 1. We want to place it in the integer |
2500 | // bit numbered PRECISION if possible, with a compensating change in |
2501 | // the exponent. |
2502 | let final_exp = exp.saturating_add(omsb as ExpInt - calc_precision as ExpInt); |
2503 | |
2504 | // Shifting left is easy as we don't lose precision. |
2505 | if final_exp < exp { |
2506 | assert_eq!(loss, Loss::ExactlyZero); |
2507 | |
2508 | let exp_change = (exp - final_exp) as usize; |
2509 | sig::shift_left(sig, &mut exp, exp_change); |
2510 | |
2511 | return Status::OK.and(exp); |
2512 | } |
2513 | |
2514 | // Shift right and capture any new lost fraction. |
2515 | if final_exp > exp { |
2516 | let exp_change = (final_exp - exp) as usize; |
2517 | loss = sig::shift_right(sig, &mut exp, exp_change).combine(loss); |
2518 | |
2519 | // Keep OMSB up-to-date. |
2520 | omsb = omsb.saturating_sub(exp_change); |
2521 | } |
2522 | |
2523 | assert_eq!(omsb, calc_precision); |
2524 | |
2525 | // Now round the number according to round given the lost |
2526 | // fraction. |
2527 | |
2528 | // As specified in IEEE 754, since we do not trap we do not report |
2529 | // underflow for exact results. |
2530 | if loss == Loss::ExactlyZero { |
2531 | return Status::OK.and(exp); |
2532 | } |
2533 | |
2534 | // Increment the significand if we're rounding away from zero. |
2535 | if loss == Loss::MoreThanHalf || loss == Loss::ExactlyHalf && sig::get_bit(sig, 0) { |
2536 | // We should never overflow. |
2537 | assert_eq!(sig::increment(sig), 0); |
2538 | omsb = sig::omsb(sig); |
2539 | |
2540 | // Did the significand increment overflow? |
2541 | if omsb == calc_precision + 1 { |
2542 | let _: Loss = sig::shift_right(sig, &mut exp, 1); |
2543 | |
2544 | return Status::INEXACT.and(exp); |
2545 | } |
2546 | } |
2547 | |
2548 | // The normal case - we were and are not denormal, and any |
2549 | // significand increment above didn't overflow. |
2550 | Status::INEXACT.and(exp) |
2551 | }; |
2552 | |
2553 | let status; |
2554 | let mut exp = unpack!(status=, |
2555 | calc_normal_from_limbs(&mut sig_calc, &dec_sig)); |
2556 | let pow5_status; |
2557 | let pow5_exp = unpack!(pow5_status=, |
2558 | calc_normal_from_limbs(&mut pow5_calc, &pow5_full)); |
2559 | |
2560 | // Add dec_exp, as 10^n = 5^n * 2^n. |
2561 | exp += dec_exp as ExpInt; |
2562 | |
2563 | let mut used_bits = S::PRECISION; |
2564 | let mut truncated_bits = calc_precision - used_bits; |
2565 | |
2566 | let half_ulp_err1 = (status != Status::OK) as Limb; |
2567 | let (calc_loss, half_ulp_err2); |
2568 | if dec_exp >= 0 { |
2569 | exp += pow5_exp; |
2570 | |
2571 | sig_scratch_calc.resize(sig_calc.len() + pow5_calc.len(), 0); |
2572 | calc_loss = sig::mul(&mut sig_scratch_calc, &mut exp, &sig_calc, &pow5_calc, calc_precision); |
2573 | mem::swap(&mut sig_calc, &mut sig_scratch_calc); |
2574 | |
2575 | half_ulp_err2 = (pow5_status != Status::OK) as Limb; |
2576 | } else { |
2577 | exp -= pow5_exp; |
2578 | |
2579 | sig_scratch_calc.resize(sig_calc.len(), 0); |
2580 | calc_loss = sig::div(&mut sig_scratch_calc, &mut exp, &mut sig_calc, &mut pow5_calc, calc_precision); |
2581 | mem::swap(&mut sig_calc, &mut sig_scratch_calc); |
2582 | |
2583 | // Denormal numbers have less precision. |
2584 | if exp < S::MIN_EXP { |
2585 | truncated_bits += (S::MIN_EXP - exp) as usize; |
2586 | used_bits = calc_precision.saturating_sub(truncated_bits); |
2587 | } |
2588 | // Extra half-ulp lost in reciprocal of exponent. |
2589 | half_ulp_err2 = 2 * (pow5_status != Status::OK || calc_loss != Loss::ExactlyZero) as Limb; |
2590 | } |
2591 | |
2592 | // Both sig::mul and sig::div return the |
2593 | // result with the integer bit set. |
2594 | assert!(sig::get_bit(&sig_calc, calc_precision - 1)); |
2595 | |
2596 | // The error from the true value, in half-ulps, on multiplying two |
2597 | // floating point numbers, which differ from the value they |
2598 | // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less |
2599 | // than the returned value. |
2600 | // |
2601 | // See "How to Read Floating Point Numbers Accurately" by William D Clinger. |
2602 | assert!(half_ulp_err1 < 2 || half_ulp_err2 < 2 || (half_ulp_err1 + half_ulp_err2 < 8)); |
2603 | |
2604 | let inexact = (calc_loss != Loss::ExactlyZero) as Limb; |
2605 | let half_ulp_err = if half_ulp_err1 + half_ulp_err2 == 0 { |
2606 | inexact * 2 // <= inexact half-ulps. |
2607 | } else { |
2608 | inexact + 2 * (half_ulp_err1 + half_ulp_err2) |
2609 | }; |
2610 | |
2611 | let ulps_from_boundary = { |
2612 | let bits = calc_precision - used_bits - 1; |
2613 | |
2614 | let i = bits / LIMB_BITS; |
2615 | let limb = sig_calc[i] & (!0 >> (LIMB_BITS - 1 - bits % LIMB_BITS)); |
2616 | let boundary = match round { |
2617 | Round::NearestTiesToEven | Round::NearestTiesToAway => 1 << (bits % LIMB_BITS), |
2618 | _ => 0, |
2619 | }; |
2620 | if i == 0 { |
2621 | let delta = limb.wrapping_sub(boundary); |
2622 | cmp::min(delta, delta.wrapping_neg()) |
2623 | } else if limb == boundary { |
2624 | if !sig::is_all_zeros(&sig_calc[1..i]) { |
2625 | !0 // A lot. |
2626 | } else { |
2627 | sig_calc[0] |
2628 | } |
2629 | } else if limb == boundary.wrapping_sub(1) { |
2630 | if sig_calc[1..i].iter().any(|&x| x.wrapping_neg() != 1) { |
2631 | !0 // A lot. |
2632 | } else { |
2633 | sig_calc[0].wrapping_neg() |
2634 | } |
2635 | } else { |
2636 | !0 // A lot. |
2637 | } |
2638 | }; |
2639 | |
2640 | // Are we guaranteed to round correctly if we truncate? |
2641 | if ulps_from_boundary.saturating_mul(2) >= half_ulp_err { |
2642 | let mut r = IeeeFloat { |
2643 | sig: [0], |
2644 | exp, |
2645 | read_only_category_do_not_mutate: Category::Normal, |
2646 | read_only_sign_do_not_mutate: false, |
2647 | marker: PhantomData, |
2648 | }; |
2649 | sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits); |
2650 | // If we extracted less bits above we must adjust our exponent |
2651 | // to compensate for the implicit right shift. |
2652 | r.exp += (S::PRECISION - used_bits) as ExpInt; |
2653 | let loss = Loss::through_truncation(&sig_calc, truncated_bits); |
2654 | return Ok(r.normalize(round, loss)); |
2655 | } |
2656 | } |
2657 | } |
2658 | } |
2659 | |
2660 | impl Loss { |
2661 | /// Combine the effect of two lost fractions. |
2662 | #[inline ] |
2663 | fn combine(self, less_significant: Loss) -> Loss { |
2664 | let mut more_significant = self; |
2665 | if less_significant != Loss::ExactlyZero { |
2666 | if more_significant == Loss::ExactlyZero { |
2667 | more_significant = Loss::LessThanHalf; |
2668 | } else if more_significant == Loss::ExactlyHalf { |
2669 | more_significant = Loss::MoreThanHalf; |
2670 | } |
2671 | } |
2672 | |
2673 | more_significant |
2674 | } |
2675 | |
2676 | /// Return the fraction lost were a bignum truncated losing the least |
2677 | /// significant `bits` bits. |
2678 | #[inline ] |
2679 | fn through_truncation(limbs: &[Limb], bits: usize) -> Loss { |
2680 | if bits == 0 { |
2681 | return Loss::ExactlyZero; |
2682 | } |
2683 | |
2684 | let half_bit = bits - 1; |
2685 | let half_limb = half_bit / LIMB_BITS; |
2686 | let (half_limb, rest) = if half_limb < limbs.len() { |
2687 | (limbs[half_limb], &limbs[..half_limb]) |
2688 | } else { |
2689 | (0, limbs) |
2690 | }; |
2691 | let half = 1 << (half_bit % LIMB_BITS); |
2692 | let has_half = half_limb & half != 0; |
2693 | let has_rest = half_limb & (half - 1) != 0 || !sig::is_all_zeros(rest); |
2694 | |
2695 | match (has_half, has_rest) { |
2696 | (false, false) => Loss::ExactlyZero, |
2697 | (false, true) => Loss::LessThanHalf, |
2698 | (true, false) => Loss::ExactlyHalf, |
2699 | (true, true) => Loss::MoreThanHalf, |
2700 | } |
2701 | } |
2702 | } |
2703 | |
2704 | /// Implementation details of IeeeFloat significands, such as big integer arithmetic. |
2705 | /// As a rule of thumb, no functions in this module should dynamically allocate. |
2706 | mod sig { |
2707 | use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS}; |
2708 | use core::cmp::Ordering; |
2709 | use core::mem; |
2710 | |
2711 | #[inline ] |
2712 | pub(super) fn is_all_zeros(limbs: &[Limb]) -> bool { |
2713 | limbs.iter().all(|&l| l == 0) |
2714 | } |
2715 | |
2716 | /// One, not zero, based LSB. That is, returns 0 for a zeroed significand. |
2717 | #[inline ] |
2718 | pub(super) fn olsb(limbs: &[Limb]) -> usize { |
2719 | for i in 0..limbs.len() { |
2720 | if limbs[i] != 0 { |
2721 | return i * LIMB_BITS + limbs[i].trailing_zeros() as usize + 1; |
2722 | } |
2723 | } |
2724 | |
2725 | 0 |
2726 | } |
2727 | |
2728 | /// One, not zero, based MSB. That is, returns 0 for a zeroed significand. |
2729 | #[inline ] |
2730 | pub(super) fn omsb(limbs: &[Limb]) -> usize { |
2731 | for i in (0..limbs.len()).rev() { |
2732 | if limbs[i] != 0 { |
2733 | return (i + 1) * LIMB_BITS - limbs[i].leading_zeros() as usize; |
2734 | } |
2735 | } |
2736 | |
2737 | 0 |
2738 | } |
2739 | |
2740 | /// Comparison (unsigned) of two significands. |
2741 | #[inline ] |
2742 | pub(super) fn cmp(a: &[Limb], b: &[Limb]) -> Ordering { |
2743 | assert_eq!(a.len(), b.len()); |
2744 | for (a, b) in a.iter().zip(b).rev() { |
2745 | match a.cmp(b) { |
2746 | Ordering::Equal => {} |
2747 | o => return o, |
2748 | } |
2749 | } |
2750 | |
2751 | Ordering::Equal |
2752 | } |
2753 | |
2754 | /// Extract the given bit. |
2755 | #[inline ] |
2756 | pub(super) fn get_bit(limbs: &[Limb], bit: usize) -> bool { |
2757 | limbs[bit / LIMB_BITS] & (1 << (bit % LIMB_BITS)) != 0 |
2758 | } |
2759 | |
2760 | /// Set the given bit. |
2761 | #[inline ] |
2762 | pub(super) fn set_bit(limbs: &mut [Limb], bit: usize) { |
2763 | limbs[bit / LIMB_BITS] |= 1 << (bit % LIMB_BITS); |
2764 | } |
2765 | |
2766 | /// Shift `dst` left `bits` bits, subtract `bits` from its exponent. |
2767 | #[inline ] |
2768 | pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) { |
2769 | if bits > 0 { |
2770 | // Our exponent should not underflow. |
2771 | *exp = exp.checked_sub(bits as ExpInt).unwrap(); |
2772 | |
2773 | // Jump is the inter-limb jump; shift is is intra-limb shift. |
2774 | let jump = bits / LIMB_BITS; |
2775 | let shift = bits % LIMB_BITS; |
2776 | |
2777 | for i in (0..dst.len()).rev() { |
2778 | let mut limb; |
2779 | |
2780 | if i < jump { |
2781 | limb = 0; |
2782 | } else { |
2783 | // dst[i] comes from the two limbs src[i - jump] and, if we have |
2784 | // an intra-limb shift, src[i - jump - 1]. |
2785 | limb = dst[i - jump]; |
2786 | if shift > 0 { |
2787 | limb <<= shift; |
2788 | if i >= jump + 1 { |
2789 | limb |= dst[i - jump - 1] >> (LIMB_BITS - shift); |
2790 | } |
2791 | } |
2792 | } |
2793 | |
2794 | dst[i] = limb; |
2795 | } |
2796 | } |
2797 | } |
2798 | |
2799 | /// Shift `dst` right `bits` bits noting lost fraction. |
2800 | #[inline ] |
2801 | pub(super) fn shift_right(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) -> Loss { |
2802 | let loss = Loss::through_truncation(dst, bits); |
2803 | |
2804 | if bits > 0 { |
2805 | // Our exponent should not overflow. |
2806 | *exp = exp.checked_add(bits as ExpInt).unwrap(); |
2807 | |
2808 | // Jump is the inter-limb jump; shift is is intra-limb shift. |
2809 | let jump = bits / LIMB_BITS; |
2810 | let shift = bits % LIMB_BITS; |
2811 | |
2812 | // Perform the shift. This leaves the most significant `bits` bits |
2813 | // of the result at zero. |
2814 | for i in 0..dst.len() { |
2815 | let mut limb; |
2816 | |
2817 | if i + jump >= dst.len() { |
2818 | limb = 0; |
2819 | } else { |
2820 | limb = dst[i + jump]; |
2821 | if shift > 0 { |
2822 | limb >>= shift; |
2823 | if i + jump + 1 < dst.len() { |
2824 | limb |= dst[i + jump + 1] << (LIMB_BITS - shift); |
2825 | } |
2826 | } |
2827 | } |
2828 | |
2829 | dst[i] = limb; |
2830 | } |
2831 | } |
2832 | |
2833 | loss |
2834 | } |
2835 | |
2836 | /// Copy the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB, |
2837 | /// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`. |
2838 | /// All high bits above `src_bits` in `dst` are zero-filled. |
2839 | #[inline ] |
2840 | pub(super) fn extract(dst: &mut [Limb], src: &[Limb], src_bits: usize, src_lsb: usize) { |
2841 | if src_bits == 0 { |
2842 | return; |
2843 | } |
2844 | |
2845 | let dst_limbs = limbs_for_bits(src_bits); |
2846 | assert!(dst_limbs <= dst.len()); |
2847 | |
2848 | let src = &src[src_lsb / LIMB_BITS..]; |
2849 | dst[..dst_limbs].copy_from_slice(&src[..dst_limbs]); |
2850 | |
2851 | let shift = src_lsb % LIMB_BITS; |
2852 | let _: Loss = shift_right(&mut dst[..dst_limbs], &mut 0, shift); |
2853 | |
2854 | // We now have (dst_limbs * LIMB_BITS - shift) bits from `src` |
2855 | // in `dst`. If this is less that src_bits, append the rest, else |
2856 | // clear the high bits. |
2857 | let n = dst_limbs * LIMB_BITS - shift; |
2858 | if n < src_bits { |
2859 | let mask = (1 << (src_bits - n)) - 1; |
2860 | dst[dst_limbs - 1] |= (src[dst_limbs] & mask) << n % LIMB_BITS; |
2861 | } else if n > src_bits && src_bits % LIMB_BITS > 0 { |
2862 | dst[dst_limbs - 1] &= (1 << (src_bits % LIMB_BITS)) - 1; |
2863 | } |
2864 | |
2865 | // Clear high limbs. |
2866 | for x in &mut dst[dst_limbs..] { |
2867 | *x = 0; |
2868 | } |
2869 | } |
2870 | |
2871 | /// We want the most significant PRECISION bits of `src`. There may not |
2872 | /// be that many; extract what we can. |
2873 | #[inline ] |
2874 | pub(super) fn from_limbs(dst: &mut [Limb], src: &[Limb], precision: usize) -> (Loss, ExpInt) { |
2875 | let omsb = omsb(src); |
2876 | |
2877 | if precision <= omsb { |
2878 | extract(dst, src, precision, omsb - precision); |
2879 | (Loss::through_truncation(src, omsb - precision), omsb as ExpInt - 1) |
2880 | } else { |
2881 | extract(dst, src, omsb, 0); |
2882 | (Loss::ExactlyZero, precision as ExpInt - 1) |
2883 | } |
2884 | } |
2885 | |
2886 | /// For every consecutive chunk of `bits` bits from `limbs`, |
2887 | /// going from most significant to the least significant bits, |
2888 | /// call `f` to transform those bits and store the result back. |
2889 | #[inline ] |
2890 | pub(super) fn each_chunk<F: FnMut(Limb) -> Limb>(limbs: &mut [Limb], bits: usize, mut f: F) { |
2891 | assert_eq!(LIMB_BITS % bits, 0); |
2892 | for limb in limbs.iter_mut().rev() { |
2893 | let mut r = 0; |
2894 | for i in (0..LIMB_BITS / bits).rev() { |
2895 | r |= f((*limb >> (i * bits)) & ((1 << bits) - 1)) << (i * bits); |
2896 | } |
2897 | *limb = r; |
2898 | } |
2899 | } |
2900 | |
2901 | /// Increment in-place, return the carry flag. |
2902 | #[inline ] |
2903 | pub(super) fn increment(dst: &mut [Limb]) -> Limb { |
2904 | for x in dst { |
2905 | *x = x.wrapping_add(1); |
2906 | if *x != 0 { |
2907 | return 0; |
2908 | } |
2909 | } |
2910 | |
2911 | 1 |
2912 | } |
2913 | |
2914 | /// Decrement in-place, return the borrow flag. |
2915 | #[inline ] |
2916 | pub(super) fn decrement(dst: &mut [Limb]) -> Limb { |
2917 | for x in dst { |
2918 | *x = x.wrapping_sub(1); |
2919 | if *x != !0 { |
2920 | return 0; |
2921 | } |
2922 | } |
2923 | |
2924 | 1 |
2925 | } |
2926 | |
2927 | /// `a += b + c` where `c` is zero or one. Returns the carry flag. |
2928 | #[inline ] |
2929 | pub(super) fn add(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb { |
2930 | assert!(c <= 1); |
2931 | |
2932 | for (a, &b) in a.iter_mut().zip(b) { |
2933 | let (r, overflow) = a.overflowing_add(b); |
2934 | let (r, overflow2) = r.overflowing_add(c); |
2935 | *a = r; |
2936 | c = (overflow | overflow2) as Limb; |
2937 | } |
2938 | |
2939 | c |
2940 | } |
2941 | |
2942 | /// `a -= b + c` where `c` is zero or one. Returns the borrow flag. |
2943 | #[inline ] |
2944 | pub(super) fn sub(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb { |
2945 | assert!(c <= 1); |
2946 | |
2947 | for (a, &b) in a.iter_mut().zip(b) { |
2948 | let (r, overflow) = a.overflowing_sub(b); |
2949 | let (r, overflow2) = r.overflowing_sub(c); |
2950 | *a = r; |
2951 | c = (overflow | overflow2) as Limb; |
2952 | } |
2953 | |
2954 | c |
2955 | } |
2956 | |
2957 | /// `a += b` or `a -= b`. Does not preserve `b`. |
2958 | #[inline ] |
2959 | pub(super) fn add_or_sub( |
2960 | a_sig: &mut [Limb], |
2961 | a_exp: &mut ExpInt, |
2962 | a_sign: &mut bool, |
2963 | b_sig: &mut [Limb], |
2964 | b_exp: ExpInt, |
2965 | b_sign: bool, |
2966 | ) -> Loss { |
2967 | // Are we bigger exponent-wise than the RHS? |
2968 | let bits = *a_exp - b_exp; |
2969 | |
2970 | // Determine if the operation on the absolute values is effectively |
2971 | // an addition or subtraction. |
2972 | // Subtraction is more subtle than one might naively expect. |
2973 | if *a_sign ^ b_sign { |
2974 | let loss; |
2975 | |
2976 | if bits == 0 { |
2977 | loss = Loss::ExactlyZero; |
2978 | } else if bits > 0 { |
2979 | loss = shift_right(b_sig, &mut 0, (bits - 1) as usize); |
2980 | shift_left(a_sig, a_exp, 1); |
2981 | } else { |
2982 | loss = shift_right(a_sig, a_exp, (-bits - 1) as usize); |
2983 | shift_left(b_sig, &mut 0, 1); |
2984 | } |
2985 | |
2986 | let borrow = (loss != Loss::ExactlyZero) as Limb; |
2987 | |
2988 | // Should we reverse the subtraction. |
2989 | if cmp(a_sig, b_sig) == Ordering::Less { |
2990 | // The code above is intended to ensure that no borrow is necessary. |
2991 | assert_eq!(sub(b_sig, a_sig, borrow), 0); |
2992 | a_sig.copy_from_slice(b_sig); |
2993 | *a_sign = !*a_sign; |
2994 | } else { |
2995 | // The code above is intended to ensure that no borrow is necessary. |
2996 | assert_eq!(sub(a_sig, b_sig, borrow), 0); |
2997 | } |
2998 | |
2999 | // Invert the lost fraction - it was on the RHS and subtracted. |
3000 | match loss { |
3001 | Loss::LessThanHalf => Loss::MoreThanHalf, |
3002 | Loss::MoreThanHalf => Loss::LessThanHalf, |
3003 | _ => loss, |
3004 | } |
3005 | } else { |
3006 | let loss = if bits > 0 { |
3007 | shift_right(b_sig, &mut 0, bits as usize) |
3008 | } else { |
3009 | shift_right(a_sig, a_exp, -bits as usize) |
3010 | }; |
3011 | // We have a guard bit; generating a carry cannot happen. |
3012 | assert_eq!(add(a_sig, b_sig, 0), 0); |
3013 | loss |
3014 | } |
3015 | } |
3016 | |
3017 | /// `[low, high] = a * b`. |
3018 | /// |
3019 | /// This cannot overflow, because |
3020 | /// |
3021 | /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)` |
3022 | /// |
3023 | /// which is less than n^2. |
3024 | #[inline ] |
3025 | pub(super) fn widening_mul(a: Limb, b: Limb) -> [Limb; 2] { |
3026 | let mut wide = [0, 0]; |
3027 | |
3028 | if a == 0 || b == 0 { |
3029 | return wide; |
3030 | } |
3031 | |
3032 | const HALF_BITS: usize = LIMB_BITS / 2; |
3033 | |
3034 | let select = |limb, i| (limb >> (i * HALF_BITS)) & ((1 << HALF_BITS) - 1); |
3035 | for i in 0..2 { |
3036 | for j in 0..2 { |
3037 | let mut x = [select(a, i) * select(b, j), 0]; |
3038 | shift_left(&mut x, &mut 0, (i + j) * HALF_BITS); |
3039 | assert_eq!(add(&mut wide, &x, 0), 0); |
3040 | } |
3041 | } |
3042 | |
3043 | wide |
3044 | } |
3045 | |
3046 | /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction. |
3047 | #[inline ] |
3048 | pub(super) fn mul<'a>( |
3049 | dst: &mut [Limb], |
3050 | exp: &mut ExpInt, |
3051 | mut a: &'a [Limb], |
3052 | mut b: &'a [Limb], |
3053 | precision: usize, |
3054 | ) -> Loss { |
3055 | // Put the narrower number on the `a` for less loops below. |
3056 | if a.len() > b.len() { |
3057 | mem::swap(&mut a, &mut b); |
3058 | } |
3059 | |
3060 | for x in &mut dst[..b.len()] { |
3061 | *x = 0; |
3062 | } |
3063 | |
3064 | for i in 0..a.len() { |
3065 | let mut carry = 0; |
3066 | for j in 0..b.len() { |
3067 | let [low, mut high] = widening_mul(a[i], b[j]); |
3068 | |
3069 | // Now add carry. |
3070 | let (low, overflow) = low.overflowing_add(carry); |
3071 | high += overflow as Limb; |
3072 | |
3073 | // And now `dst[i + j]`, and store the new low part there. |
3074 | let (low, overflow) = low.overflowing_add(dst[i + j]); |
3075 | high += overflow as Limb; |
3076 | |
3077 | dst[i + j] = low; |
3078 | carry = high; |
3079 | } |
3080 | dst[i + b.len()] = carry; |
3081 | } |
3082 | |
3083 | // Assume the operands involved in the multiplication are single-precision |
3084 | // FP, and the two multiplicants are: |
3085 | // a = a23 . a22 ... a0 * 2^e1 |
3086 | // b = b23 . b22 ... b0 * 2^e2 |
3087 | // the result of multiplication is: |
3088 | // dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) |
3089 | // Note that there are three significant bits at the left-hand side of the |
3090 | // radix point: two for the multiplication, and an overflow bit for the |
3091 | // addition (that will always be zero at this point). Move the radix point |
3092 | // toward left by two bits, and adjust exponent accordingly. |
3093 | *exp += 2; |
3094 | |
3095 | // Convert the result having "2 * precision" significant-bits back to the one |
3096 | // having "precision" significant-bits. First, move the radix point from |
3097 | // poision "2*precision - 1" to "precision - 1". The exponent need to be |
3098 | // adjusted by "2*precision - 1" - "precision - 1" = "precision". |
3099 | *exp -= precision as ExpInt + 1; |
3100 | |
3101 | // In case MSB resides at the left-hand side of radix point, shift the |
3102 | // mantissa right by some amount to make sure the MSB reside right before |
3103 | // the radix point (i.e. "MSB . rest-significant-bits"). |
3104 | // |
3105 | // Note that the result is not normalized when "omsb < precision". So, the |
3106 | // caller needs to call IeeeFloat::normalize() if normalized value is |
3107 | // expected. |
3108 | let omsb = omsb(dst); |
3109 | if omsb <= precision { |
3110 | Loss::ExactlyZero |
3111 | } else { |
3112 | shift_right(dst, exp, omsb - precision) |
3113 | } |
3114 | } |
3115 | |
3116 | /// `quotient = dividend / divisor`. Returns the lost fraction. |
3117 | /// Does not preserve `dividend` or `divisor`. |
3118 | #[inline ] |
3119 | pub(super) fn div( |
3120 | quotient: &mut [Limb], |
3121 | exp: &mut ExpInt, |
3122 | dividend: &mut [Limb], |
3123 | divisor: &mut [Limb], |
3124 | precision: usize, |
3125 | ) -> Loss { |
3126 | // Normalize the divisor. |
3127 | let bits = precision - omsb(divisor); |
3128 | shift_left(divisor, &mut 0, bits); |
3129 | *exp += bits as ExpInt; |
3130 | |
3131 | // Normalize the dividend. |
3132 | let bits = precision - omsb(dividend); |
3133 | shift_left(dividend, exp, bits); |
3134 | |
3135 | // Division by 1. |
3136 | let olsb_divisor = olsb(divisor); |
3137 | if olsb_divisor == precision { |
3138 | quotient.copy_from_slice(dividend); |
3139 | return Loss::ExactlyZero; |
3140 | } |
3141 | |
3142 | // Ensure the dividend >= divisor initially for the loop below. |
3143 | // Incidentally, this means that the division loop below is |
3144 | // guaranteed to set the integer bit to one. |
3145 | if cmp(dividend, divisor) == Ordering::Less { |
3146 | shift_left(dividend, exp, 1); |
3147 | assert_ne!(cmp(dividend, divisor), Ordering::Less) |
3148 | } |
3149 | |
3150 | // Helper for figuring out the lost fraction. |
3151 | let lost_fraction = |dividend: &[Limb], divisor: &[Limb]| match cmp(dividend, divisor) { |
3152 | Ordering::Greater => Loss::MoreThanHalf, |
3153 | Ordering::Equal => Loss::ExactlyHalf, |
3154 | Ordering::Less => { |
3155 | if is_all_zeros(dividend) { |
3156 | Loss::ExactlyZero |
3157 | } else { |
3158 | Loss::LessThanHalf |
3159 | } |
3160 | } |
3161 | }; |
3162 | |
3163 | // Try to perform a (much faster) short division for small divisors. |
3164 | let divisor_bits = precision - (olsb_divisor - 1); |
3165 | macro_rules! try_short_div { |
3166 | ($W:ty, $H:ty, $half:expr) => { |
3167 | if divisor_bits * 2 <= $half { |
3168 | // Extract the small divisor. |
3169 | let _: Loss = shift_right(divisor, &mut 0, olsb_divisor - 1); |
3170 | let divisor = divisor[0] as $H as $W; |
3171 | |
3172 | // Shift the dividend to produce a quotient with the unit bit set. |
3173 | let top_limb = *dividend.last().unwrap(); |
3174 | let mut rem = (top_limb >> (LIMB_BITS - (divisor_bits - 1))) as $H; |
3175 | shift_left(dividend, &mut 0, divisor_bits - 1); |
3176 | |
3177 | // Apply short division in place on $H (of $half bits) chunks. |
3178 | each_chunk(dividend, $half, |chunk| { |
3179 | let chunk = chunk as $H; |
3180 | let combined = ((rem as $W) << $half) | (chunk as $W); |
3181 | rem = (combined % divisor) as $H; |
3182 | (combined / divisor) as $H as Limb |
3183 | }); |
3184 | quotient.copy_from_slice(dividend); |
3185 | |
3186 | return lost_fraction(&[(rem as Limb) << 1], &[divisor as Limb]); |
3187 | } |
3188 | }; |
3189 | } |
3190 | |
3191 | try_short_div!(u32, u16, 16); |
3192 | try_short_div!(u64, u32, 32); |
3193 | try_short_div!(u128, u64, 64); |
3194 | |
3195 | // Zero the quotient before setting bits in it. |
3196 | for x in &mut quotient[..limbs_for_bits(precision)] { |
3197 | *x = 0; |
3198 | } |
3199 | |
3200 | // Long division. |
3201 | for bit in (0..precision).rev() { |
3202 | if cmp(dividend, divisor) != Ordering::Less { |
3203 | sub(dividend, divisor, 0); |
3204 | set_bit(quotient, bit); |
3205 | } |
3206 | shift_left(dividend, &mut 0, 1); |
3207 | } |
3208 | |
3209 | lost_fraction(dividend, divisor) |
3210 | } |
3211 | } |
3212 | |