1//! Support for floating point types compatible with IEEE 754.
2
3use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO};
4use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd};
5
6use core::cmp::{self, Ordering};
7use core::convert::TryFrom;
8use core::fmt::{self, Write};
9use core::marker::PhantomData;
10use core::mem;
11use core::ops::Neg;
12
13/// A floating point number that uses IEEE semantics.
14///
15/// Usually you will want to use the available type aliases of this type
16/// (e.g., [`Single`], [`Double`]) rather than referencing it directly.
17///
18/// If `S` implements [`Semantics`], this type will implement [`Float`].
19#[must_use]
20pub struct IeeeFloat<S> {
21 /// Absolute significand value (including the integer bit).
22 sig: [Limb; 1],
23
24 /// The signed unbiased exponent of the value.
25 exp: ExpInt,
26
27 /// What kind of floating point number this is.
28 //
29 // HACK(eddyb) because mutating this without accounting for `exp`/`sig`
30 // can break some subtle edge cases, it should be only read through the
31 // `.category()` method, and only set during initialization, either for
32 // one of the special value constants, or for conversion from bits.
33 read_only_category_do_not_mutate: Category,
34
35 /// Sign bit of the number.
36 //
37 // HACK(eddyb) because mutating this without accounting for `category`
38 // can break some subtle edge cases, it should be only read through the
39 // `.is_negative()` method, and only set through negation (which can be
40 // more easily used through e.g. `copy_sign`/`negate_if`/`with_sign`).
41 read_only_sign_do_not_mutate: bool,
42
43 marker: PhantomData<S>,
44}
45
46/// Fundamental unit of big integer arithmetic, but also
47/// large to store the largest significands by itself.
48type Limb = u128;
49const LIMB_BITS: usize = 128;
50#[inline(always)]
51fn limbs_for_bits(bits: usize) -> usize {
52 (bits + LIMB_BITS - 1) / LIMB_BITS
53}
54
55/// Growable `[Limb]` (i.e. heap-allocated and typically `Vec`/`SmallVec`/etc.),
56/// used only by algorithms that may require dynamically arbitrary precision,
57/// i.e. conversions from/to decimal strings.
58///
59/// Note: the specific type was chosen by starting with `SmallVec<[_; 1]>` and
60/// increasing the inline length as long as benchmarks were showing improvements,
61/// or at least the `Double::from_str` ones, which roughly had these behaviors:
62/// * `Vec<_>` -> `SmallVec<[_; 1]>`: ~15% speedup, but only for shorter inputs
63/// * `SmallVec<[_; 1]>` -> `SmallVec<[_; 2]>`: ~10% speedup for longer inputs
64/// * `SmallVec<[_; 2]>` -> `SmallVec<[_; 3]>`: noise and/or diminishing returns
65///
66/// Note: the choice of type described above, and the factors in its decision,
67/// are specific to `Limb` being `u128`, so if `Limb` changes, this should too.
68type DynPrecisionLimbVec = smallvec::SmallVec<[Limb; 2]>;
69
70/// Enum that represents what fraction of the LSB truncated bits of an fp number
71/// represent.
72///
73/// This essentially combines the roles of guard and sticky bits.
74#[must_use]
75#[derive(Copy, Clone, PartialEq, Eq, Debug)]
76enum Loss {
77 // Example of truncated bits:
78 ExactlyZero, // 000000
79 LessThanHalf, // 0xxxxx x's not all zero
80 ExactlyHalf, // 100000
81 MoreThanHalf, // 1xxxxx x's not all zero
82}
83
84/// How the nonfinite values Inf and NaN are represented.
85#[derive(Copy, Clone, PartialEq, Eq)]
86pub enum NonfiniteBehavior {
87 /// Represents standard IEEE 754 behavior. A value is nonfinite if the
88 /// exponent field is all 1s. In such cases, a value is Inf if the
89 /// significand bits are all zero, and NaN otherwise
90 IEEE754,
91
92 /// Only the Float8E5M2 has this behavior. There is no Inf representation. A
93 /// value is NaN if the exponent field and the mantissa field are all 1s.
94 /// This behavior matches the FP8 E4M3 type described in
95 /// <https://arxiv.org/abs/2209.05433>. We treat both signed and unsigned NaNs
96 /// as non-signalling, although the paper does not state whether the NaN
97 /// values are signalling or not.
98 NanOnly,
99}
100
101// HACK(eddyb) extension method flipping/changing the sign based on `bool`s.
102trait NegExt: Neg<Output = Self> + Sized {
103 fn negate_if(self, negate: bool) -> Self {
104 if negate {
105 -self
106 } else {
107 self
108 }
109 }
110
111 fn with_sign(self, sign: bool) -> Self
112 where
113 Self: Float,
114 {
115 self.negate_if(self.is_negative() != sign)
116 }
117}
118impl<T: Neg<Output = Self>> NegExt for T {}
119
120/// Represents floating point arithmetic semantics.
121pub trait Semantics: Sized {
122 /// Total number of bits in the interchange format.
123 const BITS: usize;
124
125 /// Number of exponent bits in the interchange format.
126 const EXP_BITS: usize;
127
128 /// Number of bits in the significand. This includes the integer bit.
129 const PRECISION: usize = (Self::BITS - 1 - Self::EXP_BITS) + 1;
130
131 /// How the nonfinite values Inf and NaN are represented.
132 const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::IEEE754;
133
134 /// The largest E such that 2^E is representable; this matches the
135 /// definition of IEEE 754.
136 const MAX_EXP: ExpInt = {
137 let ieee_inf_and_nan_replaced_with_extra_normals = match Self::NONFINITE_BEHAVIOR {
138 NonfiniteBehavior::IEEE754 => false,
139 NonfiniteBehavior::NanOnly => true,
140 };
141 Self::IEEE_MAX_EXP
142 + (Self::MIN_EXP - Self::IEEE_MIN_EXP)
143 + (ieee_inf_and_nan_replaced_with_extra_normals as ExpInt)
144 };
145 const IEEE_MAX_EXP: ExpInt = -Self::IEEE_MIN_EXP + 1;
146
147 /// The smallest E such that 2^E is a normalized number; this
148 /// matches the definition of IEEE 754.
149 const MIN_EXP: ExpInt = Self::IEEE_MIN_EXP;
150 const IEEE_MIN_EXP: ExpInt = -(1 << (Self::EXP_BITS - 1)) + 2;
151
152 /// The base significand bitpattern of NaNs, i.e. the bits that must always
153 /// be set in all NaNs, with other significand bits being either used for
154 /// payload bits (if `NAN_PAYLOAD_MASK` covers them) or always unset.
155 const NAN_SIGNIFICAND_BASE: Limb = match Self::NONFINITE_BEHAVIOR {
156 NonfiniteBehavior::IEEE754 => 0,
157 NonfiniteBehavior::NanOnly => (1 << (Self::PRECISION - 1)) - 1,
158 };
159
160 /// The significand bitmask for the payload of a NaN (if supported),
161 /// including the "quiet bit" (telling QNaNs apart from SNaNs).
162 const NAN_PAYLOAD_MASK: Limb = match Self::NONFINITE_BEHAVIOR {
163 NonfiniteBehavior::IEEE754 => (1 << (Self::PRECISION - 1)) - 1,
164 NonfiniteBehavior::NanOnly => 0,
165 };
166
167 /// The significand bitpattern to mark a NaN as quiet (if supported).
168 ///
169 /// NOTE: for X87DoubleExtended we need to set two bits instead of one.
170 ///
171 /// NOTE: all NaNs are quiet if unsupported (see `NonfiniteBehavior::NanOnly`).
172 const QNAN_SIGNIFICAND: Limb = match Self::NONFINITE_BEHAVIOR {
173 NonfiniteBehavior::IEEE754 => 1 << (Self::PRECISION - 2),
174 NonfiniteBehavior::NanOnly => 0,
175 };
176
177 fn from_bits(bits: u128) -> IeeeFloat<Self> {
178 assert!(Self::BITS > Self::PRECISION);
179
180 let sign = bits & (1 << (Self::BITS - 1));
181 let exponent = ((bits & !sign) >> (Self::BITS - 1 - Self::EXP_BITS)) & ((1 << Self::EXP_BITS) - 1);
182 let mut r = IeeeFloat {
183 sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)],
184 // Convert the exponent from its bias representation to a signed integer.
185 exp: (exponent as ExpInt) + (Self::MIN_EXP - 1),
186 read_only_category_do_not_mutate: Category::Zero,
187 read_only_sign_do_not_mutate: sign != 0,
188 marker: PhantomData,
189 };
190
191 // NOTE(eddyb) unlike the original C++ code, this doesn't check for
192 // specific exponent/significand combinations, but instead relies on
193 // being able to construct known-good special values to compare to.
194 let try_category_from_special = |mut special: IeeeFloat<Self>| {
195 special = special.copy_sign(r);
196
197 // Ignore NaN payload to avoid needing a separate NaN check.
198 let sig_mask = if special.is_nan() { !Self::NAN_PAYLOAD_MASK } else { !0 };
199
200 if special.is_negative() == r.is_negative()
201 && special.exp == r.exp
202 && special.sig[0] & sig_mask == r.sig[0] & sig_mask
203 {
204 Some(special.category())
205 } else {
206 None
207 }
208 };
209
210 // NOTE(eddyb) the order here matters, i.e. `NAN` needs to be last, as
211 // its relaxed check (see above) overlaps `INFINITY`, for IEEE NaNs.
212 let specials = [
213 IeeeFloat::<Self>::ZERO,
214 IeeeFloat::<Self>::INFINITY,
215 IeeeFloat::<Self>::NAN,
216 ];
217
218 let category = specials
219 .into_iter()
220 .find_map(try_category_from_special)
221 .unwrap_or_else(|| {
222 if r.exp == Self::MIN_EXP - 1 {
223 // Denormal.
224 r.exp = Self::MIN_EXP;
225 } else {
226 // Set integer bit.
227 sig::set_bit(&mut r.sig, Self::PRECISION - 1);
228 }
229 Category::Normal
230 });
231
232 r.read_only_category_do_not_mutate = category;
233
234 r
235 }
236
237 fn to_bits(x: IeeeFloat<Self>) -> u128 {
238 assert!(Self::BITS > Self::PRECISION);
239
240 // Split integer bit from significand.
241 let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
242 let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1);
243 let mut exponent = x.exp;
244 match x.category() {
245 Category::Normal => {
246 if exponent == Self::MIN_EXP && !integer_bit {
247 // Denormal.
248 exponent -= 1;
249 }
250 }
251 Category::Zero => {
252 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
253 IeeeFloat::<Self> {
254 sig: [significand],
255 exp: exponent,
256 ..
257 } = Float::ZERO;
258 }
259 Category::Infinity => {
260 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
261 IeeeFloat::<Self> {
262 sig: [significand],
263 exp: exponent,
264 ..
265 } = Float::INFINITY;
266 }
267 Category::NaN => {
268 IeeeFloat::<Self> { exp: exponent, .. } = Float::NAN;
269 }
270 }
271
272 // Convert the exponent from a signed integer to its bias representation.
273 let exponent = (exponent - (Self::MIN_EXP - 1)) as u128;
274
275 ((x.is_negative() as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand
276 }
277}
278
279impl<S> Copy for IeeeFloat<S> {}
280impl<S> Clone for IeeeFloat<S> {
281 fn clone(&self) -> Self {
282 *self
283 }
284}
285
286macro_rules! ieee_semantics {
287 ($(
288 $(#[$meta:meta])*
289 $name:ident = $sem:ident($bits:tt : $exp_bits:tt) $({ $($extra:tt)* })?
290 ),* $(,)?) => {
291 $(
292 #[doc = concat!("Floating point semantics for [`", stringify!($name), "`].")]
293 ///
294 /// See that type for more details.
295 pub struct $sem;
296
297 $(#[$meta])*
298 pub type $name = IeeeFloat<$sem>;
299
300 impl Semantics for $sem {
301 const BITS: usize = $bits;
302 const EXP_BITS: usize = $exp_bits;
303
304 $($($extra)*)?
305 }
306 )*
307 }
308}
309
310ieee_semantics! {
311 /// IEEE binary16 half-precision (16-bit) floating point number.
312 Half = HalfS(16:5),
313
314 /// IEEE binary32 single-precision (32-bit) floating point number.
315 Single = SingleS(32:8),
316
317 /// IEEE binary64 double-precision (64-bit) floating point number.
318 Double = DoubleS(64:11),
319
320 /// IEEE binary128 quadruple-precision (128-bit) floating point number.
321 Quad = QuadS(128:15),
322
323 /// 16-bit brain floating point number.
324 ///
325 /// This is not an IEEE kind but uses the same semantics.
326 BFloat = BFloatS(16:8),
327
328 /// 8-bit floating point number with S1E5M2 bit layout.
329 ///
330 /// Follows IEEE-754 conventions with S1E5M2 bit layout as described in
331 /// <https://arxiv.org/abs/2209.05433>.
332 Float8E5M2 = Float8E5M2S(8:5),
333
334 /// 8-bit floating point number with S1E4M3 bit layout.
335 ///
336 /// This type mostly follows IEEE-754 conventions with a
337 /// bit layout S1E4M3 as described in <https://arxiv.org/abs/2209.05433>.
338 /// Unlike IEEE-754 types, there are no infinity values, and NaN is
339 /// represented with the exponent and mantissa bits set to all 1s.
340 Float8E4M3FN = Float8E4M3FNS(8:4) {
341 const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::NanOnly;
342 },
343}
344
345// FIXME(eddyb) consider moving X87-specific logic to a "has explicit integer bit"
346// associated `const` on `Semantics` itself.
347/// Floating point semantics for [`X87DoubleExtended`].
348///
349/// See that type for more details.
350pub struct X87DoubleExtendedS;
351
352/// 80-bit floating point number that uses IEEE extended precision semantics, as used
353/// by x87 `long double`.
354pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>;
355
356impl Semantics for X87DoubleExtendedS {
357 const BITS: usize = 80;
358 const EXP_BITS: usize = 15;
359
360 // HACK(eddyb) overwriting `EXP_BITS` because its default is incorrect.
361 // FIMXE(eddyb) get rid of this by allowing `Semantics` to generically have
362 // the concept of "explicitly included integer bit", which is the main way
363 // in which the 80-bit X87 format differs from standard IEEE encodings.
364 const PRECISION: usize = 64;
365
366 /// For x87 extended precision, we want to make a NaN, not a
367 /// pseudo-NaN. Maybe we should expose the ability to make
368 /// pseudo-NaNs?
369 const QNAN_SIGNIFICAND: Limb = 0b11 << (Self::PRECISION - 2);
370
371 /// Integer bit is explicit in this format. Intel hardware (387 and later)
372 /// does not support these bit patterns:
373 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
374 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
375 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
376 /// exponent = 0, integer bit 1 ("pseudodenormal")
377 /// At the moment, the first three are treated as NaNs, the last one as Normal.
378 #[inline]
379 fn from_bits(bits: u128) -> IeeeFloat<Self> {
380 let sign = bits & (1 << (Self::BITS - 1));
381 let exponent = ((bits & !sign) >> (Self::BITS - 1 - Self::EXP_BITS)) & ((1 << Self::EXP_BITS) - 1);
382 let mut r = IeeeFloat {
383 sig: [bits & ((1 << Self::PRECISION) - 1)],
384 // Convert the exponent from its bias representation to a signed integer.
385 exp: (exponent as ExpInt) + (Self::MIN_EXP - 1),
386 read_only_category_do_not_mutate: Category::Zero,
387 read_only_sign_do_not_mutate: sign != 0,
388 marker: PhantomData,
389 };
390
391 let integer_bit = r.sig[0] >> (Self::PRECISION - 1);
392
393 let category = if r.exp == Self::MIN_EXP - 1 && r.sig == [0] {
394 Category::Zero
395 } else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] {
396 Category::Infinity
397 } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)]
398 || r.exp != Self::MAX_EXP + 1 && r.exp != Self::MIN_EXP - 1 && integer_bit == 0
399 {
400 r.exp = Self::MAX_EXP + 1;
401 Category::NaN
402 } else {
403 if r.exp == Self::MIN_EXP - 1 {
404 // Denormal.
405 r.exp = Self::MIN_EXP;
406 }
407 Category::Normal
408 };
409
410 r.read_only_category_do_not_mutate = category;
411
412 r
413 }
414
415 #[inline]
416 fn to_bits(x: IeeeFloat<Self>) -> u128 {
417 // Get integer bit from significand.
418 let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1);
419 let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1);
420 let exponent = match x.category() {
421 Category::Normal => {
422 if x.exp == Self::MIN_EXP && !integer_bit {
423 // Denormal.
424 Self::MIN_EXP - 1
425 } else {
426 x.exp
427 }
428 }
429 Category::Zero => {
430 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
431 significand = 0;
432 Self::MIN_EXP - 1
433 }
434 Category::Infinity => {
435 // FIXME(eddyb) Maybe we should guarantee an invariant instead?
436 significand = 1 << (Self::PRECISION - 1);
437 Self::MAX_EXP + 1
438 }
439 Category::NaN => Self::MAX_EXP + 1,
440 };
441
442 // Convert the exponent from a signed integer to its bias representation.
443 let exponent = (exponent - (Self::MIN_EXP - 1)) as u128;
444
445 ((x.is_negative() as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand
446 }
447}
448
449float_common_impls!(IeeeFloat<S>);
450
451impl<S: Semantics> PartialEq for IeeeFloat<S> {
452 fn eq(&self, rhs: &Self) -> bool {
453 self.partial_cmp(rhs) == Some(Ordering::Equal)
454 }
455}
456
457impl<S: Semantics> PartialOrd for IeeeFloat<S> {
458 fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
459 match (self.category(), rhs.category()) {
460 (Category::NaN, _) | (_, Category::NaN) => None,
461
462 (Category::Infinity, Category::Infinity) => Some((!self.is_negative()).cmp(&(!rhs.is_negative()))),
463
464 (Category::Zero, Category::Zero) => Some(Ordering::Equal),
465
466 (Category::Infinity, _) | (Category::Normal, Category::Zero) => {
467 Some((!self.is_negative()).cmp(&self.is_negative()))
468 }
469
470 (_, Category::Infinity) | (Category::Zero, Category::Normal) => {
471 Some(rhs.is_negative().cmp(&(!rhs.is_negative())))
472 }
473
474 (Category::Normal, Category::Normal) => {
475 // Two normal numbers. Do they have the same sign?
476 Some((!self.is_negative()).cmp(&(!rhs.is_negative())).then_with(|| {
477 // Compare absolute values; invert result if negative.
478 let result = self.cmp_abs_normal(*rhs);
479
480 if self.is_negative() {
481 result.reverse()
482 } else {
483 result
484 }
485 }))
486 }
487 }
488 }
489}
490
491impl<S: Semantics> Neg for IeeeFloat<S> {
492 type Output = Self;
493 fn neg(mut self) -> Self {
494 self.read_only_sign_do_not_mutate = !self.is_negative();
495 self
496 }
497}
498
499/// Prints this value as a decimal string.
500///
501/// \param precision The maximum number of digits of
502/// precision to output. If there are fewer digits available,
503/// zero padding will not be used unless the value is
504/// integral and small enough to be expressed in
505/// precision digits. 0 means to use the natural
506/// precision of the number.
507/// \param width The maximum number of zeros to
508/// consider inserting before falling back to scientific
509/// notation. 0 means to always use scientific notation.
510///
511/// \param alternate Indicate whether to remove the trailing zero in
512/// fraction part or not. Also setting this parameter to true forces
513/// producing of output more similar to default printf behavior.
514/// Specifically the lower e is used as exponent delimiter and exponent
515/// always contains no less than two digits.
516///
517/// Number precision width Result
518/// ------ --------- ----- ------
519/// 1.01E+4 5 2 10100
520/// 1.01E+4 4 2 1.01E+4
521/// 1.01E+4 5 1 1.01E+4
522/// 1.01E-2 5 2 0.0101
523/// 1.01E-2 4 2 0.0101
524/// 1.01E-2 4 1 1.01E-2
525impl<S: Semantics> fmt::Display for IeeeFloat<S> {
526 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
527 let width = f.width().unwrap_or(3);
528 let alternate = f.alternate();
529
530 match self.category() {
531 Category::Infinity => {
532 if self.is_negative() {
533 return f.write_str("-Inf");
534 } else {
535 return f.write_str("+Inf");
536 }
537 }
538
539 Category::NaN => return f.write_str("NaN"),
540
541 Category::Zero => {
542 if self.is_negative() {
543 f.write_char('-')?;
544 }
545
546 if width == 0 {
547 if alternate {
548 f.write_str("0.0")?;
549 if let Some(n) = f.precision() {
550 for _ in 1..n {
551 f.write_char('0')?;
552 }
553 }
554 f.write_str("e+00")?;
555 } else {
556 f.write_str("0.0E+0")?;
557 }
558 } else {
559 f.write_char('0')?;
560 }
561 return Ok(());
562 }
563
564 Category::Normal => {}
565 }
566
567 if self.is_negative() {
568 f.write_char('-')?;
569 }
570
571 // We use enough digits so the number can be round-tripped back to an
572 // APFloat. The formula comes from "How to Print Floating-Point Numbers
573 // Accurately" by Steele and White.
574 // FIXME: Using a formula based purely on the precision is conservative;
575 // we can print fewer digits depending on the actual value being printed.
576
577 // precision = 2 + floor(S::PRECISION / lg_2(10))
578 let precision = f.precision().unwrap_or(2 + S::PRECISION * 59 / 196);
579
580 // Decompose the number into an APInt and an exponent.
581 let mut exp = self.exp - (S::PRECISION as ExpInt - 1);
582 let mut sig: DynPrecisionLimbVec = [self.sig[0]].into_iter().collect();
583
584 // Ignore trailing binary zeros.
585 let trailing_zeros = sig[0].trailing_zeros();
586 let _: Loss = sig::shift_right(&mut sig, &mut exp, trailing_zeros as usize);
587
588 // Change the exponent from 2^e to 10^e.
589 if exp == 0 {
590 // Nothing to do.
591 } else if exp > 0 {
592 // Just shift left.
593 let shift = exp as usize;
594 sig.resize(limbs_for_bits(S::PRECISION + shift), 0);
595 sig::shift_left(&mut sig, &mut exp, shift);
596 } else {
597 // exp < 0
598 let mut texp = -exp as usize;
599
600 // We transform this using the identity:
601 // (N)(2^-e) == (N)(5^e)(10^-e)
602
603 // Multiply significand by 5^e.
604 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
605 let mut sig_scratch = DynPrecisionLimbVec::new();
606 let mut p5 = DynPrecisionLimbVec::new();
607 let mut p5_scratch = DynPrecisionLimbVec::new();
608 while texp != 0 {
609 if p5.is_empty() {
610 p5.push(5);
611 } else {
612 p5_scratch.resize(p5.len() * 2, 0);
613 let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
614 while p5_scratch.last() == Some(&0) {
615 p5_scratch.pop();
616 }
617 mem::swap(&mut p5, &mut p5_scratch);
618 }
619 if texp & 1 != 0 {
620 sig_scratch.resize(sig.len() + p5.len(), 0);
621 let _: Loss = sig::mul(&mut sig_scratch, &mut 0, &sig, &p5, (sig.len() + p5.len()) * LIMB_BITS);
622 while sig_scratch.last() == Some(&0) {
623 sig_scratch.pop();
624 }
625 mem::swap(&mut sig, &mut sig_scratch);
626 }
627 texp >>= 1;
628 }
629 }
630
631 // Fill the buffer.
632 let mut buffer = smallvec::SmallVec::<[u8; 64]>::new();
633
634 // Ignore digits from the significand until it is no more
635 // precise than is required for the desired precision.
636 // 196/59 is a very slight overestimate of lg_2(10).
637 let required = (precision * 196 + 58) / 59;
638 let mut discard_digits = sig::omsb(&sig).saturating_sub(required) * 59 / 196;
639 let mut in_trail = true;
640 while !sig.is_empty() {
641 // Perform short division by 10 to extract the rightmost digit.
642 // rem <- sig % 10
643 // sig <- sig / 10
644 let mut rem = 0;
645
646 // Use 64-bit division and remainder, with 32-bit chunks from sig.
647 sig::each_chunk(&mut sig, 32, |chunk| {
648 let chunk = chunk as u32;
649 let combined = ((rem as u64) << 32) | (chunk as u64);
650 rem = (combined % 10) as u8;
651 (combined / 10) as u32 as Limb
652 });
653
654 // Reduce the sigificand to avoid wasting time dividing 0's.
655 while sig.last() == Some(&0) {
656 sig.pop();
657 }
658
659 let digit = rem;
660
661 // Ignore digits we don't need.
662 if discard_digits > 0 {
663 discard_digits -= 1;
664 exp += 1;
665 continue;
666 }
667
668 // Drop trailing zeros.
669 if in_trail && digit == 0 {
670 exp += 1;
671 } else {
672 in_trail = false;
673 buffer.push(b'0' + digit);
674 }
675 }
676
677 assert!(!buffer.is_empty(), "no characters in buffer!");
678
679 // Drop down to precision.
680 // FIXME: don't do more precise calculations above than are required.
681 if buffer.len() > precision {
682 // The most significant figures are the last ones in the buffer.
683 let mut first_sig = buffer.len() - precision;
684
685 // Round.
686 // FIXME: this probably shouldn't use 'round half up'.
687
688 // Rounding down is just a truncation, except we also want to drop
689 // trailing zeros from the new result.
690 if buffer[first_sig - 1] < b'5' {
691 while first_sig < buffer.len() && buffer[first_sig] == b'0' {
692 first_sig += 1;
693 }
694 } else {
695 // Rounding up requires a decimal add-with-carry. If we continue
696 // the carry, the newly-introduced zeros will just be truncated.
697 for x in &mut buffer[first_sig..] {
698 if *x == b'9' {
699 first_sig += 1;
700 } else {
701 *x += 1;
702 break;
703 }
704 }
705 }
706
707 exp += first_sig as ExpInt;
708 buffer.drain(..first_sig);
709
710 // If we carried through, we have exactly one digit of precision.
711 if buffer.is_empty() {
712 buffer.push(b'1');
713 }
714 }
715
716 let digits = buffer.len();
717
718 // Check whether we should use scientific notation.
719 let scientific = if width == 0 {
720 true
721 } else {
722 if exp >= 0 {
723 // 765e3 --> 765000
724 // ^^^
725 // But we shouldn't make the number look more precise than it is.
726 exp as usize > width || digits + exp as usize > precision
727 } else {
728 // Power of the most significant digit.
729 let msd = exp + (digits - 1) as ExpInt;
730 if msd >= 0 {
731 // 765e-2 == 7.65
732 false
733 } else {
734 // 765e-5 == 0.00765
735 // ^ ^^
736 -msd as usize > width
737 }
738 }
739 };
740
741 // Scientific formatting is pretty straightforward.
742 if scientific {
743 exp += digits as ExpInt - 1;
744
745 f.write_char(buffer[digits - 1] as char)?;
746 f.write_char('.')?;
747 let truncate_zero = !alternate;
748 if digits == 1 && truncate_zero {
749 f.write_char('0')?;
750 } else {
751 for &d in buffer[..digits - 1].iter().rev() {
752 f.write_char(d as char)?;
753 }
754 }
755 // Fill with zeros up to precision.
756 if !truncate_zero && precision > digits - 1 {
757 for _ in 0..precision - digits + 1 {
758 f.write_char('0')?;
759 }
760 }
761 // For alternate we use lower 'e'.
762 f.write_char(if alternate { 'e' } else { 'E' })?;
763
764 // Exponent always at least two digits if we do not truncate zeros.
765 if truncate_zero {
766 write!(f, "{:+}", exp)?;
767 } else {
768 write!(f, "{:+03}", exp)?;
769 }
770
771 return Ok(());
772 }
773
774 // Non-scientific, positive exponents.
775 if exp >= 0 {
776 for &d in buffer.iter().rev() {
777 f.write_char(d as char)?;
778 }
779 for _ in 0..exp {
780 f.write_char('0')?;
781 }
782 return Ok(());
783 }
784
785 // Non-scientific, negative exponents.
786 let unit_place = -exp as usize;
787 if unit_place < digits {
788 for &d in buffer[unit_place..].iter().rev() {
789 f.write_char(d as char)?;
790 }
791 f.write_char('.')?;
792 for &d in buffer[..unit_place].iter().rev() {
793 f.write_char(d as char)?;
794 }
795 } else {
796 f.write_str("0.")?;
797 for _ in digits..unit_place {
798 f.write_char('0')?;
799 }
800 for &d in buffer.iter().rev() {
801 f.write_char(d as char)?;
802 }
803 }
804
805 Ok(())
806 }
807}
808
809impl<S: Semantics> fmt::Debug for IeeeFloat<S> {
810 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
811 write!(
812 f,
813 "{}({:?} | {}{:?} * 2^{})",
814 self,
815 self.category(),
816 if self.is_negative() { "-" } else { "+" },
817 self.sig,
818 self.exp
819 )
820 }
821}
822
823// HACK(eddyb) this logic is duplicated throughout the original C++ code,
824// but it's a bit too long to keep repeating in the Rust port for all ops.
825// FIXME(eddyb) find a better name/organization for all of this functionality
826// (`IeeeDefaultExceptionHandling` doesn't have a counterpart in the C++ code).
827struct IeeeDefaultExceptionHandling;
828impl IeeeDefaultExceptionHandling {
829 fn result_from_nan<S: Semantics>(mut r: IeeeFloat<S>) -> StatusAnd<IeeeFloat<S>> {
830 assert!(r.is_nan());
831
832 let status = if r.is_signaling() {
833 // [IEEE Std 754-2008 6.2]:
834 // Under default exception handling, any operation signaling an invalid
835 // operation exception and for which a floating-point result is to be
836 // delivered shall deliver a quiet NaN.
837 let [sig] = &mut r.sig;
838 *sig |= if S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND {
839 // HACK(eddyb) remain bug-compatible with the original C++ code
840 // which doesn't appear to attempt avoiding creating pseudo-NaNs
841 // (see https://github.com/llvm/llvm-project/issues/63938).
842 S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK
843 } else {
844 S::QNAN_SIGNIFICAND
845 };
846
847 // [IEEE Std 754-2008 6.2]:
848 // Signaling NaNs shall be reserved operands that, under default exception
849 // handling, signal the invalid operation exception(see 7.2) for every
850 // general-computational and signaling-computational operation except for
851 // the conversions described in 5.12.
852 Status::INVALID_OP
853 } else {
854 // [IEEE Std 754-2008 6.2]:
855 // For an operation with quiet NaN inputs, other than maximum and minimum
856 // operations, if a floating-point result is to be delivered the result
857 // shall be a quiet NaN which should be one of the input NaNs.
858 // ...
859 // Every general-computational and quiet-computational operation involving
860 // one or more input NaNs, none of them signaling, shall signal no
861 // exception, except fusedMultiplyAdd might signal the invalid operation
862 // exception(see 7.2).
863 Status::OK
864 };
865 status.and(r)
866 }
867
868 fn binop_result_from_either_nan<S: Semantics>(a: IeeeFloat<S>, b: IeeeFloat<S>) -> StatusAnd<IeeeFloat<S>> {
869 let r = match (a.category(), b.category()) {
870 (Category::NaN, _) => a,
871 (_, Category::NaN) => b,
872 _ => unreachable!(),
873 };
874 let mut status_and_r = Self::result_from_nan(r);
875 if b.is_signaling() {
876 status_and_r.status |= Status::INVALID_OP;
877 }
878 status_and_r
879 }
880}
881
882impl<S: Semantics> IeeeFloat<S> {
883 // HACK(eddyb) allow `Self::qnan` to be used from `IeeeFloat::NAN`.
884 // FIXME(eddyb) move back to the trait impl when that can be `const fn`.
885 const fn qnan(payload: Option<u128>) -> Self {
886 let sig = [S::NAN_SIGNIFICAND_BASE
887 | S::QNAN_SIGNIFICAND
888 | match payload {
889 // Zero out the excess bits of the significand.
890 Some(payload) => payload & S::NAN_PAYLOAD_MASK,
891 None => 0,
892 }];
893
894 let exp = match S::NONFINITE_BEHAVIOR {
895 NonfiniteBehavior::IEEE754 => S::MAX_EXP + 1,
896 NonfiniteBehavior::NanOnly => S::MAX_EXP,
897 };
898
899 IeeeFloat {
900 sig,
901 exp,
902 read_only_category_do_not_mutate: Category::NaN,
903 read_only_sign_do_not_mutate: false,
904 marker: PhantomData,
905 }
906 }
907}
908
909impl<S: Semantics> Float for IeeeFloat<S> {
910 const BITS: usize = S::BITS;
911 const PRECISION: usize = S::PRECISION;
912 const MAX_EXP: ExpInt = S::MAX_EXP;
913 const MIN_EXP: ExpInt = S::MIN_EXP;
914
915 const ZERO: Self = IeeeFloat {
916 sig: [0],
917 exp: S::MIN_EXP - 1,
918 read_only_category_do_not_mutate: Category::Zero,
919 read_only_sign_do_not_mutate: false,
920 marker: PhantomData,
921 };
922
923 const INFINITY: Self = match S::NONFINITE_BEHAVIOR {
924 NonfiniteBehavior::IEEE754 => IeeeFloat {
925 sig: [0],
926 exp: S::MAX_EXP + 1,
927 read_only_category_do_not_mutate: Category::Infinity,
928 read_only_sign_do_not_mutate: false,
929 marker: PhantomData,
930 },
931
932 // There is no Inf, so make NaN instead.
933 NonfiniteBehavior::NanOnly => Self::NAN,
934 };
935
936 const NAN: Self = Self::qnan(None);
937
938 fn qnan(payload: Option<u128>) -> Self {
939 // NOTE(eddyb) this is not a recursive self-call, but rather it calls
940 // the `const fn` inherent mehtod (see above).
941 Self::qnan(payload)
942 }
943
944 fn snan(payload: Option<u128>) -> Self {
945 let mut snan = Self::qnan(payload);
946
947 let [sig] = &mut snan.sig;
948
949 // We always have to clear the QNaN bit to make it an SNaN.
950 *sig &= !(S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK);
951
952 // If there are no bits set in the payload, we have to set
953 // *something* to make it a NaN instead of an infinity;
954 // conventionally, this is the next bit down from the QNaN bit.
955 if *sig & S::NAN_PAYLOAD_MASK == 0 {
956 *sig |= (S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK) >> 1;
957 }
958
959 snan
960 }
961
962 fn largest() -> Self {
963 // We want (in interchange format):
964 // exponent = 1..10
965 // significand = 1..1
966 IeeeFloat {
967 sig: [((1 << S::PRECISION) - 1)
968 & match S::NONFINITE_BEHAVIOR {
969 // The largest number by magnitude in our format will be the floating point
970 // number with maximum exponent and with significand that is all ones.
971 NonfiniteBehavior::IEEE754 => !0,
972
973 // The largest number by magnitude in our format will be the floating point
974 // number with maximum exponent and with significand that is all ones except
975 // the LSB.
976 NonfiniteBehavior::NanOnly => !1,
977 }],
978 exp: S::MAX_EXP,
979 read_only_category_do_not_mutate: Category::Normal,
980 read_only_sign_do_not_mutate: false,
981 marker: PhantomData,
982 }
983 }
984
985 // We want (in interchange format):
986 // exponent = 0..0
987 // significand = 0..01
988 const SMALLEST: Self = IeeeFloat {
989 sig: [1],
990 exp: S::MIN_EXP,
991 read_only_category_do_not_mutate: Category::Normal,
992 read_only_sign_do_not_mutate: false,
993 marker: PhantomData,
994 };
995
996 fn smallest_normalized() -> Self {
997 // We want (in interchange format):
998 // exponent = 0..0
999 // significand = 10..0
1000 IeeeFloat {
1001 sig: [1 << (S::PRECISION - 1)],
1002 exp: S::MIN_EXP,
1003 read_only_category_do_not_mutate: Category::Normal,
1004 read_only_sign_do_not_mutate: false,
1005 marker: PhantomData,
1006 }
1007 }
1008
1009 fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
1010 let status = match (self.category(), rhs.category()) {
1011 (Category::NaN, _) | (_, Category::NaN) => {
1012 return IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs);
1013 }
1014
1015 (Category::Infinity, Category::Infinity) => {
1016 // Differently signed infinities can only be validly
1017 // subtracted.
1018 if self.is_negative() != rhs.is_negative() {
1019 self = Self::NAN;
1020 Status::INVALID_OP
1021 } else {
1022 Status::OK
1023 }
1024 }
1025
1026 // Sign may depend on rounding mode; handled below.
1027 (_, Category::Zero) | (Category::Infinity, Category::Normal) => Status::OK,
1028
1029 (Category::Zero, _) | (_, Category::Infinity) => {
1030 self = rhs;
1031 Status::OK
1032 }
1033
1034 (Category::Normal, Category::Normal) => {
1035 let mut sign = self.is_negative();
1036 let loss = sig::add_or_sub(
1037 &mut self.sig,
1038 &mut self.exp,
1039 &mut sign,
1040 &mut [rhs.sig[0]],
1041 rhs.exp,
1042 rhs.is_negative(),
1043 );
1044 self = self.with_sign(sign);
1045
1046 let status;
1047 self = unpack!(status=, self.normalize(round, loss));
1048
1049 // Can only be zero if we lost no fraction.
1050 assert!(self.category() != Category::Zero || loss == Loss::ExactlyZero);
1051
1052 status
1053 }
1054 };
1055
1056 // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1057 // positive zero unless rounding to minus infinity, except that
1058 // adding two like-signed zeroes gives that zero.
1059 if self.category() == Category::Zero
1060 && (rhs.category() != Category::Zero || self.is_negative() != rhs.is_negative())
1061 {
1062 self = self.with_sign(round == Round::TowardNegative);
1063 }
1064
1065 status.and(self)
1066 }
1067
1068 // NOTE(eddyb) we can't rely on the `sub_r` method default implementation
1069 // because NaN handling needs the original `rhs` (i.e. without negation).
1070 fn sub_r(self, rhs: Self, round: Round) -> StatusAnd<Self> {
1071 match (self.category(), rhs.category()) {
1072 (Category::NaN, _) | (_, Category::NaN) => {
1073 IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs)
1074 }
1075
1076 _ => self.add_r(-rhs, round),
1077 }
1078 }
1079
1080 fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
1081 self = self.negate_if(rhs.is_negative());
1082
1083 match (self.category(), rhs.category()) {
1084 (Category::NaN, _) | (_, Category::NaN) => {
1085 self = self.negate_if(rhs.is_negative()); // restore the original sign
1086
1087 IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs)
1088 }
1089
1090 (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => {
1091 Status::INVALID_OP.and(Self::NAN)
1092 }
1093
1094 (Category::Infinity, _) | (_, Category::Infinity) => Status::OK.and(Self::INFINITY.copy_sign(self)),
1095
1096 (Category::Zero, _) | (_, Category::Zero) => Status::OK.and(Self::ZERO.copy_sign(self)),
1097
1098 (Category::Normal, Category::Normal) => {
1099 self.exp += rhs.exp;
1100 let mut wide_sig = [0; 2];
1101 let loss = sig::mul(&mut wide_sig, &mut self.exp, &self.sig, &rhs.sig, S::PRECISION);
1102 self.sig = [wide_sig[0]];
1103 let mut status;
1104 self = unpack!(status=, self.normalize(round, loss));
1105 if loss != Loss::ExactlyZero {
1106 status |= Status::INEXACT;
1107 }
1108 status.and(self)
1109 }
1110 }
1111 }
1112
1113 fn mul_add_r(mut self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> {
1114 // If and only if all arguments are normal do we need to do an
1115 // extended-precision calculation.
1116 if !self.is_finite_non_zero() || !multiplicand.is_finite_non_zero() || !addend.is_finite() {
1117 let mut status;
1118 if self.is_finite_non_zero() && multiplicand.is_finite_non_zero() {
1119 // HACK(eddyb) this corresponds to the case where the C++ code
1120 // (`multiplySpecials`) is effectively a noop: no multiplication
1121 // is actually performed, because we're really only here to handle
1122 // the "infinite/NaN `addend`" special-case, which needs to ignore
1123 // the "finite * finite" multiplication entirely, instead of letting
1124 // it e.g. overflow into infinity (and trample over `addend`).
1125 assert!(!addend.is_finite());
1126 status = Status::OK;
1127 } else {
1128 self = unpack!(status=, self.mul_r(multiplicand, round));
1129 }
1130
1131 // FS can only be Status::OK or Status::INVALID_OP. There is no more work
1132 // to do in the latter case. The IEEE-754R standard says it is
1133 // implementation-defined in this case whether, if ADDEND is a
1134 // quiet NaN, we raise invalid op; this implementation does so.
1135 //
1136 // If we need to do the addition we can do so with normal
1137 // precision.
1138 if status == Status::OK {
1139 self = unpack!(status=, self.add_r(addend, round));
1140 }
1141 return status.and(self);
1142 }
1143
1144 // Post-multiplication sign, before addition.
1145 self = self.negate_if(multiplicand.is_negative());
1146
1147 // Allocate space for twice as many bits as the original significand, plus one
1148 // extra bit for the addition to overflow into.
1149 assert!(limbs_for_bits(S::PRECISION * 2 + 1) <= 2);
1150 let mut wide_sig = sig::widening_mul(self.sig[0], multiplicand.sig[0]);
1151
1152 let mut loss = Loss::ExactlyZero;
1153 let mut omsb = sig::omsb(&wide_sig);
1154 self.exp += multiplicand.exp;
1155
1156 // Assume the operands involved in the multiplication are single-precision
1157 // FP, and the two multiplicants are:
1158 // lhs = a23 . a22 ... a0 * 2^e1
1159 // rhs = b23 . b22 ... b0 * 2^e2
1160 // the result of multiplication is:
1161 // lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1162 // Note that there are three significant bits at the left-hand side of the
1163 // radix point: two for the multiplication, and an overflow bit for the
1164 // addition (that will always be zero at this point). Move the radix point
1165 // toward left by two bits, and adjust exponent accordingly.
1166 self.exp += 2;
1167
1168 if addend.is_non_zero() {
1169 // Normalize our MSB to one below the top bit to allow for overflow.
1170 let ext_precision = 2 * S::PRECISION + 1;
1171 if omsb != ext_precision - 1 {
1172 assert!(ext_precision > omsb);
1173 sig::shift_left(&mut wide_sig, &mut self.exp, (ext_precision - 1) - omsb);
1174 }
1175
1176 // The intermediate result of the multiplication has "2 * S::PRECISION"
1177 // signicant bit; adjust the addend to be consistent with mul result.
1178 let mut ext_addend_sig = [addend.sig[0], 0];
1179
1180 // Extend the addend significand to ext_precision - 1. This guarantees
1181 // that the high bit of the significand is zero (same as wide_sig),
1182 // so the addition will overflow (if it does overflow at all) into the top bit.
1183 sig::shift_left(&mut ext_addend_sig, &mut 0, ext_precision - 1 - S::PRECISION);
1184
1185 let mut sign = self.is_negative();
1186 loss = sig::add_or_sub(
1187 &mut wide_sig,
1188 &mut self.exp,
1189 &mut sign,
1190 &mut ext_addend_sig,
1191 addend.exp + 1,
1192 addend.is_negative(),
1193 );
1194 self = self.with_sign(sign);
1195
1196 omsb = sig::omsb(&wide_sig);
1197 }
1198
1199 // Convert the result having "2 * S::PRECISION" significant-bits back to the one
1200 // having "S::PRECISION" significant-bits. First, move the radix point from
1201 // poision "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be
1202 // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION".
1203 self.exp -= S::PRECISION as ExpInt + 1;
1204
1205 // In case MSB resides at the left-hand side of radix point, shift the
1206 // mantissa right by some amount to make sure the MSB reside right before
1207 // the radix point (i.e. "MSB . rest-significant-bits").
1208 if omsb > S::PRECISION {
1209 let bits = omsb - S::PRECISION;
1210 loss = sig::shift_right(&mut wide_sig, &mut self.exp, bits).combine(loss);
1211 }
1212
1213 self.sig[0] = wide_sig[0];
1214
1215 let mut status;
1216 self = unpack!(status=, self.normalize(round, loss));
1217 if loss != Loss::ExactlyZero {
1218 status |= Status::INEXACT;
1219 }
1220
1221 // If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1222 // positive zero unless rounding to minus infinity, except that
1223 // adding two like-signed zeroes gives that zero.
1224 if self.category() == Category::Zero
1225 && !status.intersects(Status::UNDERFLOW)
1226 && self.is_negative() != addend.is_negative()
1227 {
1228 self = self.with_sign(round == Round::TowardNegative);
1229 }
1230
1231 status.and(self)
1232 }
1233
1234 fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
1235 self = self.negate_if(rhs.is_negative());
1236
1237 match (self.category(), rhs.category()) {
1238 (Category::NaN, _) | (_, Category::NaN) => {
1239 self = self.negate_if(rhs.is_negative()); // restore the original sign
1240
1241 IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs)
1242 }
1243
1244 (Category::Infinity, Category::Infinity) | (Category::Zero, Category::Zero) => {
1245 Status::INVALID_OP.and(Self::NAN)
1246 }
1247
1248 (Category::Infinity | Category::Zero, _) => Status::OK.and(self),
1249
1250 (_, Category::Infinity) => Status::OK.and(Self::ZERO.copy_sign(self)),
1251
1252 (_, Category::Zero) => Status::DIV_BY_ZERO.and(Self::INFINITY.copy_sign(self)),
1253
1254 (Category::Normal, Category::Normal) => {
1255 self.exp -= rhs.exp;
1256 let dividend = self.sig[0];
1257 let loss = sig::div(&mut self.sig, &mut self.exp, &mut [dividend], &mut [rhs.sig[0]], S::PRECISION);
1258 let mut status;
1259 self = unpack!(status=, self.normalize(round, loss));
1260 if loss != Loss::ExactlyZero {
1261 status |= Status::INEXACT;
1262 }
1263 status.and(self)
1264 }
1265 }
1266 }
1267
1268 fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> {
1269 match (self.category(), rhs.category()) {
1270 (Category::NaN, _) | (_, Category::NaN) => {
1271 IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs)
1272 }
1273
1274 (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN),
1275
1276 (Category::Zero, _) | (_, Category::Infinity) => Status::OK.and(self),
1277
1278 (Category::Normal, Category::Normal) => {
1279 let mut status;
1280
1281 let mut x = self;
1282 let mut p = rhs;
1283
1284 // Make sure the current value is less than twice the denom. If the addition
1285 // did not succeed (an overflow has happened), which means that the finite
1286 // value we currently posses must be less than twice the denom (as we are
1287 // using the same semantics).
1288 let p2 = unpack!(status=, p + p);
1289 if status == Status::OK {
1290 x = unpack!(status=, x.c_fmod(p2));
1291 assert_eq!(status, Status::OK);
1292 }
1293
1294 // Lets work with absolute numbers.
1295 p = p.abs();
1296 x = x.abs();
1297
1298 //
1299 // To calculate the remainder we use the following scheme.
1300 //
1301 // The remainder is defained as follows:
1302 //
1303 // remainder = numer - rquot * denom = x - r * p
1304 //
1305 // Where r is the result of: x/p, rounded toward the nearest integral value
1306 // (with halfway cases rounded toward the even number).
1307 //
1308 // Currently, (after x mod 2p):
1309 // r is the number of 2p's present inside x, which is inherently, an even
1310 // number of p's.
1311 //
1312 // We may split the remaining calculation into 4 options:
1313 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1314 // - if x == 0.5p then we round to the nearest even number which is 0, and we
1315 // are done as well.
1316 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
1317 // to subtract 1p at least once.
1318 // - if x >= p then we must subtract p at least once, as x must be a
1319 // remainder.
1320 //
1321 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
1322 //
1323 // We can now split the remaining calculation to the following 3 options:
1324 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1325 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
1326 // must round up to the next even number. so we must subtract p once more.
1327 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
1328 // integral, and subtract p once more.
1329 //
1330
1331 // Return `x * 2` at no loss of precision (i.e. no overflow).
1332 //
1333 // HACK(eddyb) this may seem a bit sketchy because it can return
1334 // values that `normalize` would've replaced with `overflow_result`
1335 // (e.g. overflowing to infinity), but the result is only used for
1336 // comparisons, where both sides of such comparison can be seen
1337 // as transiently having a larger *effective* exponent range.
1338 let lossless_2x = |mut x: Self| {
1339 x.exp += 1;
1340
1341 if x.exp >= Self::MAX_EXP {
1342 // HACK(eddyb) skip lossy `normalize` (see above).
1343 } else {
1344 let status;
1345 x = unpack!(status=, x.normalize(Round::NearestTiesToEven, Loss::ExactlyZero));
1346 assert_eq!(status, Status::OK);
1347 }
1348
1349 x
1350 };
1351
1352 if lossless_2x(x) > p {
1353 x = unpack!(status=, x - p);
1354 assert_eq!(status, Status::OK);
1355
1356 if lossless_2x(x) >= p {
1357 x = unpack!(status=, x - p);
1358 assert_eq!(status, Status::OK);
1359 }
1360 }
1361
1362 if x.is_zero() {
1363 Status::OK.and(x.copy_sign(self)) // IEEE754 requires this
1364 } else {
1365 Status::OK.and(x.negate_if(self.is_negative()))
1366 }
1367 }
1368 }
1369 }
1370
1371 fn c_fmod(mut self, rhs: Self) -> StatusAnd<Self> {
1372 match (self.category(), rhs.category()) {
1373 (Category::NaN, _) | (_, Category::NaN) => {
1374 IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs)
1375 }
1376
1377 (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN),
1378
1379 (Category::Zero, _) | (_, Category::Infinity) => Status::OK.and(self),
1380
1381 (Category::Normal, Category::Normal) => {
1382 let orig = self;
1383
1384 while self.is_finite_non_zero()
1385 && rhs.is_finite_non_zero()
1386 && self.cmp_abs_normal(rhs) != Ordering::Less
1387 {
1388 let exp = self.ilogb() - rhs.ilogb();
1389 let mut v = rhs.scalbn(exp);
1390 // `v` can overflow to NaN with `NonfiniteBehavior::NanOnly`, so explicitly
1391 // check for it.
1392 if v.is_nan() || self.cmp_abs_normal(v) == Ordering::Less {
1393 v = rhs.scalbn(exp - 1);
1394 }
1395 v = v.copy_sign(self);
1396
1397 let status;
1398 self = unpack!(status=, self - v);
1399 assert_eq!(status, Status::OK);
1400 }
1401 if self.is_zero() {
1402 self = self.copy_sign(orig);
1403 }
1404 Status::OK.and(self)
1405 }
1406 }
1407 }
1408
1409 fn round_to_integral(self, round: Round) -> StatusAnd<Self> {
1410 match self.category() {
1411 Category::NaN => IeeeDefaultExceptionHandling::result_from_nan(self),
1412
1413 // [IEEE Std 754-2008 6.1]:
1414 // The behavior of infinity in floating-point arithmetic is derived from the
1415 // limiting cases of real arithmetic with operands of arbitrarily
1416 // large magnitude, when such a limit exists.
1417 // ...
1418 // Operations on infinite operands are usually exact and therefore signal no
1419 // exceptions ...
1420 Category::Infinity => Status::OK.and(self),
1421
1422 // [IEEE Std 754-2008 6.3]:
1423 // ... the sign of the result of conversions, the quantize operation, the
1424 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
1425 // the sign of the first or only operand.
1426 Category::Zero => Status::OK.and(self),
1427
1428 Category::Normal => {
1429 // If the exponent is large enough, we know that this value is already
1430 // integral, and the arithmetic below would potentially cause it to saturate
1431 // to +/-Inf. Bail out early instead.
1432 if self.exp + 1 >= S::PRECISION as ExpInt {
1433 return Status::OK.and(self);
1434 }
1435
1436 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1437 // precision of our format, and then subtract it back off again. The choice
1438 // of rounding modes for the addition/subtraction determines the rounding mode
1439 // for our integral rounding as well.
1440 // NOTE: When the input value is negative, we do subtraction followed by
1441 // addition instead.
1442 assert!(S::PRECISION <= 128);
1443 let mut status;
1444 let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1)));
1445 assert_eq!(status, Status::OK);
1446 let magic_const = magic_const.copy_sign(self);
1447
1448 let mut r = self;
1449 r = unpack!(status=, r.add_r(magic_const, round));
1450
1451 // Current value and 'MagicConstant' are both integers, so the result of the
1452 // subtraction is always exact according to Sterbenz' lemma.
1453 r = r.sub_r(magic_const, round).value;
1454
1455 // Restore the input sign to handle the case of zero result
1456 // correctly.
1457 status.and(r.copy_sign(self))
1458 }
1459 }
1460 }
1461
1462 fn next_up(mut self) -> StatusAnd<Self> {
1463 // Compute nextUp(x), handling each float category separately.
1464 match self.category() {
1465 Category::Infinity => {
1466 if self.is_negative() {
1467 // nextUp(-inf) = -largest
1468 Status::OK.and(-Self::largest())
1469 } else {
1470 // nextUp(+inf) = +inf
1471 Status::OK.and(self)
1472 }
1473 }
1474 Category::NaN => {
1475 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
1476 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
1477 // change the payload.
1478 if self.is_signaling() {
1479 // For consistency, propagate the sign of the sNaN to the qNaN.
1480 Status::INVALID_OP.and(Self::NAN.copy_sign(self))
1481 } else {
1482 Status::OK.and(self)
1483 }
1484 }
1485 Category::Zero => {
1486 // nextUp(pm 0) = +smallest
1487 Status::OK.and(Self::SMALLEST)
1488 }
1489 Category::Normal => {
1490 // nextUp(-smallest) = -0
1491 if self.is_smallest() && self.is_negative() {
1492 return Status::OK.and(-Self::ZERO);
1493 }
1494
1495 // nextUp(largest) == INFINITY
1496 if self.is_largest() && !self.is_negative() {
1497 return Status::OK.and(Self::INFINITY);
1498 }
1499
1500 // Excluding the integral bit. This allows us to test for binade boundaries.
1501 let sig_mask = (1 << (S::PRECISION - 1)) - 1;
1502
1503 // nextUp(normal) == normal + inc.
1504 if self.is_negative() {
1505 // If we are negative, we need to decrement the significand.
1506
1507 // We only cross a binade boundary that requires adjusting the exponent
1508 // if:
1509 // 1. exponent != S::MIN_EXP. This implies we are not in the
1510 // smallest binade or are dealing with denormals.
1511 // 2. Our significand excluding the integral bit is all zeros.
1512 let crossing_binade_boundary = self.exp != S::MIN_EXP && self.sig[0] & sig_mask == 0;
1513
1514 // Decrement the significand.
1515 //
1516 // We always do this since:
1517 // 1. If we are dealing with a non-binade decrement, by definition we
1518 // just decrement the significand.
1519 // 2. If we are dealing with a normal -> normal binade decrement, since
1520 // we have an explicit integral bit the fact that all bits but the
1521 // integral bit are zero implies that subtracting one will yield a
1522 // significand with 0 integral bit and 1 in all other spots. Thus we
1523 // must just adjust the exponent and set the integral bit to 1.
1524 // 3. If we are dealing with a normal -> denormal binade decrement,
1525 // since we set the integral bit to 0 when we represent denormals, we
1526 // just decrement the significand.
1527 sig::decrement(&mut self.sig);
1528
1529 if crossing_binade_boundary {
1530 // Our result is a normal number. Do the following:
1531 // 1. Set the integral bit to 1.
1532 // 2. Decrement the exponent.
1533 sig::set_bit(&mut self.sig, S::PRECISION - 1);
1534 self.exp -= 1;
1535 }
1536 } else {
1537 // If we are positive, we need to increment the significand.
1538
1539 // We only cross a binade boundary that requires adjusting the exponent if
1540 // the input is not a denormal and all of said input's significand bits
1541 // are set. If all of said conditions are true: clear the significand, set
1542 // the integral bit to 1, and increment the exponent. If we have a
1543 // denormal always increment since moving denormals and the numbers in the
1544 // smallest normal binade have the same exponent in our representation.
1545 let crossing_binade_boundary = !self.is_denormal() && self.sig[0] & sig_mask == sig_mask;
1546
1547 if crossing_binade_boundary {
1548 self.sig = [0];
1549 sig::set_bit(&mut self.sig, S::PRECISION - 1);
1550 assert_ne!(
1551 self.exp,
1552 S::MAX_EXP,
1553 "We can not increment an exponent beyond the MAX_EXP \
1554 allowed by the given floating point semantics."
1555 );
1556 self.exp += 1;
1557 } else {
1558 sig::increment(&mut self.sig);
1559 }
1560 }
1561 Status::OK.and(self)
1562 }
1563 }
1564 }
1565
1566 fn from_bits(input: u128) -> Self {
1567 // Dispatch to semantics.
1568 S::from_bits(input)
1569 }
1570
1571 fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> {
1572 IeeeFloat {
1573 sig: [input],
1574 exp: S::PRECISION as ExpInt - 1,
1575 read_only_category_do_not_mutate: Category::Normal,
1576 read_only_sign_do_not_mutate: false,
1577 marker: PhantomData,
1578 }
1579 .normalize(round, Loss::ExactlyZero)
1580 }
1581
1582 fn from_str_r(s: &str, mut round: Round) -> Result<StatusAnd<Self>, ParseError> {
1583 if s.is_empty() {
1584 return Err(ParseError("Invalid string length"));
1585 }
1586
1587 // Handle a leading minus sign.
1588 let (minus, s) = s.strip_prefix("-").map(|s| (true, s)).unwrap_or((false, s));
1589 let from_abs = |r: Self| r.negate_if(minus);
1590
1591 // Handle a leading plus sign (mutually exclusive with minus).
1592 let (explicit_plus, s) = s
1593 .strip_prefix("+")
1594 .filter(|_| !minus)
1595 .map(|s| (true, s))
1596 .unwrap_or((false, s));
1597
1598 // Handle special cases.
1599 let special = match s {
1600 "Inf" if minus || explicit_plus => Some(Self::INFINITY),
1601
1602 "inf" | "INFINITY" if !explicit_plus => Some(Self::INFINITY),
1603
1604 _ if !explicit_plus => {
1605 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
1606 let (is_signaling, s) = s.strip_prefix(['s', 'S']).map_or((false, s), |s| (true, s));
1607
1608 s.strip_prefix("nan").or_else(|| s.strip_prefix("NaN")).and_then(|s| {
1609 // Allow the payload to be inside parentheses.
1610 let s = s
1611 .strip_prefix("(")
1612 .and_then(|s| {
1613 // Parentheses should be balanced (and not empty).
1614 s.strip_suffix(")").filter(|s| !s.is_empty())
1615 })
1616 .unwrap_or(s);
1617
1618 let payload = if s.is_empty() {
1619 // A NaN without payload.
1620 None
1621 } else {
1622 // Determine the payload number's radix.
1623 let (radix, s) = s
1624 .strip_prefix("0")
1625 .filter(|s| !s.is_empty())
1626 .map(|s| s.strip_prefix(['x', 'X']).map(|s| (16, s)).unwrap_or((8, s)))
1627 .unwrap_or((10, s));
1628
1629 // Parse the payload and make the NaN.
1630 Some(u128::from_str_radix(s, radix).ok()?)
1631 };
1632
1633 Some(if is_signaling {
1634 Self::snan(payload)
1635 } else {
1636 Self::qnan(payload)
1637 })
1638 })
1639 }
1640
1641 _ => None,
1642 };
1643 if let Some(r) = special {
1644 return Ok(Status::OK.and(from_abs(r)));
1645 }
1646
1647 if s.is_empty() {
1648 return Err(ParseError("String has no digits"));
1649 }
1650
1651 // Adjust the rounding mode for the absolute value below.
1652 round = round.negate_if(minus);
1653
1654 let (is_hex, s) = s
1655 .strip_prefix("0")
1656 .and_then(|s| s.strip_prefix(['x', 'X']))
1657 .map(|s| (true, s))
1658 .unwrap_or((false, s));
1659
1660 let r = if is_hex {
1661 if s.is_empty() {
1662 return Err(ParseError("Invalid string"));
1663 }
1664 Self::from_hexadecimal_string(s, round)?
1665 } else {
1666 Self::from_decimal_string(s, round)?
1667 };
1668
1669 Ok(r.map(from_abs))
1670 }
1671
1672 fn to_bits(self) -> u128 {
1673 // Dispatch to semantics.
1674 S::to_bits(self)
1675 }
1676
1677 fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> {
1678 // The result of trying to convert a number too large.
1679 let overflow = if self.is_negative() {
1680 // Negative numbers cannot be represented as unsigned.
1681 0
1682 } else {
1683 // Largest unsigned integer of the given width.
1684 !0 >> (128 - width)
1685 };
1686
1687 *is_exact = false;
1688
1689 match self.category() {
1690 Category::NaN => Status::INVALID_OP.and(0),
1691
1692 Category::Infinity => Status::INVALID_OP.and(overflow),
1693
1694 Category::Zero => {
1695 // Negative zero can't be represented as an int.
1696 *is_exact = !self.is_negative();
1697 Status::OK.and(0)
1698 }
1699
1700 Category::Normal => {
1701 let mut r = 0;
1702
1703 // Step 1: place our absolute value, with any fraction truncated, in
1704 // the destination.
1705 let truncated_bits = if self.exp < 0 {
1706 // Our absolute value is less than one; truncate everything.
1707 // For exponent -1 the integer bit represents .5, look at that.
1708 // For smaller exponents leftmost truncated bit is 0.
1709 S::PRECISION - 1 + (-self.exp) as usize
1710 } else {
1711 // We want the most significant (exponent + 1) bits; the rest are
1712 // truncated.
1713 let bits = self.exp as usize + 1;
1714
1715 // Hopelessly large in magnitude?
1716 if bits > width {
1717 return Status::INVALID_OP.and(overflow);
1718 }
1719
1720 if bits < S::PRECISION {
1721 // We truncate (S::PRECISION - bits) bits.
1722 r = self.sig[0] >> (S::PRECISION - bits);
1723 S::PRECISION - bits
1724 } else {
1725 // We want at least as many bits as are available.
1726 r = self.sig[0] << (bits - S::PRECISION);
1727 0
1728 }
1729 };
1730
1731 // Step 2: work out any lost fraction, and increment the absolute
1732 // value if we would round away from zero.
1733 let mut loss = Loss::ExactlyZero;
1734 if truncated_bits > 0 {
1735 loss = Loss::through_truncation(&self.sig, truncated_bits);
1736 if loss != Loss::ExactlyZero && self.round_away_from_zero(round, loss, truncated_bits) {
1737 r = r.wrapping_add(1);
1738 if r == 0 {
1739 return Status::INVALID_OP.and(overflow); // Overflow.
1740 }
1741 }
1742 }
1743
1744 // Step 3: check if we fit in the destination.
1745 if r > overflow {
1746 return Status::INVALID_OP.and(overflow);
1747 }
1748
1749 if loss == Loss::ExactlyZero {
1750 *is_exact = true;
1751 Status::OK.and(r)
1752 } else {
1753 Status::INEXACT.and(r)
1754 }
1755 }
1756 }
1757 }
1758
1759 fn cmp_abs_normal(self, rhs: Self) -> Ordering {
1760 assert!(self.is_finite_non_zero());
1761 assert!(rhs.is_finite_non_zero());
1762
1763 // If exponents are equal, do an unsigned comparison of the significands.
1764 self.exp.cmp(&rhs.exp).then_with(|| sig::cmp(&self.sig, &rhs.sig))
1765 }
1766
1767 fn bitwise_eq(self, rhs: Self) -> bool {
1768 if self.category() != rhs.category() || self.is_negative() != rhs.is_negative() {
1769 return false;
1770 }
1771
1772 if self.category() == Category::Zero || self.category() == Category::Infinity {
1773 return true;
1774 }
1775
1776 if self.is_finite_non_zero() && self.exp != rhs.exp {
1777 return false;
1778 }
1779
1780 self.sig == rhs.sig
1781 }
1782
1783 fn is_negative(self) -> bool {
1784 self.read_only_sign_do_not_mutate
1785 }
1786
1787 fn is_denormal(self) -> bool {
1788 self.is_finite_non_zero() && self.exp == S::MIN_EXP && !sig::get_bit(&self.sig, S::PRECISION - 1)
1789 }
1790
1791 fn is_signaling(self) -> bool {
1792 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
1793 // first bit of the trailing significand being 0.
1794 self.is_nan() && self.sig[0] & S::QNAN_SIGNIFICAND != S::QNAN_SIGNIFICAND
1795 }
1796
1797 fn category(self) -> Category {
1798 self.read_only_category_do_not_mutate
1799 }
1800
1801 fn get_exact_inverse(self) -> Option<Self> {
1802 // Special floats and denormals have no exact inverse.
1803 if !self.is_finite_non_zero() {
1804 return None;
1805 }
1806
1807 // Check that the number is a power of two by making sure that only the
1808 // integer bit is set in the significand.
1809 if self.sig != [1 << (S::PRECISION - 1)] {
1810 return None;
1811 }
1812
1813 // Get the inverse.
1814 let mut reciprocal = Self::from_u128(1).value;
1815 let status;
1816 reciprocal = unpack!(status=, reciprocal / self);
1817 if status != Status::OK {
1818 return None;
1819 }
1820
1821 // Avoid multiplication with a denormal, it is not safe on all platforms and
1822 // may be slower than a normal division.
1823 if reciprocal.is_denormal() {
1824 return None;
1825 }
1826
1827 assert!(reciprocal.is_finite_non_zero());
1828 assert_eq!(reciprocal.sig, [1 << (S::PRECISION - 1)]);
1829
1830 Some(reciprocal)
1831 }
1832
1833 fn ilogb(mut self) -> ExpInt {
1834 if self.is_nan() {
1835 return IEK_NAN;
1836 }
1837 if self.is_zero() {
1838 return IEK_ZERO;
1839 }
1840 if self.is_infinite() {
1841 return IEK_INF;
1842 }
1843 if !self.is_denormal() {
1844 return self.exp;
1845 }
1846
1847 let sig_bits = (S::PRECISION - 1) as ExpInt;
1848 self.exp += sig_bits;
1849 self = self.normalize(Round::NearestTiesToEven, Loss::ExactlyZero).value;
1850 self.exp - sig_bits
1851 }
1852
1853 fn scalbn_r(mut self, exp: ExpInt, round: Round) -> Self {
1854 // If exp is wildly out-of-scale, simply adding it to self.exp will
1855 // overflow; clamp it to a safe range before adding, but ensure that the range
1856 // is large enough that the clamp does not change the result. The range we
1857 // need to support is the difference between the largest possible exponent and
1858 // the normalized exponent of half the smallest denormal.
1859
1860 let sig_bits = (S::PRECISION - 1) as i32;
1861 let max_change = S::MAX_EXP as i32 - (S::MIN_EXP as i32 - sig_bits) + 1;
1862
1863 // Clamp to one past the range ends to let normalize handle overflow.
1864 let exp_change = cmp::min(cmp::max(exp as i32, -max_change - 1), max_change);
1865 self.exp = self.exp.saturating_add(exp_change as ExpInt);
1866 self = self.normalize(round, Loss::ExactlyZero).value;
1867 if self.is_nan() {
1868 self = IeeeDefaultExceptionHandling::result_from_nan(self).value;
1869 }
1870 self
1871 }
1872
1873 fn frexp_r(mut self, exp: &mut ExpInt, round: Round) -> Self {
1874 *exp = self.ilogb();
1875
1876 // Quiet signalling nans.
1877 if *exp == IEK_NAN {
1878 self = IeeeDefaultExceptionHandling::result_from_nan(self).value;
1879 return self;
1880 }
1881
1882 if *exp == IEK_INF {
1883 return self;
1884 }
1885
1886 // 1 is added because frexp is defined to return a normalized fraction in
1887 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
1888 if *exp == IEK_ZERO {
1889 *exp = 0;
1890 } else {
1891 *exp += 1;
1892 }
1893 self.scalbn_r(-*exp, round)
1894 }
1895}
1896
1897impl<S: Semantics, T: Semantics> FloatConvert<IeeeFloat<T>> for IeeeFloat<S> {
1898 fn convert_r(mut self, round: Round, loses_info: &mut bool) -> StatusAnd<IeeeFloat<T>> {
1899 // FIXME(eddyb) move this into the return result.
1900 *loses_info = false;
1901
1902 // x86 has some unusual NaNs which cannot be represented in any other
1903 // format; note them here.
1904 fn is_x87_double_extended<S: Semantics>() -> bool {
1905 S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND
1906 }
1907 let loses_x87_pseudo_nan = is_x87_double_extended::<S>()
1908 && !is_x87_double_extended::<T>()
1909 && self.category() == Category::NaN
1910 && (self.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND;
1911
1912 // NOTE(eddyb) this is done early because the target semantics may not
1913 // actually support encoding the distinction between SNaN and QNaN.
1914 //
1915 // Convert of sNaN creates qNaN and raises an exception (invalid op).
1916 // This also guarantees that a sNaN does not become Inf on a truncation
1917 // that loses all payload bits.
1918 let mut status = Status::OK;
1919 if self.is_nan() {
1920 self = unpack!(status|=, IeeeDefaultExceptionHandling::result_from_nan(self));
1921 }
1922
1923 let Self { mut sig, mut exp, .. } = self;
1924
1925 // If this is a truncation of a denormal number, and the target semantics
1926 // has larger exponent range than the source semantics (this can happen
1927 // when truncating from PowerPC double-double to double format), the
1928 // right shift could lose result mantissa bits. Adjust exponent instead
1929 // of performing excessive shift.
1930 // Also do a similar trick in case shifting denormal would produce zero
1931 // significand as this case isn't handled correctly by normalize.
1932 let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt;
1933 if shift < 0 && self.is_finite_non_zero() {
1934 let omsb = sig::omsb(&sig) as ExpInt;
1935 let mut exp_change = omsb - S::PRECISION as ExpInt;
1936 if exp + exp_change < T::MIN_EXP {
1937 exp_change = T::MIN_EXP - exp;
1938 }
1939 if exp_change < shift {
1940 exp_change = shift;
1941 }
1942 if exp_change < 0 {
1943 shift -= exp_change;
1944 exp += exp_change;
1945 } else if omsb <= -shift {
1946 exp_change = omsb + shift - 1; // leave at least one bit set
1947 shift -= exp_change;
1948 exp += exp_change;
1949 }
1950 }
1951
1952 // If this is a truncation, perform the shift.
1953 let mut loss = Loss::ExactlyZero;
1954 if shift < 0 && (self.is_finite_non_zero() || self.category() == Category::NaN && S::NAN_PAYLOAD_MASK != 0) {
1955 loss = sig::shift_right(&mut sig, &mut 0, -shift as usize);
1956 }
1957
1958 // If this is an extension, perform the shift.
1959 if shift > 0 && (self.is_finite_non_zero() || self.category() == Category::NaN) {
1960 sig::shift_left(&mut sig, &mut 0, shift as usize);
1961 }
1962
1963 let r = match self.category() {
1964 Category::Normal => {
1965 let r = IeeeFloat::<T> {
1966 sig,
1967 exp,
1968 read_only_category_do_not_mutate: self.category(),
1969 read_only_sign_do_not_mutate: self.is_negative(),
1970 marker: PhantomData,
1971 };
1972 unpack!(status|=, r.normalize(round, loss))
1973 }
1974 Category::NaN => {
1975 *loses_info = loss != Loss::ExactlyZero
1976 || loses_x87_pseudo_nan
1977 || S::NAN_PAYLOAD_MASK != 0 && T::NAN_PAYLOAD_MASK == 0;
1978
1979 IeeeFloat::<T>::qnan(Some(sig[0])).with_sign(self.is_negative())
1980 }
1981 Category::Infinity => IeeeFloat::<T>::INFINITY.with_sign(self.is_negative()),
1982 Category::Zero => IeeeFloat::<T>::ZERO.with_sign(self.is_negative()),
1983 };
1984
1985 // NOTE(eddyb) this catches all cases of e.g. ±Inf turning into NaN,
1986 // because of `T::NONFINITE_BEHAVIOR` not being `IEEE754`.
1987 if matches!(self.category(), Category::Infinity | Category::Zero)
1988 && (r.category() != self.category() || r.is_negative() != self.is_negative())
1989 {
1990 status |= Status::INEXACT;
1991 }
1992
1993 *loses_info |= (status - Status::INVALID_OP) != Status::OK;
1994
1995 status.and(r)
1996 }
1997}
1998
1999impl<S: Semantics> IeeeFloat<S> {
2000 /// Handle positive overflow. We either return infinity or
2001 /// the largest finite number. For negative overflow,
2002 /// negate the `round` argument before calling.
2003 fn overflow_result(round: Round) -> StatusAnd<Self> {
2004 match round {
2005 // Infinity?
2006 Round::NearestTiesToEven | Round::NearestTiesToAway | Round::TowardPositive => {
2007 (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY)
2008 }
2009 // Otherwise we become the largest finite number.
2010 Round::TowardNegative | Round::TowardZero => Status::INEXACT.and(Self::largest()),
2011 }
2012 }
2013
2014 /// Returns TRUE if, when truncating the current number, with BIT the
2015 /// new LSB, with the given lost fraction and rounding mode, the result
2016 /// would need to be rounded away from zero (i.e., by increasing the
2017 /// signficand). This routine must work for Category::Zero of both signs, and
2018 /// Category::Normal numbers.
2019 fn round_away_from_zero(&self, round: Round, loss: Loss, bit: usize) -> bool {
2020 // NaNs and infinities should not have lost fractions.
2021 assert!(self.is_finite_non_zero() || self.is_zero());
2022
2023 // Current callers never pass this so we don't handle it.
2024 assert_ne!(loss, Loss::ExactlyZero);
2025
2026 match round {
2027 Round::NearestTiesToAway => loss == Loss::ExactlyHalf || loss == Loss::MoreThanHalf,
2028 Round::NearestTiesToEven => {
2029 if loss == Loss::MoreThanHalf {
2030 return true;
2031 }
2032
2033 // Our zeros don't have a significand to test.
2034 if loss == Loss::ExactlyHalf && self.category() != Category::Zero {
2035 return sig::get_bit(&self.sig, bit);
2036 }
2037
2038 false
2039 }
2040 Round::TowardZero => false,
2041 Round::TowardPositive => !self.is_negative(),
2042 Round::TowardNegative => self.is_negative(),
2043 }
2044 }
2045
2046 fn normalize(mut self, round: Round, mut loss: Loss) -> StatusAnd<Self> {
2047 if !self.is_finite_non_zero() {
2048 return Status::OK.and(self);
2049 }
2050
2051 // Before rounding normalize the exponent of Category::Normal numbers.
2052 let mut omsb = sig::omsb(&self.sig);
2053
2054 if omsb > 0 {
2055 // OMSB is numbered from 1. We want to place it in the integer
2056 // bit numbered PRECISION if possible, with a compensating change in
2057 // the exponent.
2058 let mut final_exp = self.exp.saturating_add(omsb as ExpInt - S::PRECISION as ExpInt);
2059
2060 // If the resulting exponent is too high, overflow according to
2061 // the rounding mode.
2062 if final_exp > S::MAX_EXP {
2063 let round = round.negate_if(self.is_negative());
2064 return Self::overflow_result(round).map(|r| r.copy_sign(self));
2065 }
2066
2067 // Subnormal numbers have exponent MIN_EXP, and their MSB
2068 // is forced based on that.
2069 if final_exp < S::MIN_EXP {
2070 final_exp = S::MIN_EXP;
2071 }
2072
2073 // Shifting left is easy as we don't lose precision.
2074 if final_exp < self.exp {
2075 assert_eq!(loss, Loss::ExactlyZero);
2076
2077 let exp_change = (self.exp - final_exp) as usize;
2078 sig::shift_left(&mut self.sig, &mut self.exp, exp_change);
2079
2080 return Status::OK.and(self);
2081 }
2082
2083 // Shift right and capture any new lost fraction.
2084 if final_exp > self.exp {
2085 let exp_change = (final_exp - self.exp) as usize;
2086 loss = sig::shift_right(&mut self.sig, &mut self.exp, exp_change).combine(loss);
2087
2088 // Keep OMSB up-to-date.
2089 omsb = omsb.saturating_sub(exp_change);
2090 }
2091 }
2092
2093 // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up
2094 // the largest significand of `MAX_EXP` (which also has normals), though
2095 // comparing significands needs to ignore the integer bit `NAN` lacks.
2096 if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly
2097 && self.exp == Self::NAN.exp
2098 && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig
2099 {
2100 return Self::overflow_result(round).map(|r| r.copy_sign(self));
2101 }
2102
2103 // Now round the number according to round given the lost
2104 // fraction.
2105
2106 // As specified in IEEE 754, since we do not trap we do not report
2107 // underflow for exact results.
2108 if loss == Loss::ExactlyZero {
2109 // Canonicalize zeros.
2110 if omsb == 0 {
2111 self = Self::ZERO.copy_sign(self);
2112 }
2113
2114 return Status::OK.and(self);
2115 }
2116
2117 // Increment the significand if we're rounding away from zero.
2118 if self.round_away_from_zero(round, loss, 0) {
2119 if omsb == 0 {
2120 self.exp = S::MIN_EXP;
2121 }
2122
2123 // We should never overflow.
2124 assert_eq!(sig::increment(&mut self.sig), 0);
2125 omsb = sig::omsb(&self.sig);
2126
2127 // Did the significand increment overflow?
2128 if omsb == S::PRECISION + 1 {
2129 // Renormalize by incrementing the exponent and shifting our
2130 // significand right one. However if we already have the
2131 // maximum exponent we overflow to infinity.
2132 if self.exp == S::MAX_EXP {
2133 return (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY.copy_sign(self));
2134 }
2135
2136 let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1);
2137
2138 return Status::INEXACT.and(self);
2139 }
2140
2141 // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up
2142 // the largest significand of `MAX_EXP` (which also has normals), though
2143 // comparing significands needs to ignore the integer bit `NAN` lacks.
2144 if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly
2145 && self.exp == Self::NAN.exp
2146 && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig
2147 {
2148 return Self::overflow_result(round).map(|r| r.copy_sign(self));
2149 }
2150 }
2151
2152 // The normal case - we were and are not denormal, and any
2153 // significand increment above didn't overflow.
2154 if omsb == S::PRECISION {
2155 return Status::INEXACT.and(self);
2156 }
2157
2158 // We have a non-zero denormal.
2159 assert!(omsb < S::PRECISION);
2160
2161 // Canonicalize zeros.
2162 if omsb == 0 {
2163 self = Self::ZERO.copy_sign(self);
2164 }
2165
2166 // The Category::Zero case is a denormal that underflowed to zero.
2167 (Status::UNDERFLOW | Status::INEXACT).and(self)
2168 }
2169
2170 fn from_hexadecimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
2171 let mut r = IeeeFloat {
2172 sig: [0],
2173 exp: 0,
2174 read_only_category_do_not_mutate: Category::Normal,
2175 read_only_sign_do_not_mutate: false,
2176 marker: PhantomData,
2177 };
2178
2179 let mut any_digits = false;
2180 let mut has_exp = false;
2181 let mut bit_pos = LIMB_BITS as isize;
2182 let mut loss = None;
2183
2184 // Without leading or trailing zeros, irrespective of the dot.
2185 let mut first_sig_digit = None;
2186 let mut dot = s.len();
2187
2188 for (p, c) in s.char_indices() {
2189 // Skip leading zeros and any (hexa)decimal point.
2190 if c == '.' {
2191 if dot != s.len() {
2192 return Err(ParseError("String contains multiple dots"));
2193 }
2194 dot = p;
2195 } else if let Some(hex_value) = c.to_digit(16) {
2196 any_digits = true;
2197
2198 if first_sig_digit.is_none() {
2199 if hex_value == 0 {
2200 continue;
2201 }
2202 first_sig_digit = Some(p);
2203 }
2204
2205 // Store the number while we have space.
2206 bit_pos -= 4;
2207 if bit_pos >= 0 {
2208 r.sig[0] |= (hex_value as Limb) << bit_pos;
2209 } else {
2210 // If zero or one-half (the hexadecimal digit 8) are followed
2211 // by non-zero, they're a little more than zero or one-half.
2212 if let Some(ref mut loss) = loss {
2213 if hex_value != 0 {
2214 if *loss == Loss::ExactlyZero {
2215 *loss = Loss::LessThanHalf;
2216 }
2217 if *loss == Loss::ExactlyHalf {
2218 *loss = Loss::MoreThanHalf;
2219 }
2220 }
2221 } else {
2222 loss = Some(match hex_value {
2223 0 => Loss::ExactlyZero,
2224 1..=7 => Loss::LessThanHalf,
2225 8 => Loss::ExactlyHalf,
2226 9..=15 => Loss::MoreThanHalf,
2227 _ => unreachable!(),
2228 });
2229 }
2230 }
2231 } else if c == 'p' || c == 'P' {
2232 if !any_digits {
2233 return Err(ParseError("Significand has no digits"));
2234 }
2235
2236 if dot == s.len() {
2237 dot = p;
2238 }
2239
2240 let mut chars = s[p + 1..].chars().peekable();
2241
2242 // Adjust for the given exponent.
2243 let exp_minus = chars.peek() == Some(&'-');
2244 if exp_minus || chars.peek() == Some(&'+') {
2245 chars.next();
2246 }
2247
2248 for c in chars {
2249 if let Some(value) = c.to_digit(10) {
2250 has_exp = true;
2251 r.exp = r.exp.saturating_mul(10).saturating_add(value as ExpInt);
2252 } else {
2253 return Err(ParseError("Invalid character in exponent"));
2254 }
2255 }
2256 if !has_exp {
2257 return Err(ParseError("Exponent has no digits"));
2258 }
2259
2260 r.exp = r.exp.negate_if(exp_minus);
2261
2262 break;
2263 } else {
2264 return Err(ParseError("Invalid character in significand"));
2265 }
2266 }
2267 if !any_digits {
2268 return Err(ParseError("Significand has no digits"));
2269 }
2270
2271 // Hex floats require an exponent but not a hexadecimal point.
2272 if !has_exp {
2273 return Err(ParseError("Hex strings require an exponent"));
2274 }
2275
2276 // Ignore the exponent if we are zero.
2277 let first_sig_digit = match first_sig_digit {
2278 Some(p) => p,
2279 None => return Ok(Status::OK.and(Self::ZERO)),
2280 };
2281
2282 // Calculate the exponent adjustment implicit in the number of
2283 // significant digits and adjust for writing the significand starting
2284 // at the most significant nibble.
2285 let exp_adjustment = if dot > first_sig_digit {
2286 ExpInt::try_from(dot - first_sig_digit).unwrap()
2287 } else {
2288 -ExpInt::try_from(first_sig_digit - dot - 1).unwrap()
2289 };
2290 let exp_adjustment = exp_adjustment
2291 .saturating_mul(4)
2292 .saturating_sub(1)
2293 .saturating_add(S::PRECISION as ExpInt)
2294 .saturating_sub(LIMB_BITS as ExpInt);
2295 r.exp = r.exp.saturating_add(exp_adjustment);
2296
2297 Ok(r.normalize(round, loss.unwrap_or(Loss::ExactlyZero)))
2298 }
2299
2300 fn from_decimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
2301 // Given a normal decimal floating point number of the form
2302 //
2303 // dddd.dddd[eE][+-]ddd
2304 //
2305 // where the decimal point and exponent are optional, fill out the
2306 // variables below. Exponent is appropriate if the significand is
2307 // treated as an integer, and normalized_exp if the significand
2308 // is taken to have the decimal point after a single leading
2309 // non-zero digit.
2310 //
2311 // If the value is zero, first_sig_digit is None.
2312
2313 let mut any_digits = false;
2314 let mut dec_exp = 0i32;
2315
2316 // Without leading or trailing zeros, irrespective of the dot.
2317 let mut first_sig_digit = None;
2318 let mut last_sig_digit = 0;
2319 let mut dot = s.len();
2320
2321 for (p, c) in s.char_indices() {
2322 if c == '.' {
2323 if dot != s.len() {
2324 return Err(ParseError("String contains multiple dots"));
2325 }
2326 dot = p;
2327 } else if let Some(dec_value) = c.to_digit(10) {
2328 any_digits = true;
2329
2330 if dec_value != 0 {
2331 if first_sig_digit.is_none() {
2332 first_sig_digit = Some(p);
2333 }
2334 last_sig_digit = p;
2335 }
2336 } else if c == 'e' || c == 'E' {
2337 if !any_digits {
2338 return Err(ParseError("Significand has no digits"));
2339 }
2340
2341 if dot == s.len() {
2342 dot = p;
2343 }
2344
2345 let mut chars = s[p + 1..].chars().peekable();
2346
2347 // Adjust for the given exponent.
2348 let exp_minus = chars.peek() == Some(&'-');
2349 if exp_minus || chars.peek() == Some(&'+') {
2350 chars.next();
2351 }
2352
2353 let mut any_exp_digits = false;
2354 for c in chars {
2355 if let Some(value) = c.to_digit(10) {
2356 any_exp_digits = true;
2357 dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32);
2358 } else {
2359 return Err(ParseError("Invalid character in exponent"));
2360 }
2361 }
2362 // Treat no exponent as 0 to match binutils
2363 if !any_exp_digits {
2364 assert_eq!(dec_exp, 0);
2365 }
2366
2367 dec_exp = dec_exp.negate_if(exp_minus);
2368
2369 break;
2370 } else {
2371 return Err(ParseError("Invalid character in significand"));
2372 }
2373 }
2374 if !any_digits {
2375 return Err(ParseError("Significand has no digits"));
2376 }
2377
2378 // Test if we have a zero number allowing for non-zero exponents.
2379 let first_sig_digit = match first_sig_digit {
2380 Some(p) => p,
2381 None => return Ok(Status::OK.and(Self::ZERO)),
2382 };
2383
2384 // Adjust the exponents for any decimal point.
2385 if dot > last_sig_digit {
2386 dec_exp = dec_exp.saturating_add((dot - last_sig_digit - 1) as i32);
2387 } else {
2388 dec_exp = dec_exp.saturating_sub((last_sig_digit - dot) as i32);
2389 }
2390 let significand_digits =
2391 last_sig_digit - first_sig_digit + 1 - (dot > first_sig_digit && dot < last_sig_digit) as usize;
2392 let normalized_exp = dec_exp.saturating_add(significand_digits as i32 - 1);
2393
2394 // Handle the cases where exponents are obviously too large or too
2395 // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp
2396 // definitely overflows if
2397 //
2398 // (dec_exp - 1) * L >= MAX_EXP
2399 //
2400 // and definitely underflows to zero where
2401 //
2402 // (dec_exp + 1) * L <= MIN_EXP - PRECISION
2403 //
2404 // With integer arithmetic the tightest bounds for L are
2405 //
2406 // 93/28 < L < 196/59 [ numerator <= 256 ]
2407 // 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2408
2409 // Check for MAX_EXP.
2410 if normalized_exp.saturating_sub(1).saturating_mul(42039) >= 12655 * S::MAX_EXP as i32 {
2411 // Overflow and round.
2412 return Ok(Self::overflow_result(round));
2413 }
2414
2415 // Check for MIN_EXP.
2416 if normalized_exp.saturating_add(1).saturating_mul(28738) <= 8651 * (S::MIN_EXP as i32 - S::PRECISION as i32) {
2417 // Underflow to zero and round.
2418 let r = if round == Round::TowardPositive {
2419 IeeeFloat::SMALLEST
2420 } else {
2421 IeeeFloat::ZERO
2422 };
2423 return Ok((Status::UNDERFLOW | Status::INEXACT).and(r));
2424 }
2425
2426 // A tight upper bound on number of bits required to hold an
2427 // N-digit decimal integer is N * 196 / 59. Allocate enough space
2428 // to hold the full significand, and an extra limb required by
2429 // tcMultiplyPart.
2430 let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59);
2431 let mut dec_sig = DynPrecisionLimbVec::with_capacity(max_limbs);
2432
2433 // Convert to binary efficiently - we do almost all multiplication
2434 // in a Limb. When this would overflow do we do a single
2435 // bignum multiplication, and then revert again to multiplication
2436 // in a Limb.
2437 let mut chars = s[first_sig_digit..last_sig_digit + 1].chars();
2438 loop {
2439 let mut val = 0;
2440 let mut multiplier = 1;
2441
2442 loop {
2443 let dec_value = match chars.next() {
2444 Some('.') => continue,
2445 Some(c) => c.to_digit(10).unwrap(),
2446 None => break,
2447 };
2448
2449 multiplier *= 10;
2450 val = val * 10 + dec_value as Limb;
2451
2452 // The maximum number that can be multiplied by ten with any
2453 // digit added without overflowing a Limb.
2454 if multiplier > (!0 - 9) / 10 {
2455 break;
2456 }
2457 }
2458
2459 // If we've consumed no digits, we're done.
2460 if multiplier == 1 {
2461 break;
2462 }
2463
2464 // Multiply out the current limb.
2465 let mut carry = val;
2466 for x in &mut dec_sig {
2467 let [low, mut high] = sig::widening_mul(*x, multiplier);
2468
2469 // Now add carry.
2470 let (low, overflow) = low.overflowing_add(carry);
2471 high += overflow as Limb;
2472
2473 *x = low;
2474 carry = high;
2475 }
2476
2477 // If we had carry, we need another limb (likely but not guaranteed).
2478 if carry > 0 {
2479 dec_sig.push(carry);
2480 }
2481 }
2482
2483 // Calculate pow(5, abs(dec_exp)) into `pow5_full`.
2484 // The *_calc Vec's are reused scratch space, as an optimization.
2485 let (pow5_full, mut pow5_calc, mut sig_calc, mut sig_scratch_calc) = {
2486 let mut power = dec_exp.abs() as usize;
2487
2488 const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125];
2489
2490 let mut p5_scratch = DynPrecisionLimbVec::new();
2491 let mut p5: DynPrecisionLimbVec = [FIRST_EIGHT_POWERS[4]].into_iter().collect();
2492
2493 let mut r_scratch = DynPrecisionLimbVec::new();
2494 let mut r: DynPrecisionLimbVec = [FIRST_EIGHT_POWERS[power & 7]].into_iter().collect();
2495 power >>= 3;
2496
2497 while power > 0 {
2498 // Calculate pow(5,pow(2,n+3)).
2499 p5_scratch.resize(p5.len() * 2, 0);
2500 let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS);
2501 while p5_scratch.last() == Some(&0) {
2502 p5_scratch.pop();
2503 }
2504 mem::swap(&mut p5, &mut p5_scratch);
2505
2506 if power & 1 != 0 {
2507 r_scratch.resize(r.len() + p5.len(), 0);
2508 let _: Loss = sig::mul(&mut r_scratch, &mut 0, &r, &p5, (r.len() + p5.len()) * LIMB_BITS);
2509 while r_scratch.last() == Some(&0) {
2510 r_scratch.pop();
2511 }
2512 mem::swap(&mut r, &mut r_scratch);
2513 }
2514
2515 power >>= 1;
2516 }
2517
2518 (r, r_scratch, p5, p5_scratch)
2519 };
2520
2521 // Attempt dec_sig * 10^dec_exp with increasing precision.
2522 let mut attempt = 0;
2523 loop {
2524 let calc_precision = (LIMB_BITS << attempt) - 1;
2525 attempt += 1;
2526
2527 let calc_normal_from_limbs = |sig: &mut DynPrecisionLimbVec, limbs: &[Limb]| -> StatusAnd<ExpInt> {
2528 sig.resize(limbs_for_bits(calc_precision), 0);
2529 let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision);
2530
2531 // Before rounding normalize the exponent of Category::Normal numbers.
2532 let mut omsb = sig::omsb(sig);
2533
2534 assert_ne!(omsb, 0);
2535
2536 // OMSB is numbered from 1. We want to place it in the integer
2537 // bit numbered PRECISION if possible, with a compensating change in
2538 // the exponent.
2539 let final_exp = exp.saturating_add(omsb as ExpInt - calc_precision as ExpInt);
2540
2541 // Shifting left is easy as we don't lose precision.
2542 if final_exp < exp {
2543 assert_eq!(loss, Loss::ExactlyZero);
2544
2545 let exp_change = (exp - final_exp) as usize;
2546 sig::shift_left(sig, &mut exp, exp_change);
2547
2548 return Status::OK.and(exp);
2549 }
2550
2551 // Shift right and capture any new lost fraction.
2552 if final_exp > exp {
2553 let exp_change = (final_exp - exp) as usize;
2554 loss = sig::shift_right(sig, &mut exp, exp_change).combine(loss);
2555
2556 // Keep OMSB up-to-date.
2557 omsb = omsb.saturating_sub(exp_change);
2558 }
2559
2560 assert_eq!(omsb, calc_precision);
2561
2562 // Now round the number according to round given the lost
2563 // fraction.
2564
2565 // As specified in IEEE 754, since we do not trap we do not report
2566 // underflow for exact results.
2567 if loss == Loss::ExactlyZero {
2568 return Status::OK.and(exp);
2569 }
2570
2571 // Increment the significand if we're rounding away from zero.
2572 if loss == Loss::MoreThanHalf || loss == Loss::ExactlyHalf && sig::get_bit(sig, 0) {
2573 // We should never overflow.
2574 assert_eq!(sig::increment(sig), 0);
2575 omsb = sig::omsb(sig);
2576
2577 // Did the significand increment overflow?
2578 if omsb == calc_precision + 1 {
2579 let _: Loss = sig::shift_right(sig, &mut exp, 1);
2580
2581 return Status::INEXACT.and(exp);
2582 }
2583 }
2584
2585 // The normal case - we were and are not denormal, and any
2586 // significand increment above didn't overflow.
2587 Status::INEXACT.and(exp)
2588 };
2589
2590 let status;
2591 let mut exp = unpack!(status=,
2592 calc_normal_from_limbs(&mut sig_calc, &dec_sig));
2593 let pow5_status;
2594 let pow5_exp = unpack!(pow5_status=,
2595 calc_normal_from_limbs(&mut pow5_calc, &pow5_full));
2596
2597 // Add dec_exp, as 10^n = 5^n * 2^n.
2598 exp += dec_exp as ExpInt;
2599
2600 let mut used_bits = S::PRECISION;
2601 let mut truncated_bits = calc_precision - used_bits;
2602
2603 let half_ulp_err1 = (status != Status::OK) as Limb;
2604 let (calc_loss, half_ulp_err2);
2605 if dec_exp >= 0 {
2606 exp += pow5_exp;
2607
2608 sig_scratch_calc.resize(sig_calc.len() + pow5_calc.len(), 0);
2609 calc_loss = sig::mul(&mut sig_scratch_calc, &mut exp, &sig_calc, &pow5_calc, calc_precision);
2610 mem::swap(&mut sig_calc, &mut sig_scratch_calc);
2611
2612 half_ulp_err2 = (pow5_status != Status::OK) as Limb;
2613 } else {
2614 exp -= pow5_exp;
2615
2616 sig_scratch_calc.resize(sig_calc.len(), 0);
2617 calc_loss = sig::div(&mut sig_scratch_calc, &mut exp, &mut sig_calc, &mut pow5_calc, calc_precision);
2618 mem::swap(&mut sig_calc, &mut sig_scratch_calc);
2619
2620 // Denormal numbers have less precision.
2621 if exp < S::MIN_EXP {
2622 truncated_bits += (S::MIN_EXP - exp) as usize;
2623 used_bits = calc_precision.saturating_sub(truncated_bits);
2624 }
2625 // Extra half-ulp lost in reciprocal of exponent.
2626 half_ulp_err2 = 2 * (pow5_status != Status::OK || calc_loss != Loss::ExactlyZero) as Limb;
2627 }
2628
2629 // Both sig::mul and sig::div return the
2630 // result with the integer bit set.
2631 assert!(sig::get_bit(&sig_calc, calc_precision - 1));
2632
2633 // The error from the true value, in half-ulps, on multiplying two
2634 // floating point numbers, which differ from the value they
2635 // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less
2636 // than the returned value.
2637 //
2638 // See "How to Read Floating Point Numbers Accurately" by William D Clinger.
2639 assert!(half_ulp_err1 < 2 || half_ulp_err2 < 2 || (half_ulp_err1 + half_ulp_err2 < 8));
2640
2641 let inexact = (calc_loss != Loss::ExactlyZero) as Limb;
2642 let half_ulp_err = if half_ulp_err1 + half_ulp_err2 == 0 {
2643 inexact * 2 // <= inexact half-ulps.
2644 } else {
2645 inexact + 2 * (half_ulp_err1 + half_ulp_err2)
2646 };
2647
2648 let ulps_from_boundary = {
2649 let bits = calc_precision - used_bits - 1;
2650
2651 let i = bits / LIMB_BITS;
2652 let limb = sig_calc[i] & (!0 >> (LIMB_BITS - 1 - bits % LIMB_BITS));
2653 let boundary = match round {
2654 Round::NearestTiesToEven | Round::NearestTiesToAway => 1 << (bits % LIMB_BITS),
2655 _ => 0,
2656 };
2657 if i == 0 {
2658 let delta = limb.wrapping_sub(boundary);
2659 cmp::min(delta, delta.wrapping_neg())
2660 } else if limb == boundary {
2661 if !sig::is_all_zeros(&sig_calc[1..i]) {
2662 !0 // A lot.
2663 } else {
2664 sig_calc[0]
2665 }
2666 } else if limb == boundary.wrapping_sub(1) {
2667 if sig_calc[1..i].iter().any(|&x| x.wrapping_neg() != 1) {
2668 !0 // A lot.
2669 } else {
2670 sig_calc[0].wrapping_neg()
2671 }
2672 } else {
2673 !0 // A lot.
2674 }
2675 };
2676
2677 // Are we guaranteed to round correctly if we truncate?
2678 if ulps_from_boundary.saturating_mul(2) >= half_ulp_err {
2679 let mut r = IeeeFloat {
2680 sig: [0],
2681 exp,
2682 read_only_category_do_not_mutate: Category::Normal,
2683 read_only_sign_do_not_mutate: false,
2684 marker: PhantomData,
2685 };
2686 sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits);
2687 // If we extracted less bits above we must adjust our exponent
2688 // to compensate for the implicit right shift.
2689 r.exp += (S::PRECISION - used_bits) as ExpInt;
2690 let loss = Loss::through_truncation(&sig_calc, truncated_bits);
2691 return Ok(r.normalize(round, loss));
2692 }
2693 }
2694 }
2695}
2696
2697impl Loss {
2698 /// Combine the effect of two lost fractions.
2699 #[inline]
2700 fn combine(self, less_significant: Loss) -> Loss {
2701 let mut more_significant = self;
2702 if less_significant != Loss::ExactlyZero {
2703 if more_significant == Loss::ExactlyZero {
2704 more_significant = Loss::LessThanHalf;
2705 } else if more_significant == Loss::ExactlyHalf {
2706 more_significant = Loss::MoreThanHalf;
2707 }
2708 }
2709
2710 more_significant
2711 }
2712
2713 /// Return the fraction lost were a bignum truncated losing the least
2714 /// significant `bits` bits.
2715 #[inline]
2716 fn through_truncation(limbs: &[Limb], bits: usize) -> Loss {
2717 if bits == 0 {
2718 return Loss::ExactlyZero;
2719 }
2720
2721 let half_bit = bits - 1;
2722 let half_limb = half_bit / LIMB_BITS;
2723 let (half_limb, rest) = if half_limb < limbs.len() {
2724 (limbs[half_limb], &limbs[..half_limb])
2725 } else {
2726 (0, limbs)
2727 };
2728 let half = 1 << (half_bit % LIMB_BITS);
2729 let has_half = half_limb & half != 0;
2730 let has_rest = half_limb & (half - 1) != 0 || !sig::is_all_zeros(rest);
2731
2732 match (has_half, has_rest) {
2733 (false, false) => Loss::ExactlyZero,
2734 (false, true) => Loss::LessThanHalf,
2735 (true, false) => Loss::ExactlyHalf,
2736 (true, true) => Loss::MoreThanHalf,
2737 }
2738 }
2739}
2740
2741/// Implementation details of IeeeFloat significands, such as big integer arithmetic.
2742/// As a rule of thumb, no functions in this module should dynamically allocate.
2743mod sig {
2744 use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS};
2745 use core::cmp::Ordering;
2746 use core::mem;
2747
2748 #[inline]
2749 pub(super) fn is_all_zeros(limbs: &[Limb]) -> bool {
2750 limbs.iter().all(|&l| l == 0)
2751 }
2752
2753 /// One, not zero, based LSB. That is, returns 0 for a zeroed significand.
2754 #[inline]
2755 pub(super) fn olsb(limbs: &[Limb]) -> usize {
2756 for i in 0..limbs.len() {
2757 if limbs[i] != 0 {
2758 return i * LIMB_BITS + limbs[i].trailing_zeros() as usize + 1;
2759 }
2760 }
2761
2762 0
2763 }
2764
2765 /// One, not zero, based MSB. That is, returns 0 for a zeroed significand.
2766 #[inline]
2767 pub(super) fn omsb(limbs: &[Limb]) -> usize {
2768 for i in (0..limbs.len()).rev() {
2769 if limbs[i] != 0 {
2770 return (i + 1) * LIMB_BITS - limbs[i].leading_zeros() as usize;
2771 }
2772 }
2773
2774 0
2775 }
2776
2777 /// Comparison (unsigned) of two significands.
2778 #[inline]
2779 pub(super) fn cmp(a: &[Limb], b: &[Limb]) -> Ordering {
2780 assert_eq!(a.len(), b.len());
2781 for (a, b) in a.iter().zip(b).rev() {
2782 match a.cmp(b) {
2783 Ordering::Equal => {}
2784 o => return o,
2785 }
2786 }
2787
2788 Ordering::Equal
2789 }
2790
2791 /// Extract the given bit.
2792 #[inline]
2793 pub(super) fn get_bit(limbs: &[Limb], bit: usize) -> bool {
2794 limbs[bit / LIMB_BITS] & (1 << (bit % LIMB_BITS)) != 0
2795 }
2796
2797 /// Set the given bit.
2798 #[inline]
2799 pub(super) fn set_bit(limbs: &mut [Limb], bit: usize) {
2800 limbs[bit / LIMB_BITS] |= 1 << (bit % LIMB_BITS);
2801 }
2802
2803 /// Shift `dst` left `bits` bits, subtract `bits` from its exponent.
2804 #[inline]
2805 pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) {
2806 if bits > 0 {
2807 // Our exponent should not underflow.
2808 *exp = exp.checked_sub(bits as ExpInt).unwrap();
2809
2810 // Jump is the inter-limb jump; shift is is intra-limb shift.
2811 let jump = bits / LIMB_BITS;
2812 let shift = bits % LIMB_BITS;
2813
2814 for i in (0..dst.len()).rev() {
2815 let mut limb;
2816
2817 if i < jump {
2818 limb = 0;
2819 } else {
2820 // dst[i] comes from the two limbs src[i - jump] and, if we have
2821 // an intra-limb shift, src[i - jump - 1].
2822 limb = dst[i - jump];
2823 if shift > 0 {
2824 limb <<= shift;
2825 if i >= jump + 1 {
2826 limb |= dst[i - jump - 1] >> (LIMB_BITS - shift);
2827 }
2828 }
2829 }
2830
2831 dst[i] = limb;
2832 }
2833 }
2834 }
2835
2836 /// Shift `dst` right `bits` bits noting lost fraction.
2837 #[inline]
2838 pub(super) fn shift_right(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) -> Loss {
2839 let loss = Loss::through_truncation(dst, bits);
2840
2841 if bits > 0 {
2842 // Our exponent should not overflow.
2843 *exp = exp.checked_add(bits as ExpInt).unwrap();
2844
2845 // Jump is the inter-limb jump; shift is is intra-limb shift.
2846 let jump = bits / LIMB_BITS;
2847 let shift = bits % LIMB_BITS;
2848
2849 // Perform the shift. This leaves the most significant `bits` bits
2850 // of the result at zero.
2851 for i in 0..dst.len() {
2852 let mut limb;
2853
2854 if i + jump >= dst.len() {
2855 limb = 0;
2856 } else {
2857 limb = dst[i + jump];
2858 if shift > 0 {
2859 limb >>= shift;
2860 if i + jump + 1 < dst.len() {
2861 limb |= dst[i + jump + 1] << (LIMB_BITS - shift);
2862 }
2863 }
2864 }
2865
2866 dst[i] = limb;
2867 }
2868 }
2869
2870 loss
2871 }
2872
2873 /// Copy the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB,
2874 /// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`.
2875 /// All high bits above `src_bits` in `dst` are zero-filled.
2876 #[inline]
2877 pub(super) fn extract(dst: &mut [Limb], src: &[Limb], src_bits: usize, src_lsb: usize) {
2878 if src_bits == 0 {
2879 return;
2880 }
2881
2882 let dst_limbs = limbs_for_bits(src_bits);
2883 assert!(dst_limbs <= dst.len());
2884
2885 let src = &src[src_lsb / LIMB_BITS..];
2886 dst[..dst_limbs].copy_from_slice(&src[..dst_limbs]);
2887
2888 let shift = src_lsb % LIMB_BITS;
2889 let _: Loss = shift_right(&mut dst[..dst_limbs], &mut 0, shift);
2890
2891 // We now have (dst_limbs * LIMB_BITS - shift) bits from `src`
2892 // in `dst`. If this is less that src_bits, append the rest, else
2893 // clear the high bits.
2894 let n = dst_limbs * LIMB_BITS - shift;
2895 if n < src_bits {
2896 let mask = (1 << (src_bits - n)) - 1;
2897 dst[dst_limbs - 1] |= (src[dst_limbs] & mask) << n % LIMB_BITS;
2898 } else if n > src_bits && src_bits % LIMB_BITS > 0 {
2899 dst[dst_limbs - 1] &= (1 << (src_bits % LIMB_BITS)) - 1;
2900 }
2901
2902 // Clear high limbs.
2903 for x in &mut dst[dst_limbs..] {
2904 *x = 0;
2905 }
2906 }
2907
2908 /// We want the most significant PRECISION bits of `src`. There may not
2909 /// be that many; extract what we can.
2910 #[inline]
2911 pub(super) fn from_limbs(dst: &mut [Limb], src: &[Limb], precision: usize) -> (Loss, ExpInt) {
2912 let omsb = omsb(src);
2913
2914 if precision <= omsb {
2915 extract(dst, src, precision, omsb - precision);
2916 (Loss::through_truncation(src, omsb - precision), omsb as ExpInt - 1)
2917 } else {
2918 extract(dst, src, omsb, 0);
2919 (Loss::ExactlyZero, precision as ExpInt - 1)
2920 }
2921 }
2922
2923 /// For every consecutive chunk of `bits` bits from `limbs`,
2924 /// going from most significant to the least significant bits,
2925 /// call `f` to transform those bits and store the result back.
2926 #[inline]
2927 pub(super) fn each_chunk<F: FnMut(Limb) -> Limb>(limbs: &mut [Limb], bits: usize, mut f: F) {
2928 assert_eq!(LIMB_BITS % bits, 0);
2929 for limb in limbs.iter_mut().rev() {
2930 let mut r = 0;
2931 for i in (0..LIMB_BITS / bits).rev() {
2932 r |= f((*limb >> (i * bits)) & ((1 << bits) - 1)) << (i * bits);
2933 }
2934 *limb = r;
2935 }
2936 }
2937
2938 /// Increment in-place, return the carry flag.
2939 #[inline]
2940 pub(super) fn increment(dst: &mut [Limb]) -> Limb {
2941 for x in dst {
2942 *x = x.wrapping_add(1);
2943 if *x != 0 {
2944 return 0;
2945 }
2946 }
2947
2948 1
2949 }
2950
2951 /// Decrement in-place, return the borrow flag.
2952 #[inline]
2953 pub(super) fn decrement(dst: &mut [Limb]) -> Limb {
2954 for x in dst {
2955 *x = x.wrapping_sub(1);
2956 if *x != !0 {
2957 return 0;
2958 }
2959 }
2960
2961 1
2962 }
2963
2964 /// `a += b + c` where `c` is zero or one. Returns the carry flag.
2965 #[inline]
2966 pub(super) fn add(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
2967 assert!(c <= 1);
2968
2969 for (a, &b) in a.iter_mut().zip(b) {
2970 let (r, overflow) = a.overflowing_add(b);
2971 let (r, overflow2) = r.overflowing_add(c);
2972 *a = r;
2973 c = (overflow | overflow2) as Limb;
2974 }
2975
2976 c
2977 }
2978
2979 /// `a -= b + c` where `c` is zero or one. Returns the borrow flag.
2980 #[inline]
2981 pub(super) fn sub(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb {
2982 assert!(c <= 1);
2983
2984 for (a, &b) in a.iter_mut().zip(b) {
2985 let (r, overflow) = a.overflowing_sub(b);
2986 let (r, overflow2) = r.overflowing_sub(c);
2987 *a = r;
2988 c = (overflow | overflow2) as Limb;
2989 }
2990
2991 c
2992 }
2993
2994 /// `a += b` or `a -= b`. Does not preserve `b`.
2995 #[inline]
2996 pub(super) fn add_or_sub(
2997 a_sig: &mut [Limb],
2998 a_exp: &mut ExpInt,
2999 a_sign: &mut bool,
3000 b_sig: &mut [Limb],
3001 b_exp: ExpInt,
3002 b_sign: bool,
3003 ) -> Loss {
3004 // Are we bigger exponent-wise than the RHS?
3005 let bits = *a_exp - b_exp;
3006
3007 // Determine if the operation on the absolute values is effectively
3008 // an addition or subtraction.
3009 // Subtraction is more subtle than one might naively expect.
3010 if *a_sign ^ b_sign {
3011 let loss;
3012
3013 if bits == 0 {
3014 loss = Loss::ExactlyZero;
3015 } else if bits > 0 {
3016 loss = shift_right(b_sig, &mut 0, (bits - 1) as usize);
3017 shift_left(a_sig, a_exp, 1);
3018 } else {
3019 loss = shift_right(a_sig, a_exp, (-bits - 1) as usize);
3020 shift_left(b_sig, &mut 0, 1);
3021 }
3022
3023 let borrow = (loss != Loss::ExactlyZero) as Limb;
3024
3025 // Should we reverse the subtraction.
3026 if cmp(a_sig, b_sig) == Ordering::Less {
3027 // The code above is intended to ensure that no borrow is necessary.
3028 assert_eq!(sub(b_sig, a_sig, borrow), 0);
3029 a_sig.copy_from_slice(b_sig);
3030 *a_sign = !*a_sign;
3031 } else {
3032 // The code above is intended to ensure that no borrow is necessary.
3033 assert_eq!(sub(a_sig, b_sig, borrow), 0);
3034 }
3035
3036 // Invert the lost fraction - it was on the RHS and subtracted.
3037 match loss {
3038 Loss::LessThanHalf => Loss::MoreThanHalf,
3039 Loss::MoreThanHalf => Loss::LessThanHalf,
3040 _ => loss,
3041 }
3042 } else {
3043 let loss = if bits > 0 {
3044 shift_right(b_sig, &mut 0, bits as usize)
3045 } else {
3046 shift_right(a_sig, a_exp, -bits as usize)
3047 };
3048 // We have a guard bit; generating a carry cannot happen.
3049 assert_eq!(add(a_sig, b_sig, 0), 0);
3050 loss
3051 }
3052 }
3053
3054 /// `[low, high] = a * b`.
3055 ///
3056 /// This cannot overflow, because
3057 ///
3058 /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)`
3059 ///
3060 /// which is less than n^2.
3061 #[inline]
3062 pub(super) fn widening_mul(a: Limb, b: Limb) -> [Limb; 2] {
3063 let mut wide = [0, 0];
3064
3065 if a == 0 || b == 0 {
3066 return wide;
3067 }
3068
3069 const HALF_BITS: usize = LIMB_BITS / 2;
3070
3071 let select = |limb, i| (limb >> (i * HALF_BITS)) & ((1 << HALF_BITS) - 1);
3072 for i in 0..2 {
3073 for j in 0..2 {
3074 let mut x = [select(a, i) * select(b, j), 0];
3075 shift_left(&mut x, &mut 0, (i + j) * HALF_BITS);
3076 assert_eq!(add(&mut wide, &x, 0), 0);
3077 }
3078 }
3079
3080 wide
3081 }
3082
3083 /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction.
3084 #[inline]
3085 pub(super) fn mul<'a>(
3086 dst: &mut [Limb],
3087 exp: &mut ExpInt,
3088 mut a: &'a [Limb],
3089 mut b: &'a [Limb],
3090 precision: usize,
3091 ) -> Loss {
3092 // Put the narrower number on the `a` for less loops below.
3093 if a.len() > b.len() {
3094 mem::swap(&mut a, &mut b);
3095 }
3096
3097 for x in &mut dst[..b.len()] {
3098 *x = 0;
3099 }
3100
3101 for i in 0..a.len() {
3102 let mut carry = 0;
3103 for j in 0..b.len() {
3104 let [low, mut high] = widening_mul(a[i], b[j]);
3105
3106 // Now add carry.
3107 let (low, overflow) = low.overflowing_add(carry);
3108 high += overflow as Limb;
3109
3110 // And now `dst[i + j]`, and store the new low part there.
3111 let (low, overflow) = low.overflowing_add(dst[i + j]);
3112 high += overflow as Limb;
3113
3114 dst[i + j] = low;
3115 carry = high;
3116 }
3117 dst[i + b.len()] = carry;
3118 }
3119
3120 // Assume the operands involved in the multiplication are single-precision
3121 // FP, and the two multiplicants are:
3122 // a = a23 . a22 ... a0 * 2^e1
3123 // b = b23 . b22 ... b0 * 2^e2
3124 // the result of multiplication is:
3125 // dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
3126 // Note that there are three significant bits at the left-hand side of the
3127 // radix point: two for the multiplication, and an overflow bit for the
3128 // addition (that will always be zero at this point). Move the radix point
3129 // toward left by two bits, and adjust exponent accordingly.
3130 *exp += 2;
3131
3132 // Convert the result having "2 * precision" significant-bits back to the one
3133 // having "precision" significant-bits. First, move the radix point from
3134 // poision "2*precision - 1" to "precision - 1". The exponent need to be
3135 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
3136 *exp -= precision as ExpInt + 1;
3137
3138 // In case MSB resides at the left-hand side of radix point, shift the
3139 // mantissa right by some amount to make sure the MSB reside right before
3140 // the radix point (i.e. "MSB . rest-significant-bits").
3141 //
3142 // Note that the result is not normalized when "omsb < precision". So, the
3143 // caller needs to call IeeeFloat::normalize() if normalized value is
3144 // expected.
3145 let omsb = omsb(dst);
3146 if omsb <= precision {
3147 Loss::ExactlyZero
3148 } else {
3149 shift_right(dst, exp, omsb - precision)
3150 }
3151 }
3152
3153 /// `quotient = dividend / divisor`. Returns the lost fraction.
3154 /// Does not preserve `dividend` or `divisor`.
3155 #[inline]
3156 pub(super) fn div(
3157 quotient: &mut [Limb],
3158 exp: &mut ExpInt,
3159 dividend: &mut [Limb],
3160 divisor: &mut [Limb],
3161 precision: usize,
3162 ) -> Loss {
3163 // Normalize the divisor.
3164 let bits = precision - omsb(divisor);
3165 shift_left(divisor, &mut 0, bits);
3166 *exp += bits as ExpInt;
3167
3168 // Normalize the dividend.
3169 let bits = precision - omsb(dividend);
3170 shift_left(dividend, exp, bits);
3171
3172 // Division by 1.
3173 let olsb_divisor = olsb(divisor);
3174 if olsb_divisor == precision {
3175 quotient.copy_from_slice(dividend);
3176 return Loss::ExactlyZero;
3177 }
3178
3179 // Ensure the dividend >= divisor initially for the loop below.
3180 // Incidentally, this means that the division loop below is
3181 // guaranteed to set the integer bit to one.
3182 if cmp(dividend, divisor) == Ordering::Less {
3183 shift_left(dividend, exp, 1);
3184 assert_ne!(cmp(dividend, divisor), Ordering::Less)
3185 }
3186
3187 // Helper for figuring out the lost fraction.
3188 let lost_fraction = |dividend: &[Limb], divisor: &[Limb]| match cmp(dividend, divisor) {
3189 Ordering::Greater => Loss::MoreThanHalf,
3190 Ordering::Equal => Loss::ExactlyHalf,
3191 Ordering::Less => {
3192 if is_all_zeros(dividend) {
3193 Loss::ExactlyZero
3194 } else {
3195 Loss::LessThanHalf
3196 }
3197 }
3198 };
3199
3200 // Try to perform a (much faster) short division for small divisors.
3201 let divisor_bits = precision - (olsb_divisor - 1);
3202 macro_rules! try_short_div {
3203 ($W:ty, $H:ty, $half:expr) => {
3204 if divisor_bits * 2 <= $half {
3205 // Extract the small divisor.
3206 let _: Loss = shift_right(divisor, &mut 0, olsb_divisor - 1);
3207 let divisor = divisor[0] as $H as $W;
3208
3209 // Shift the dividend to produce a quotient with the unit bit set.
3210 let top_limb = *dividend.last().unwrap();
3211 let mut rem = (top_limb >> (LIMB_BITS - (divisor_bits - 1))) as $H;
3212 shift_left(dividend, &mut 0, divisor_bits - 1);
3213
3214 // Apply short division in place on $H (of $half bits) chunks.
3215 each_chunk(dividend, $half, |chunk| {
3216 let chunk = chunk as $H;
3217 let combined = ((rem as $W) << $half) | (chunk as $W);
3218 rem = (combined % divisor) as $H;
3219 (combined / divisor) as $H as Limb
3220 });
3221 quotient.copy_from_slice(dividend);
3222
3223 return lost_fraction(&[(rem as Limb) << 1], &[divisor as Limb]);
3224 }
3225 };
3226 }
3227
3228 try_short_div!(u32, u16, 16);
3229 try_short_div!(u64, u32, 32);
3230 try_short_div!(u128, u64, 64);
3231
3232 // Zero the quotient before setting bits in it.
3233 for x in &mut quotient[..limbs_for_bits(precision)] {
3234 *x = 0;
3235 }
3236
3237 // Long division.
3238 for bit in (0..precision).rev() {
3239 if cmp(dividend, divisor) != Ordering::Less {
3240 sub(dividend, divisor, 0);
3241 set_bit(quotient, bit);
3242 }
3243 shift_left(dividend, &mut 0, 1);
3244 }
3245
3246 lost_fraction(dividend, divisor)
3247 }
3248}
3249

Provided by KDAB

Privacy Policy
Learn Rust with the experts
Find out more