| 1 | //! Support for floating point types compatible with IEEE 754. |
| 2 | |
| 3 | use crate::{Category, ExpInt, IEK_INF, IEK_NAN, IEK_ZERO}; |
| 4 | use crate::{Float, FloatConvert, ParseError, Round, Status, StatusAnd}; |
| 5 | |
| 6 | use core::cmp::{self, Ordering}; |
| 7 | use core::convert::TryFrom; |
| 8 | use core::fmt::{self, Write}; |
| 9 | use core::marker::PhantomData; |
| 10 | use core::mem; |
| 11 | use core::ops::Neg; |
| 12 | |
| 13 | /// A floating point number that uses IEEE semantics. |
| 14 | /// |
| 15 | /// Usually you will want to use the available type aliases of this type |
| 16 | /// (e.g., [`Single`], [`Double`]) rather than referencing it directly. |
| 17 | /// |
| 18 | /// If `S` implements [`Semantics`], this type will implement [`Float`]. |
| 19 | #[must_use ] |
| 20 | pub struct IeeeFloat<S> { |
| 21 | /// Absolute significand value (including the integer bit). |
| 22 | sig: [Limb; 1], |
| 23 | |
| 24 | /// The signed unbiased exponent of the value. |
| 25 | exp: ExpInt, |
| 26 | |
| 27 | /// What kind of floating point number this is. |
| 28 | // |
| 29 | // HACK(eddyb) because mutating this without accounting for `exp`/`sig` |
| 30 | // can break some subtle edge cases, it should be only read through the |
| 31 | // `.category()` method, and only set during initialization, either for |
| 32 | // one of the special value constants, or for conversion from bits. |
| 33 | read_only_category_do_not_mutate: Category, |
| 34 | |
| 35 | /// Sign bit of the number. |
| 36 | // |
| 37 | // HACK(eddyb) because mutating this without accounting for `category` |
| 38 | // can break some subtle edge cases, it should be only read through the |
| 39 | // `.is_negative()` method, and only set through negation (which can be |
| 40 | // more easily used through e.g. `copy_sign`/`negate_if`/`with_sign`). |
| 41 | read_only_sign_do_not_mutate: bool, |
| 42 | |
| 43 | marker: PhantomData<S>, |
| 44 | } |
| 45 | |
| 46 | /// Fundamental unit of big integer arithmetic, but also |
| 47 | /// large to store the largest significands by itself. |
| 48 | type Limb = u128; |
| 49 | const LIMB_BITS: usize = 128; |
| 50 | #[inline (always)] |
| 51 | fn limbs_for_bits(bits: usize) -> usize { |
| 52 | (bits + LIMB_BITS - 1) / LIMB_BITS |
| 53 | } |
| 54 | |
| 55 | /// Growable `[Limb]` (i.e. heap-allocated and typically `Vec`/`SmallVec`/etc.), |
| 56 | /// used only by algorithms that may require dynamically arbitrary precision, |
| 57 | /// i.e. conversions from/to decimal strings. |
| 58 | /// |
| 59 | /// Note: the specific type was chosen by starting with `SmallVec<[_; 1]>` and |
| 60 | /// increasing the inline length as long as benchmarks were showing improvements, |
| 61 | /// or at least the `Double::from_str` ones, which roughly had these behaviors: |
| 62 | /// * `Vec<_>` -> `SmallVec<[_; 1]>`: ~15% speedup, but only for shorter inputs |
| 63 | /// * `SmallVec<[_; 1]>` -> `SmallVec<[_; 2]>`: ~10% speedup for longer inputs |
| 64 | /// * `SmallVec<[_; 2]>` -> `SmallVec<[_; 3]>`: noise and/or diminishing returns |
| 65 | /// |
| 66 | /// Note: the choice of type described above, and the factors in its decision, |
| 67 | /// are specific to `Limb` being `u128`, so if `Limb` changes, this should too. |
| 68 | type DynPrecisionLimbVec = smallvec::SmallVec<[Limb; 2]>; |
| 69 | |
| 70 | /// Enum that represents what fraction of the LSB truncated bits of an fp number |
| 71 | /// represent. |
| 72 | /// |
| 73 | /// This essentially combines the roles of guard and sticky bits. |
| 74 | #[must_use ] |
| 75 | #[derive (Copy, Clone, PartialEq, Eq, Debug)] |
| 76 | enum Loss { |
| 77 | // Example of truncated bits: |
| 78 | ExactlyZero, // 000000 |
| 79 | LessThanHalf, // 0xxxxx x's not all zero |
| 80 | ExactlyHalf, // 100000 |
| 81 | MoreThanHalf, // 1xxxxx x's not all zero |
| 82 | } |
| 83 | |
| 84 | /// How the nonfinite values Inf and NaN are represented. |
| 85 | #[derive (Copy, Clone, PartialEq, Eq)] |
| 86 | pub enum NonfiniteBehavior { |
| 87 | /// Represents standard IEEE 754 behavior. A value is nonfinite if the |
| 88 | /// exponent field is all 1s. In such cases, a value is Inf if the |
| 89 | /// significand bits are all zero, and NaN otherwise |
| 90 | IEEE754, |
| 91 | |
| 92 | /// Only the Float8E5M2 has this behavior. There is no Inf representation. A |
| 93 | /// value is NaN if the exponent field and the mantissa field are all 1s. |
| 94 | /// This behavior matches the FP8 E4M3 type described in |
| 95 | /// <https://arxiv.org/abs/2209.05433>. We treat both signed and unsigned NaNs |
| 96 | /// as non-signalling, although the paper does not state whether the NaN |
| 97 | /// values are signalling or not. |
| 98 | NanOnly, |
| 99 | } |
| 100 | |
| 101 | // HACK(eddyb) extension method flipping/changing the sign based on `bool`s. |
| 102 | trait NegExt: Neg<Output = Self> + Sized { |
| 103 | fn negate_if(self, negate: bool) -> Self { |
| 104 | if negate { |
| 105 | -self |
| 106 | } else { |
| 107 | self |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | fn with_sign(self, sign: bool) -> Self |
| 112 | where |
| 113 | Self: Float, |
| 114 | { |
| 115 | self.negate_if(self.is_negative() != sign) |
| 116 | } |
| 117 | } |
| 118 | impl<T: Neg<Output = Self>> NegExt for T {} |
| 119 | |
| 120 | /// Represents floating point arithmetic semantics. |
| 121 | pub trait Semantics: Sized { |
| 122 | /// Total number of bits in the interchange format. |
| 123 | const BITS: usize; |
| 124 | |
| 125 | /// Number of exponent bits in the interchange format. |
| 126 | const EXP_BITS: usize; |
| 127 | |
| 128 | /// Number of bits in the significand. This includes the integer bit. |
| 129 | const PRECISION: usize = (Self::BITS - 1 - Self::EXP_BITS) + 1; |
| 130 | |
| 131 | /// How the nonfinite values Inf and NaN are represented. |
| 132 | const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::IEEE754; |
| 133 | |
| 134 | /// The largest E such that 2^E is representable; this matches the |
| 135 | /// definition of IEEE 754. |
| 136 | const MAX_EXP: ExpInt = { |
| 137 | let ieee_inf_and_nan_replaced_with_extra_normals = match Self::NONFINITE_BEHAVIOR { |
| 138 | NonfiniteBehavior::IEEE754 => false, |
| 139 | NonfiniteBehavior::NanOnly => true, |
| 140 | }; |
| 141 | Self::IEEE_MAX_EXP |
| 142 | + (Self::MIN_EXP - Self::IEEE_MIN_EXP) |
| 143 | + (ieee_inf_and_nan_replaced_with_extra_normals as ExpInt) |
| 144 | }; |
| 145 | const IEEE_MAX_EXP: ExpInt = -Self::IEEE_MIN_EXP + 1; |
| 146 | |
| 147 | /// The smallest E such that 2^E is a normalized number; this |
| 148 | /// matches the definition of IEEE 754. |
| 149 | const MIN_EXP: ExpInt = Self::IEEE_MIN_EXP; |
| 150 | const IEEE_MIN_EXP: ExpInt = -(1 << (Self::EXP_BITS - 1)) + 2; |
| 151 | |
| 152 | /// The base significand bitpattern of NaNs, i.e. the bits that must always |
| 153 | /// be set in all NaNs, with other significand bits being either used for |
| 154 | /// payload bits (if `NAN_PAYLOAD_MASK` covers them) or always unset. |
| 155 | const NAN_SIGNIFICAND_BASE: Limb = match Self::NONFINITE_BEHAVIOR { |
| 156 | NonfiniteBehavior::IEEE754 => 0, |
| 157 | NonfiniteBehavior::NanOnly => (1 << (Self::PRECISION - 1)) - 1, |
| 158 | }; |
| 159 | |
| 160 | /// The significand bitmask for the payload of a NaN (if supported), |
| 161 | /// including the "quiet bit" (telling QNaNs apart from SNaNs). |
| 162 | const NAN_PAYLOAD_MASK: Limb = match Self::NONFINITE_BEHAVIOR { |
| 163 | NonfiniteBehavior::IEEE754 => (1 << (Self::PRECISION - 1)) - 1, |
| 164 | NonfiniteBehavior::NanOnly => 0, |
| 165 | }; |
| 166 | |
| 167 | /// The significand bitpattern to mark a NaN as quiet (if supported). |
| 168 | /// |
| 169 | /// NOTE: for X87DoubleExtended we need to set two bits instead of one. |
| 170 | /// |
| 171 | /// NOTE: all NaNs are quiet if unsupported (see `NonfiniteBehavior::NanOnly`). |
| 172 | const QNAN_SIGNIFICAND: Limb = match Self::NONFINITE_BEHAVIOR { |
| 173 | NonfiniteBehavior::IEEE754 => 1 << (Self::PRECISION - 2), |
| 174 | NonfiniteBehavior::NanOnly => 0, |
| 175 | }; |
| 176 | |
| 177 | fn from_bits(bits: u128) -> IeeeFloat<Self> { |
| 178 | assert!(Self::BITS > Self::PRECISION); |
| 179 | |
| 180 | let sign = bits & (1 << (Self::BITS - 1)); |
| 181 | let exponent = ((bits & !sign) >> (Self::BITS - 1 - Self::EXP_BITS)) & ((1 << Self::EXP_BITS) - 1); |
| 182 | let mut r = IeeeFloat { |
| 183 | sig: [bits & ((1 << (Self::PRECISION - 1)) - 1)], |
| 184 | // Convert the exponent from its bias representation to a signed integer. |
| 185 | exp: (exponent as ExpInt) + (Self::MIN_EXP - 1), |
| 186 | read_only_category_do_not_mutate: Category::Zero, |
| 187 | read_only_sign_do_not_mutate: sign != 0, |
| 188 | marker: PhantomData, |
| 189 | }; |
| 190 | |
| 191 | // NOTE(eddyb) unlike the original C++ code, this doesn't check for |
| 192 | // specific exponent/significand combinations, but instead relies on |
| 193 | // being able to construct known-good special values to compare to. |
| 194 | let try_category_from_special = |mut special: IeeeFloat<Self>| { |
| 195 | special = special.copy_sign(r); |
| 196 | |
| 197 | // Ignore NaN payload to avoid needing a separate NaN check. |
| 198 | let sig_mask = if special.is_nan() { !Self::NAN_PAYLOAD_MASK } else { !0 }; |
| 199 | |
| 200 | if special.is_negative() == r.is_negative() |
| 201 | && special.exp == r.exp |
| 202 | && special.sig[0] & sig_mask == r.sig[0] & sig_mask |
| 203 | { |
| 204 | Some(special.category()) |
| 205 | } else { |
| 206 | None |
| 207 | } |
| 208 | }; |
| 209 | |
| 210 | // NOTE(eddyb) the order here matters, i.e. `NAN` needs to be last, as |
| 211 | // its relaxed check (see above) overlaps `INFINITY`, for IEEE NaNs. |
| 212 | let specials = [ |
| 213 | IeeeFloat::<Self>::ZERO, |
| 214 | IeeeFloat::<Self>::INFINITY, |
| 215 | IeeeFloat::<Self>::NAN, |
| 216 | ]; |
| 217 | |
| 218 | let category = specials |
| 219 | .into_iter() |
| 220 | .find_map(try_category_from_special) |
| 221 | .unwrap_or_else(|| { |
| 222 | if r.exp == Self::MIN_EXP - 1 { |
| 223 | // Denormal. |
| 224 | r.exp = Self::MIN_EXP; |
| 225 | } else { |
| 226 | // Set integer bit. |
| 227 | sig::set_bit(&mut r.sig, Self::PRECISION - 1); |
| 228 | } |
| 229 | Category::Normal |
| 230 | }); |
| 231 | |
| 232 | r.read_only_category_do_not_mutate = category; |
| 233 | |
| 234 | r |
| 235 | } |
| 236 | |
| 237 | fn to_bits(x: IeeeFloat<Self>) -> u128 { |
| 238 | assert!(Self::BITS > Self::PRECISION); |
| 239 | |
| 240 | // Split integer bit from significand. |
| 241 | let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1); |
| 242 | let mut significand = x.sig[0] & ((1 << (Self::PRECISION - 1)) - 1); |
| 243 | let mut exponent = x.exp; |
| 244 | match x.category() { |
| 245 | Category::Normal => { |
| 246 | if exponent == Self::MIN_EXP && !integer_bit { |
| 247 | // Denormal. |
| 248 | exponent -= 1; |
| 249 | } |
| 250 | } |
| 251 | Category::Zero => { |
| 252 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? |
| 253 | IeeeFloat::<Self> { |
| 254 | sig: [significand], |
| 255 | exp: exponent, |
| 256 | .. |
| 257 | } = Float::ZERO; |
| 258 | } |
| 259 | Category::Infinity => { |
| 260 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? |
| 261 | IeeeFloat::<Self> { |
| 262 | sig: [significand], |
| 263 | exp: exponent, |
| 264 | .. |
| 265 | } = Float::INFINITY; |
| 266 | } |
| 267 | Category::NaN => { |
| 268 | IeeeFloat::<Self> { exp: exponent, .. } = Float::NAN; |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | // Convert the exponent from a signed integer to its bias representation. |
| 273 | let exponent = (exponent - (Self::MIN_EXP - 1)) as u128; |
| 274 | |
| 275 | ((x.is_negative() as u128) << (Self::BITS - 1)) | (exponent << (Self::PRECISION - 1)) | significand |
| 276 | } |
| 277 | } |
| 278 | |
| 279 | impl<S> Copy for IeeeFloat<S> {} |
| 280 | impl<S> Clone for IeeeFloat<S> { |
| 281 | fn clone(&self) -> Self { |
| 282 | *self |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | macro_rules! ieee_semantics { |
| 287 | ($( |
| 288 | $(#[$meta:meta])* |
| 289 | $name:ident = $sem:ident($bits:tt : $exp_bits:tt) $({ $($extra:tt)* })? |
| 290 | ),* $(,)?) => { |
| 291 | $( |
| 292 | #[doc = concat!("Floating point semantics for [`" , stringify!($name), "`]." )] |
| 293 | /// |
| 294 | /// See that type for more details. |
| 295 | pub struct $sem; |
| 296 | |
| 297 | $(#[$meta])* |
| 298 | pub type $name = IeeeFloat<$sem>; |
| 299 | |
| 300 | impl Semantics for $sem { |
| 301 | const BITS: usize = $bits; |
| 302 | const EXP_BITS: usize = $exp_bits; |
| 303 | |
| 304 | $($($extra)*)? |
| 305 | } |
| 306 | )* |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | ieee_semantics! { |
| 311 | /// IEEE binary16 half-precision (16-bit) floating point number. |
| 312 | Half = HalfS(16:5), |
| 313 | |
| 314 | /// IEEE binary32 single-precision (32-bit) floating point number. |
| 315 | Single = SingleS(32:8), |
| 316 | |
| 317 | /// IEEE binary64 double-precision (64-bit) floating point number. |
| 318 | Double = DoubleS(64:11), |
| 319 | |
| 320 | /// IEEE binary128 quadruple-precision (128-bit) floating point number. |
| 321 | Quad = QuadS(128:15), |
| 322 | |
| 323 | /// 16-bit brain floating point number. |
| 324 | /// |
| 325 | /// This is not an IEEE kind but uses the same semantics. |
| 326 | BFloat = BFloatS(16:8), |
| 327 | |
| 328 | /// 8-bit floating point number with S1E5M2 bit layout. |
| 329 | /// |
| 330 | /// Follows IEEE-754 conventions with S1E5M2 bit layout as described in |
| 331 | /// <https://arxiv.org/abs/2209.05433>. |
| 332 | Float8E5M2 = Float8E5M2S(8:5), |
| 333 | |
| 334 | /// 8-bit floating point number with S1E4M3 bit layout. |
| 335 | /// |
| 336 | /// This type mostly follows IEEE-754 conventions with a |
| 337 | /// bit layout S1E4M3 as described in <https://arxiv.org/abs/2209.05433>. |
| 338 | /// Unlike IEEE-754 types, there are no infinity values, and NaN is |
| 339 | /// represented with the exponent and mantissa bits set to all 1s. |
| 340 | Float8E4M3FN = Float8E4M3FNS(8:4) { |
| 341 | const NONFINITE_BEHAVIOR: NonfiniteBehavior = NonfiniteBehavior::NanOnly; |
| 342 | }, |
| 343 | } |
| 344 | |
| 345 | // FIXME(eddyb) consider moving X87-specific logic to a "has explicit integer bit" |
| 346 | // associated `const` on `Semantics` itself. |
| 347 | /// Floating point semantics for [`X87DoubleExtended`]. |
| 348 | /// |
| 349 | /// See that type for more details. |
| 350 | pub struct X87DoubleExtendedS; |
| 351 | |
| 352 | /// 80-bit floating point number that uses IEEE extended precision semantics, as used |
| 353 | /// by x87 `long double`. |
| 354 | pub type X87DoubleExtended = IeeeFloat<X87DoubleExtendedS>; |
| 355 | |
| 356 | impl Semantics for X87DoubleExtendedS { |
| 357 | const BITS: usize = 80; |
| 358 | const EXP_BITS: usize = 15; |
| 359 | |
| 360 | // HACK(eddyb) overwriting `EXP_BITS` because its default is incorrect. |
| 361 | // FIMXE(eddyb) get rid of this by allowing `Semantics` to generically have |
| 362 | // the concept of "explicitly included integer bit", which is the main way |
| 363 | // in which the 80-bit X87 format differs from standard IEEE encodings. |
| 364 | const PRECISION: usize = 64; |
| 365 | |
| 366 | /// For x87 extended precision, we want to make a NaN, not a |
| 367 | /// pseudo-NaN. Maybe we should expose the ability to make |
| 368 | /// pseudo-NaNs? |
| 369 | const QNAN_SIGNIFICAND: Limb = 0b11 << (Self::PRECISION - 2); |
| 370 | |
| 371 | /// Integer bit is explicit in this format. Intel hardware (387 and later) |
| 372 | /// does not support these bit patterns: |
| 373 | /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity") |
| 374 | /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN") |
| 375 | /// exponent!=0 nor all 1's, integer bit 0 ("unnormal") |
| 376 | /// exponent = 0, integer bit 1 ("pseudodenormal") |
| 377 | /// At the moment, the first three are treated as NaNs, the last one as Normal. |
| 378 | #[inline ] |
| 379 | fn from_bits(bits: u128) -> IeeeFloat<Self> { |
| 380 | let sign = bits & (1 << (Self::BITS - 1)); |
| 381 | let exponent = ((bits & !sign) >> (Self::BITS - 1 - Self::EXP_BITS)) & ((1 << Self::EXP_BITS) - 1); |
| 382 | let mut r = IeeeFloat { |
| 383 | sig: [bits & ((1 << Self::PRECISION) - 1)], |
| 384 | // Convert the exponent from its bias representation to a signed integer. |
| 385 | exp: (exponent as ExpInt) + (Self::MIN_EXP - 1), |
| 386 | read_only_category_do_not_mutate: Category::Zero, |
| 387 | read_only_sign_do_not_mutate: sign != 0, |
| 388 | marker: PhantomData, |
| 389 | }; |
| 390 | |
| 391 | let integer_bit = r.sig[0] >> (Self::PRECISION - 1); |
| 392 | |
| 393 | let category = if r.exp == Self::MIN_EXP - 1 && r.sig == [0] { |
| 394 | Category::Zero |
| 395 | } else if r.exp == Self::MAX_EXP + 1 && r.sig == [1 << (Self::PRECISION - 1)] { |
| 396 | Category::Infinity |
| 397 | } else if r.exp == Self::MAX_EXP + 1 && r.sig != [1 << (Self::PRECISION - 1)] |
| 398 | || r.exp != Self::MAX_EXP + 1 && r.exp != Self::MIN_EXP - 1 && integer_bit == 0 |
| 399 | { |
| 400 | r.exp = Self::MAX_EXP + 1; |
| 401 | Category::NaN |
| 402 | } else { |
| 403 | if r.exp == Self::MIN_EXP - 1 { |
| 404 | // Denormal. |
| 405 | r.exp = Self::MIN_EXP; |
| 406 | } |
| 407 | Category::Normal |
| 408 | }; |
| 409 | |
| 410 | r.read_only_category_do_not_mutate = category; |
| 411 | |
| 412 | r |
| 413 | } |
| 414 | |
| 415 | #[inline ] |
| 416 | fn to_bits(x: IeeeFloat<Self>) -> u128 { |
| 417 | // Get integer bit from significand. |
| 418 | let integer_bit = sig::get_bit(&x.sig, Self::PRECISION - 1); |
| 419 | let mut significand = x.sig[0] & ((1 << Self::PRECISION) - 1); |
| 420 | let exponent = match x.category() { |
| 421 | Category::Normal => { |
| 422 | if x.exp == Self::MIN_EXP && !integer_bit { |
| 423 | // Denormal. |
| 424 | Self::MIN_EXP - 1 |
| 425 | } else { |
| 426 | x.exp |
| 427 | } |
| 428 | } |
| 429 | Category::Zero => { |
| 430 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? |
| 431 | significand = 0; |
| 432 | Self::MIN_EXP - 1 |
| 433 | } |
| 434 | Category::Infinity => { |
| 435 | // FIXME(eddyb) Maybe we should guarantee an invariant instead? |
| 436 | significand = 1 << (Self::PRECISION - 1); |
| 437 | Self::MAX_EXP + 1 |
| 438 | } |
| 439 | Category::NaN => Self::MAX_EXP + 1, |
| 440 | }; |
| 441 | |
| 442 | // Convert the exponent from a signed integer to its bias representation. |
| 443 | let exponent = (exponent - (Self::MIN_EXP - 1)) as u128; |
| 444 | |
| 445 | ((x.is_negative() as u128) << (Self::BITS - 1)) | (exponent << Self::PRECISION) | significand |
| 446 | } |
| 447 | } |
| 448 | |
| 449 | float_common_impls!(IeeeFloat<S>); |
| 450 | |
| 451 | impl<S: Semantics> PartialEq for IeeeFloat<S> { |
| 452 | fn eq(&self, rhs: &Self) -> bool { |
| 453 | self.partial_cmp(rhs) == Some(Ordering::Equal) |
| 454 | } |
| 455 | } |
| 456 | |
| 457 | impl<S: Semantics> PartialOrd for IeeeFloat<S> { |
| 458 | fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> { |
| 459 | match (self.category(), rhs.category()) { |
| 460 | (Category::NaN, _) | (_, Category::NaN) => None, |
| 461 | |
| 462 | (Category::Infinity, Category::Infinity) => Some((!self.is_negative()).cmp(&(!rhs.is_negative()))), |
| 463 | |
| 464 | (Category::Zero, Category::Zero) => Some(Ordering::Equal), |
| 465 | |
| 466 | (Category::Infinity, _) | (Category::Normal, Category::Zero) => { |
| 467 | Some((!self.is_negative()).cmp(&self.is_negative())) |
| 468 | } |
| 469 | |
| 470 | (_, Category::Infinity) | (Category::Zero, Category::Normal) => { |
| 471 | Some(rhs.is_negative().cmp(&(!rhs.is_negative()))) |
| 472 | } |
| 473 | |
| 474 | (Category::Normal, Category::Normal) => { |
| 475 | // Two normal numbers. Do they have the same sign? |
| 476 | Some((!self.is_negative()).cmp(&(!rhs.is_negative())).then_with(|| { |
| 477 | // Compare absolute values; invert result if negative. |
| 478 | let result = self.cmp_abs_normal(*rhs); |
| 479 | |
| 480 | if self.is_negative() { |
| 481 | result.reverse() |
| 482 | } else { |
| 483 | result |
| 484 | } |
| 485 | })) |
| 486 | } |
| 487 | } |
| 488 | } |
| 489 | } |
| 490 | |
| 491 | impl<S: Semantics> Neg for IeeeFloat<S> { |
| 492 | type Output = Self; |
| 493 | fn neg(mut self) -> Self { |
| 494 | self.read_only_sign_do_not_mutate = !self.is_negative(); |
| 495 | self |
| 496 | } |
| 497 | } |
| 498 | |
| 499 | /// Prints this value as a decimal string. |
| 500 | /// |
| 501 | /// \param precision The maximum number of digits of |
| 502 | /// precision to output. If there are fewer digits available, |
| 503 | /// zero padding will not be used unless the value is |
| 504 | /// integral and small enough to be expressed in |
| 505 | /// precision digits. 0 means to use the natural |
| 506 | /// precision of the number. |
| 507 | /// \param width The maximum number of zeros to |
| 508 | /// consider inserting before falling back to scientific |
| 509 | /// notation. 0 means to always use scientific notation. |
| 510 | /// |
| 511 | /// \param alternate Indicate whether to remove the trailing zero in |
| 512 | /// fraction part or not. Also setting this parameter to true forces |
| 513 | /// producing of output more similar to default printf behavior. |
| 514 | /// Specifically the lower e is used as exponent delimiter and exponent |
| 515 | /// always contains no less than two digits. |
| 516 | /// |
| 517 | /// Number precision width Result |
| 518 | /// ------ --------- ----- ------ |
| 519 | /// 1.01E+4 5 2 10100 |
| 520 | /// 1.01E+4 4 2 1.01E+4 |
| 521 | /// 1.01E+4 5 1 1.01E+4 |
| 522 | /// 1.01E-2 5 2 0.0101 |
| 523 | /// 1.01E-2 4 2 0.0101 |
| 524 | /// 1.01E-2 4 1 1.01E-2 |
| 525 | impl<S: Semantics> fmt::Display for IeeeFloat<S> { |
| 526 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 527 | let width = f.width().unwrap_or(3); |
| 528 | let alternate = f.alternate(); |
| 529 | |
| 530 | match self.category() { |
| 531 | Category::Infinity => { |
| 532 | if self.is_negative() { |
| 533 | return f.write_str("-Inf" ); |
| 534 | } else { |
| 535 | return f.write_str("+Inf" ); |
| 536 | } |
| 537 | } |
| 538 | |
| 539 | Category::NaN => return f.write_str("NaN" ), |
| 540 | |
| 541 | Category::Zero => { |
| 542 | if self.is_negative() { |
| 543 | f.write_char('-' )?; |
| 544 | } |
| 545 | |
| 546 | if width == 0 { |
| 547 | if alternate { |
| 548 | f.write_str("0.0" )?; |
| 549 | if let Some(n) = f.precision() { |
| 550 | for _ in 1..n { |
| 551 | f.write_char('0' )?; |
| 552 | } |
| 553 | } |
| 554 | f.write_str("e+00" )?; |
| 555 | } else { |
| 556 | f.write_str("0.0E+0" )?; |
| 557 | } |
| 558 | } else { |
| 559 | f.write_char('0' )?; |
| 560 | } |
| 561 | return Ok(()); |
| 562 | } |
| 563 | |
| 564 | Category::Normal => {} |
| 565 | } |
| 566 | |
| 567 | if self.is_negative() { |
| 568 | f.write_char('-' )?; |
| 569 | } |
| 570 | |
| 571 | // We use enough digits so the number can be round-tripped back to an |
| 572 | // APFloat. The formula comes from "How to Print Floating-Point Numbers |
| 573 | // Accurately" by Steele and White. |
| 574 | // FIXME: Using a formula based purely on the precision is conservative; |
| 575 | // we can print fewer digits depending on the actual value being printed. |
| 576 | |
| 577 | // precision = 2 + floor(S::PRECISION / lg_2(10)) |
| 578 | let precision = f.precision().unwrap_or(2 + S::PRECISION * 59 / 196); |
| 579 | |
| 580 | // Decompose the number into an APInt and an exponent. |
| 581 | let mut exp = self.exp - (S::PRECISION as ExpInt - 1); |
| 582 | let mut sig: DynPrecisionLimbVec = [self.sig[0]].into_iter().collect(); |
| 583 | |
| 584 | // Ignore trailing binary zeros. |
| 585 | let trailing_zeros = sig[0].trailing_zeros(); |
| 586 | let _: Loss = sig::shift_right(&mut sig, &mut exp, trailing_zeros as usize); |
| 587 | |
| 588 | // Change the exponent from 2^e to 10^e. |
| 589 | if exp == 0 { |
| 590 | // Nothing to do. |
| 591 | } else if exp > 0 { |
| 592 | // Just shift left. |
| 593 | let shift = exp as usize; |
| 594 | sig.resize(limbs_for_bits(S::PRECISION + shift), 0); |
| 595 | sig::shift_left(&mut sig, &mut exp, shift); |
| 596 | } else { |
| 597 | // exp < 0 |
| 598 | let mut texp = -exp as usize; |
| 599 | |
| 600 | // We transform this using the identity: |
| 601 | // (N)(2^-e) == (N)(5^e)(10^-e) |
| 602 | |
| 603 | // Multiply significand by 5^e. |
| 604 | // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8) |
| 605 | let mut sig_scratch = DynPrecisionLimbVec::new(); |
| 606 | let mut p5 = DynPrecisionLimbVec::new(); |
| 607 | let mut p5_scratch = DynPrecisionLimbVec::new(); |
| 608 | while texp != 0 { |
| 609 | if p5.is_empty() { |
| 610 | p5.push(5); |
| 611 | } else { |
| 612 | p5_scratch.resize(p5.len() * 2, 0); |
| 613 | let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS); |
| 614 | while p5_scratch.last() == Some(&0) { |
| 615 | p5_scratch.pop(); |
| 616 | } |
| 617 | mem::swap(&mut p5, &mut p5_scratch); |
| 618 | } |
| 619 | if texp & 1 != 0 { |
| 620 | sig_scratch.resize(sig.len() + p5.len(), 0); |
| 621 | let _: Loss = sig::mul(&mut sig_scratch, &mut 0, &sig, &p5, (sig.len() + p5.len()) * LIMB_BITS); |
| 622 | while sig_scratch.last() == Some(&0) { |
| 623 | sig_scratch.pop(); |
| 624 | } |
| 625 | mem::swap(&mut sig, &mut sig_scratch); |
| 626 | } |
| 627 | texp >>= 1; |
| 628 | } |
| 629 | } |
| 630 | |
| 631 | // Fill the buffer. |
| 632 | let mut buffer = smallvec::SmallVec::<[u8; 64]>::new(); |
| 633 | |
| 634 | // Ignore digits from the significand until it is no more |
| 635 | // precise than is required for the desired precision. |
| 636 | // 196/59 is a very slight overestimate of lg_2(10). |
| 637 | let required = (precision * 196 + 58) / 59; |
| 638 | let mut discard_digits = sig::omsb(&sig).saturating_sub(required) * 59 / 196; |
| 639 | let mut in_trail = true; |
| 640 | while !sig.is_empty() { |
| 641 | // Perform short division by 10 to extract the rightmost digit. |
| 642 | // rem <- sig % 10 |
| 643 | // sig <- sig / 10 |
| 644 | let mut rem = 0; |
| 645 | |
| 646 | // Use 64-bit division and remainder, with 32-bit chunks from sig. |
| 647 | sig::each_chunk(&mut sig, 32, |chunk| { |
| 648 | let chunk = chunk as u32; |
| 649 | let combined = ((rem as u64) << 32) | (chunk as u64); |
| 650 | rem = (combined % 10) as u8; |
| 651 | (combined / 10) as u32 as Limb |
| 652 | }); |
| 653 | |
| 654 | // Reduce the sigificand to avoid wasting time dividing 0's. |
| 655 | while sig.last() == Some(&0) { |
| 656 | sig.pop(); |
| 657 | } |
| 658 | |
| 659 | let digit = rem; |
| 660 | |
| 661 | // Ignore digits we don't need. |
| 662 | if discard_digits > 0 { |
| 663 | discard_digits -= 1; |
| 664 | exp += 1; |
| 665 | continue; |
| 666 | } |
| 667 | |
| 668 | // Drop trailing zeros. |
| 669 | if in_trail && digit == 0 { |
| 670 | exp += 1; |
| 671 | } else { |
| 672 | in_trail = false; |
| 673 | buffer.push(b'0' + digit); |
| 674 | } |
| 675 | } |
| 676 | |
| 677 | assert!(!buffer.is_empty(), "no characters in buffer!" ); |
| 678 | |
| 679 | // Drop down to precision. |
| 680 | // FIXME: don't do more precise calculations above than are required. |
| 681 | if buffer.len() > precision { |
| 682 | // The most significant figures are the last ones in the buffer. |
| 683 | let mut first_sig = buffer.len() - precision; |
| 684 | |
| 685 | // Round. |
| 686 | // FIXME: this probably shouldn't use 'round half up'. |
| 687 | |
| 688 | // Rounding down is just a truncation, except we also want to drop |
| 689 | // trailing zeros from the new result. |
| 690 | if buffer[first_sig - 1] < b'5' { |
| 691 | while first_sig < buffer.len() && buffer[first_sig] == b'0' { |
| 692 | first_sig += 1; |
| 693 | } |
| 694 | } else { |
| 695 | // Rounding up requires a decimal add-with-carry. If we continue |
| 696 | // the carry, the newly-introduced zeros will just be truncated. |
| 697 | for x in &mut buffer[first_sig..] { |
| 698 | if *x == b'9' { |
| 699 | first_sig += 1; |
| 700 | } else { |
| 701 | *x += 1; |
| 702 | break; |
| 703 | } |
| 704 | } |
| 705 | } |
| 706 | |
| 707 | exp += first_sig as ExpInt; |
| 708 | buffer.drain(..first_sig); |
| 709 | |
| 710 | // If we carried through, we have exactly one digit of precision. |
| 711 | if buffer.is_empty() { |
| 712 | buffer.push(b'1' ); |
| 713 | } |
| 714 | } |
| 715 | |
| 716 | let digits = buffer.len(); |
| 717 | |
| 718 | // Check whether we should use scientific notation. |
| 719 | let scientific = if width == 0 { |
| 720 | true |
| 721 | } else { |
| 722 | if exp >= 0 { |
| 723 | // 765e3 --> 765000 |
| 724 | // ^^^ |
| 725 | // But we shouldn't make the number look more precise than it is. |
| 726 | exp as usize > width || digits + exp as usize > precision |
| 727 | } else { |
| 728 | // Power of the most significant digit. |
| 729 | let msd = exp + (digits - 1) as ExpInt; |
| 730 | if msd >= 0 { |
| 731 | // 765e-2 == 7.65 |
| 732 | false |
| 733 | } else { |
| 734 | // 765e-5 == 0.00765 |
| 735 | // ^ ^^ |
| 736 | -msd as usize > width |
| 737 | } |
| 738 | } |
| 739 | }; |
| 740 | |
| 741 | // Scientific formatting is pretty straightforward. |
| 742 | if scientific { |
| 743 | exp += digits as ExpInt - 1; |
| 744 | |
| 745 | f.write_char(buffer[digits - 1] as char)?; |
| 746 | f.write_char('.' )?; |
| 747 | let truncate_zero = !alternate; |
| 748 | if digits == 1 && truncate_zero { |
| 749 | f.write_char('0' )?; |
| 750 | } else { |
| 751 | for &d in buffer[..digits - 1].iter().rev() { |
| 752 | f.write_char(d as char)?; |
| 753 | } |
| 754 | } |
| 755 | // Fill with zeros up to precision. |
| 756 | if !truncate_zero && precision > digits - 1 { |
| 757 | for _ in 0..precision - digits + 1 { |
| 758 | f.write_char('0' )?; |
| 759 | } |
| 760 | } |
| 761 | // For alternate we use lower 'e'. |
| 762 | f.write_char(if alternate { 'e' } else { 'E' })?; |
| 763 | |
| 764 | // Exponent always at least two digits if we do not truncate zeros. |
| 765 | if truncate_zero { |
| 766 | write!(f, " {:+}" , exp)?; |
| 767 | } else { |
| 768 | write!(f, " {:+03}" , exp)?; |
| 769 | } |
| 770 | |
| 771 | return Ok(()); |
| 772 | } |
| 773 | |
| 774 | // Non-scientific, positive exponents. |
| 775 | if exp >= 0 { |
| 776 | for &d in buffer.iter().rev() { |
| 777 | f.write_char(d as char)?; |
| 778 | } |
| 779 | for _ in 0..exp { |
| 780 | f.write_char('0' )?; |
| 781 | } |
| 782 | return Ok(()); |
| 783 | } |
| 784 | |
| 785 | // Non-scientific, negative exponents. |
| 786 | let unit_place = -exp as usize; |
| 787 | if unit_place < digits { |
| 788 | for &d in buffer[unit_place..].iter().rev() { |
| 789 | f.write_char(d as char)?; |
| 790 | } |
| 791 | f.write_char('.' )?; |
| 792 | for &d in buffer[..unit_place].iter().rev() { |
| 793 | f.write_char(d as char)?; |
| 794 | } |
| 795 | } else { |
| 796 | f.write_str("0." )?; |
| 797 | for _ in digits..unit_place { |
| 798 | f.write_char('0' )?; |
| 799 | } |
| 800 | for &d in buffer.iter().rev() { |
| 801 | f.write_char(d as char)?; |
| 802 | } |
| 803 | } |
| 804 | |
| 805 | Ok(()) |
| 806 | } |
| 807 | } |
| 808 | |
| 809 | impl<S: Semantics> fmt::Debug for IeeeFloat<S> { |
| 810 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 811 | write!( |
| 812 | f, |
| 813 | " {}( {:?} | {}{:?} * 2^ {})" , |
| 814 | self, |
| 815 | self.category(), |
| 816 | if self.is_negative() { "-" } else { "+" }, |
| 817 | self.sig, |
| 818 | self.exp |
| 819 | ) |
| 820 | } |
| 821 | } |
| 822 | |
| 823 | // HACK(eddyb) this logic is duplicated throughout the original C++ code, |
| 824 | // but it's a bit too long to keep repeating in the Rust port for all ops. |
| 825 | // FIXME(eddyb) find a better name/organization for all of this functionality |
| 826 | // (`IeeeDefaultExceptionHandling` doesn't have a counterpart in the C++ code). |
| 827 | struct IeeeDefaultExceptionHandling; |
| 828 | impl IeeeDefaultExceptionHandling { |
| 829 | fn result_from_nan<S: Semantics>(mut r: IeeeFloat<S>) -> StatusAnd<IeeeFloat<S>> { |
| 830 | assert!(r.is_nan()); |
| 831 | |
| 832 | let status = if r.is_signaling() { |
| 833 | // [IEEE Std 754-2008 6.2]: |
| 834 | // Under default exception handling, any operation signaling an invalid |
| 835 | // operation exception and for which a floating-point result is to be |
| 836 | // delivered shall deliver a quiet NaN. |
| 837 | let [sig] = &mut r.sig; |
| 838 | *sig |= if S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND { |
| 839 | // HACK(eddyb) remain bug-compatible with the original C++ code |
| 840 | // which doesn't appear to attempt avoiding creating pseudo-NaNs |
| 841 | // (see https://github.com/llvm/llvm-project/issues/63938). |
| 842 | S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK |
| 843 | } else { |
| 844 | S::QNAN_SIGNIFICAND |
| 845 | }; |
| 846 | |
| 847 | // [IEEE Std 754-2008 6.2]: |
| 848 | // Signaling NaNs shall be reserved operands that, under default exception |
| 849 | // handling, signal the invalid operation exception(see 7.2) for every |
| 850 | // general-computational and signaling-computational operation except for |
| 851 | // the conversions described in 5.12. |
| 852 | Status::INVALID_OP |
| 853 | } else { |
| 854 | // [IEEE Std 754-2008 6.2]: |
| 855 | // For an operation with quiet NaN inputs, other than maximum and minimum |
| 856 | // operations, if a floating-point result is to be delivered the result |
| 857 | // shall be a quiet NaN which should be one of the input NaNs. |
| 858 | // ... |
| 859 | // Every general-computational and quiet-computational operation involving |
| 860 | // one or more input NaNs, none of them signaling, shall signal no |
| 861 | // exception, except fusedMultiplyAdd might signal the invalid operation |
| 862 | // exception(see 7.2). |
| 863 | Status::OK |
| 864 | }; |
| 865 | status.and(r) |
| 866 | } |
| 867 | |
| 868 | fn binop_result_from_either_nan<S: Semantics>(a: IeeeFloat<S>, b: IeeeFloat<S>) -> StatusAnd<IeeeFloat<S>> { |
| 869 | let r = match (a.category(), b.category()) { |
| 870 | (Category::NaN, _) => a, |
| 871 | (_, Category::NaN) => b, |
| 872 | _ => unreachable!(), |
| 873 | }; |
| 874 | let mut status_and_r = Self::result_from_nan(r); |
| 875 | if b.is_signaling() { |
| 876 | status_and_r.status |= Status::INVALID_OP; |
| 877 | } |
| 878 | status_and_r |
| 879 | } |
| 880 | } |
| 881 | |
| 882 | impl<S: Semantics> IeeeFloat<S> { |
| 883 | // HACK(eddyb) allow `Self::qnan` to be used from `IeeeFloat::NAN`. |
| 884 | // FIXME(eddyb) move back to the trait impl when that can be `const fn`. |
| 885 | const fn qnan(payload: Option<u128>) -> Self { |
| 886 | let sig = [S::NAN_SIGNIFICAND_BASE |
| 887 | | S::QNAN_SIGNIFICAND |
| 888 | | match payload { |
| 889 | // Zero out the excess bits of the significand. |
| 890 | Some(payload) => payload & S::NAN_PAYLOAD_MASK, |
| 891 | None => 0, |
| 892 | }]; |
| 893 | |
| 894 | let exp = match S::NONFINITE_BEHAVIOR { |
| 895 | NonfiniteBehavior::IEEE754 => S::MAX_EXP + 1, |
| 896 | NonfiniteBehavior::NanOnly => S::MAX_EXP, |
| 897 | }; |
| 898 | |
| 899 | IeeeFloat { |
| 900 | sig, |
| 901 | exp, |
| 902 | read_only_category_do_not_mutate: Category::NaN, |
| 903 | read_only_sign_do_not_mutate: false, |
| 904 | marker: PhantomData, |
| 905 | } |
| 906 | } |
| 907 | } |
| 908 | |
| 909 | impl<S: Semantics> Float for IeeeFloat<S> { |
| 910 | const BITS: usize = S::BITS; |
| 911 | const PRECISION: usize = S::PRECISION; |
| 912 | const MAX_EXP: ExpInt = S::MAX_EXP; |
| 913 | const MIN_EXP: ExpInt = S::MIN_EXP; |
| 914 | |
| 915 | const ZERO: Self = IeeeFloat { |
| 916 | sig: [0], |
| 917 | exp: S::MIN_EXP - 1, |
| 918 | read_only_category_do_not_mutate: Category::Zero, |
| 919 | read_only_sign_do_not_mutate: false, |
| 920 | marker: PhantomData, |
| 921 | }; |
| 922 | |
| 923 | const INFINITY: Self = match S::NONFINITE_BEHAVIOR { |
| 924 | NonfiniteBehavior::IEEE754 => IeeeFloat { |
| 925 | sig: [0], |
| 926 | exp: S::MAX_EXP + 1, |
| 927 | read_only_category_do_not_mutate: Category::Infinity, |
| 928 | read_only_sign_do_not_mutate: false, |
| 929 | marker: PhantomData, |
| 930 | }, |
| 931 | |
| 932 | // There is no Inf, so make NaN instead. |
| 933 | NonfiniteBehavior::NanOnly => Self::NAN, |
| 934 | }; |
| 935 | |
| 936 | const NAN: Self = Self::qnan(None); |
| 937 | |
| 938 | fn qnan(payload: Option<u128>) -> Self { |
| 939 | // NOTE(eddyb) this is not a recursive self-call, but rather it calls |
| 940 | // the `const fn` inherent mehtod (see above). |
| 941 | Self::qnan(payload) |
| 942 | } |
| 943 | |
| 944 | fn snan(payload: Option<u128>) -> Self { |
| 945 | let mut snan = Self::qnan(payload); |
| 946 | |
| 947 | let [sig] = &mut snan.sig; |
| 948 | |
| 949 | // We always have to clear the QNaN bit to make it an SNaN. |
| 950 | *sig &= !(S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK); |
| 951 | |
| 952 | // If there are no bits set in the payload, we have to set |
| 953 | // *something* to make it a NaN instead of an infinity; |
| 954 | // conventionally, this is the next bit down from the QNaN bit. |
| 955 | if *sig & S::NAN_PAYLOAD_MASK == 0 { |
| 956 | *sig |= (S::QNAN_SIGNIFICAND & S::NAN_PAYLOAD_MASK) >> 1; |
| 957 | } |
| 958 | |
| 959 | snan |
| 960 | } |
| 961 | |
| 962 | fn largest() -> Self { |
| 963 | // We want (in interchange format): |
| 964 | // exponent = 1..10 |
| 965 | // significand = 1..1 |
| 966 | IeeeFloat { |
| 967 | sig: [((1 << S::PRECISION) - 1) |
| 968 | & match S::NONFINITE_BEHAVIOR { |
| 969 | // The largest number by magnitude in our format will be the floating point |
| 970 | // number with maximum exponent and with significand that is all ones. |
| 971 | NonfiniteBehavior::IEEE754 => !0, |
| 972 | |
| 973 | // The largest number by magnitude in our format will be the floating point |
| 974 | // number with maximum exponent and with significand that is all ones except |
| 975 | // the LSB. |
| 976 | NonfiniteBehavior::NanOnly => !1, |
| 977 | }], |
| 978 | exp: S::MAX_EXP, |
| 979 | read_only_category_do_not_mutate: Category::Normal, |
| 980 | read_only_sign_do_not_mutate: false, |
| 981 | marker: PhantomData, |
| 982 | } |
| 983 | } |
| 984 | |
| 985 | // We want (in interchange format): |
| 986 | // exponent = 0..0 |
| 987 | // significand = 0..01 |
| 988 | const SMALLEST: Self = IeeeFloat { |
| 989 | sig: [1], |
| 990 | exp: S::MIN_EXP, |
| 991 | read_only_category_do_not_mutate: Category::Normal, |
| 992 | read_only_sign_do_not_mutate: false, |
| 993 | marker: PhantomData, |
| 994 | }; |
| 995 | |
| 996 | fn smallest_normalized() -> Self { |
| 997 | // We want (in interchange format): |
| 998 | // exponent = 0..0 |
| 999 | // significand = 10..0 |
| 1000 | IeeeFloat { |
| 1001 | sig: [1 << (S::PRECISION - 1)], |
| 1002 | exp: S::MIN_EXP, |
| 1003 | read_only_category_do_not_mutate: Category::Normal, |
| 1004 | read_only_sign_do_not_mutate: false, |
| 1005 | marker: PhantomData, |
| 1006 | } |
| 1007 | } |
| 1008 | |
| 1009 | fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
| 1010 | let status = match (self.category(), rhs.category()) { |
| 1011 | (Category::NaN, _) | (_, Category::NaN) => { |
| 1012 | return IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs); |
| 1013 | } |
| 1014 | |
| 1015 | (Category::Infinity, Category::Infinity) => { |
| 1016 | // Differently signed infinities can only be validly |
| 1017 | // subtracted. |
| 1018 | if self.is_negative() != rhs.is_negative() { |
| 1019 | self = Self::NAN; |
| 1020 | Status::INVALID_OP |
| 1021 | } else { |
| 1022 | Status::OK |
| 1023 | } |
| 1024 | } |
| 1025 | |
| 1026 | // Sign may depend on rounding mode; handled below. |
| 1027 | (_, Category::Zero) | (Category::Infinity, Category::Normal) => Status::OK, |
| 1028 | |
| 1029 | (Category::Zero, _) | (_, Category::Infinity) => { |
| 1030 | self = rhs; |
| 1031 | Status::OK |
| 1032 | } |
| 1033 | |
| 1034 | (Category::Normal, Category::Normal) => { |
| 1035 | let mut sign = self.is_negative(); |
| 1036 | let loss = sig::add_or_sub( |
| 1037 | &mut self.sig, |
| 1038 | &mut self.exp, |
| 1039 | &mut sign, |
| 1040 | &mut [rhs.sig[0]], |
| 1041 | rhs.exp, |
| 1042 | rhs.is_negative(), |
| 1043 | ); |
| 1044 | self = self.with_sign(sign); |
| 1045 | |
| 1046 | let status; |
| 1047 | self = unpack!(status=, self.normalize(round, loss)); |
| 1048 | |
| 1049 | // Can only be zero if we lost no fraction. |
| 1050 | assert!(self.category() != Category::Zero || loss == Loss::ExactlyZero); |
| 1051 | |
| 1052 | status |
| 1053 | } |
| 1054 | }; |
| 1055 | |
| 1056 | // If two numbers add (exactly) to zero, IEEE 754 decrees it is a |
| 1057 | // positive zero unless rounding to minus infinity, except that |
| 1058 | // adding two like-signed zeroes gives that zero. |
| 1059 | if self.category() == Category::Zero |
| 1060 | && (rhs.category() != Category::Zero || self.is_negative() != rhs.is_negative()) |
| 1061 | { |
| 1062 | self = self.with_sign(round == Round::TowardNegative); |
| 1063 | } |
| 1064 | |
| 1065 | status.and(self) |
| 1066 | } |
| 1067 | |
| 1068 | // NOTE(eddyb) we can't rely on the `sub_r` method default implementation |
| 1069 | // because NaN handling needs the original `rhs` (i.e. without negation). |
| 1070 | fn sub_r(self, rhs: Self, round: Round) -> StatusAnd<Self> { |
| 1071 | match (self.category(), rhs.category()) { |
| 1072 | (Category::NaN, _) | (_, Category::NaN) => { |
| 1073 | IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) |
| 1074 | } |
| 1075 | |
| 1076 | _ => self.add_r(-rhs, round), |
| 1077 | } |
| 1078 | } |
| 1079 | |
| 1080 | fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
| 1081 | self = self.negate_if(rhs.is_negative()); |
| 1082 | |
| 1083 | match (self.category(), rhs.category()) { |
| 1084 | (Category::NaN, _) | (_, Category::NaN) => { |
| 1085 | self = self.negate_if(rhs.is_negative()); // restore the original sign |
| 1086 | |
| 1087 | IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) |
| 1088 | } |
| 1089 | |
| 1090 | (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => { |
| 1091 | Status::INVALID_OP.and(Self::NAN) |
| 1092 | } |
| 1093 | |
| 1094 | (Category::Infinity, _) | (_, Category::Infinity) => Status::OK.and(Self::INFINITY.copy_sign(self)), |
| 1095 | |
| 1096 | (Category::Zero, _) | (_, Category::Zero) => Status::OK.and(Self::ZERO.copy_sign(self)), |
| 1097 | |
| 1098 | (Category::Normal, Category::Normal) => { |
| 1099 | self.exp += rhs.exp; |
| 1100 | let mut wide_sig = [0; 2]; |
| 1101 | let loss = sig::mul(&mut wide_sig, &mut self.exp, &self.sig, &rhs.sig, S::PRECISION); |
| 1102 | self.sig = [wide_sig[0]]; |
| 1103 | let mut status; |
| 1104 | self = unpack!(status=, self.normalize(round, loss)); |
| 1105 | if loss != Loss::ExactlyZero { |
| 1106 | status |= Status::INEXACT; |
| 1107 | } |
| 1108 | status.and(self) |
| 1109 | } |
| 1110 | } |
| 1111 | } |
| 1112 | |
| 1113 | fn mul_add_r(mut self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> { |
| 1114 | // If and only if all arguments are normal do we need to do an |
| 1115 | // extended-precision calculation. |
| 1116 | if !self.is_finite_non_zero() || !multiplicand.is_finite_non_zero() || !addend.is_finite() { |
| 1117 | let mut status; |
| 1118 | if self.is_finite_non_zero() && multiplicand.is_finite_non_zero() { |
| 1119 | // HACK(eddyb) this corresponds to the case where the C++ code |
| 1120 | // (`multiplySpecials`) is effectively a noop: no multiplication |
| 1121 | // is actually performed, because we're really only here to handle |
| 1122 | // the "infinite/NaN `addend`" special-case, which needs to ignore |
| 1123 | // the "finite * finite" multiplication entirely, instead of letting |
| 1124 | // it e.g. overflow into infinity (and trample over `addend`). |
| 1125 | assert!(!addend.is_finite()); |
| 1126 | status = Status::OK; |
| 1127 | } else { |
| 1128 | self = unpack!(status=, self.mul_r(multiplicand, round)); |
| 1129 | } |
| 1130 | |
| 1131 | // FS can only be Status::OK or Status::INVALID_OP. There is no more work |
| 1132 | // to do in the latter case. The IEEE-754R standard says it is |
| 1133 | // implementation-defined in this case whether, if ADDEND is a |
| 1134 | // quiet NaN, we raise invalid op; this implementation does so. |
| 1135 | // |
| 1136 | // If we need to do the addition we can do so with normal |
| 1137 | // precision. |
| 1138 | if status == Status::OK { |
| 1139 | self = unpack!(status=, self.add_r(addend, round)); |
| 1140 | } |
| 1141 | return status.and(self); |
| 1142 | } |
| 1143 | |
| 1144 | // Post-multiplication sign, before addition. |
| 1145 | self = self.negate_if(multiplicand.is_negative()); |
| 1146 | |
| 1147 | // Allocate space for twice as many bits as the original significand, plus one |
| 1148 | // extra bit for the addition to overflow into. |
| 1149 | assert!(limbs_for_bits(S::PRECISION * 2 + 1) <= 2); |
| 1150 | let mut wide_sig = sig::widening_mul(self.sig[0], multiplicand.sig[0]); |
| 1151 | |
| 1152 | let mut loss = Loss::ExactlyZero; |
| 1153 | let mut omsb = sig::omsb(&wide_sig); |
| 1154 | self.exp += multiplicand.exp; |
| 1155 | |
| 1156 | // Assume the operands involved in the multiplication are single-precision |
| 1157 | // FP, and the two multiplicants are: |
| 1158 | // lhs = a23 . a22 ... a0 * 2^e1 |
| 1159 | // rhs = b23 . b22 ... b0 * 2^e2 |
| 1160 | // the result of multiplication is: |
| 1161 | // lhs = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) |
| 1162 | // Note that there are three significant bits at the left-hand side of the |
| 1163 | // radix point: two for the multiplication, and an overflow bit for the |
| 1164 | // addition (that will always be zero at this point). Move the radix point |
| 1165 | // toward left by two bits, and adjust exponent accordingly. |
| 1166 | self.exp += 2; |
| 1167 | |
| 1168 | if addend.is_non_zero() { |
| 1169 | // Normalize our MSB to one below the top bit to allow for overflow. |
| 1170 | let ext_precision = 2 * S::PRECISION + 1; |
| 1171 | if omsb != ext_precision - 1 { |
| 1172 | assert!(ext_precision > omsb); |
| 1173 | sig::shift_left(&mut wide_sig, &mut self.exp, (ext_precision - 1) - omsb); |
| 1174 | } |
| 1175 | |
| 1176 | // The intermediate result of the multiplication has "2 * S::PRECISION" |
| 1177 | // signicant bit; adjust the addend to be consistent with mul result. |
| 1178 | let mut ext_addend_sig = [addend.sig[0], 0]; |
| 1179 | |
| 1180 | // Extend the addend significand to ext_precision - 1. This guarantees |
| 1181 | // that the high bit of the significand is zero (same as wide_sig), |
| 1182 | // so the addition will overflow (if it does overflow at all) into the top bit. |
| 1183 | sig::shift_left(&mut ext_addend_sig, &mut 0, ext_precision - 1 - S::PRECISION); |
| 1184 | |
| 1185 | let mut sign = self.is_negative(); |
| 1186 | loss = sig::add_or_sub( |
| 1187 | &mut wide_sig, |
| 1188 | &mut self.exp, |
| 1189 | &mut sign, |
| 1190 | &mut ext_addend_sig, |
| 1191 | addend.exp + 1, |
| 1192 | addend.is_negative(), |
| 1193 | ); |
| 1194 | self = self.with_sign(sign); |
| 1195 | |
| 1196 | omsb = sig::omsb(&wide_sig); |
| 1197 | } |
| 1198 | |
| 1199 | // Convert the result having "2 * S::PRECISION" significant-bits back to the one |
| 1200 | // having "S::PRECISION" significant-bits. First, move the radix point from |
| 1201 | // poision "2*S::PRECISION - 1" to "S::PRECISION - 1". The exponent need to be |
| 1202 | // adjusted by "2*S::PRECISION - 1" - "S::PRECISION - 1" = "S::PRECISION". |
| 1203 | self.exp -= S::PRECISION as ExpInt + 1; |
| 1204 | |
| 1205 | // In case MSB resides at the left-hand side of radix point, shift the |
| 1206 | // mantissa right by some amount to make sure the MSB reside right before |
| 1207 | // the radix point (i.e. "MSB . rest-significant-bits"). |
| 1208 | if omsb > S::PRECISION { |
| 1209 | let bits = omsb - S::PRECISION; |
| 1210 | loss = sig::shift_right(&mut wide_sig, &mut self.exp, bits).combine(loss); |
| 1211 | } |
| 1212 | |
| 1213 | self.sig[0] = wide_sig[0]; |
| 1214 | |
| 1215 | let mut status; |
| 1216 | self = unpack!(status=, self.normalize(round, loss)); |
| 1217 | if loss != Loss::ExactlyZero { |
| 1218 | status |= Status::INEXACT; |
| 1219 | } |
| 1220 | |
| 1221 | // If two numbers add (exactly) to zero, IEEE 754 decrees it is a |
| 1222 | // positive zero unless rounding to minus infinity, except that |
| 1223 | // adding two like-signed zeroes gives that zero. |
| 1224 | if self.category() == Category::Zero |
| 1225 | && !status.intersects(Status::UNDERFLOW) |
| 1226 | && self.is_negative() != addend.is_negative() |
| 1227 | { |
| 1228 | self = self.with_sign(round == Round::TowardNegative); |
| 1229 | } |
| 1230 | |
| 1231 | status.and(self) |
| 1232 | } |
| 1233 | |
| 1234 | fn div_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
| 1235 | self = self.negate_if(rhs.is_negative()); |
| 1236 | |
| 1237 | match (self.category(), rhs.category()) { |
| 1238 | (Category::NaN, _) | (_, Category::NaN) => { |
| 1239 | self = self.negate_if(rhs.is_negative()); // restore the original sign |
| 1240 | |
| 1241 | IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) |
| 1242 | } |
| 1243 | |
| 1244 | (Category::Infinity, Category::Infinity) | (Category::Zero, Category::Zero) => { |
| 1245 | Status::INVALID_OP.and(Self::NAN) |
| 1246 | } |
| 1247 | |
| 1248 | (Category::Infinity | Category::Zero, _) => Status::OK.and(self), |
| 1249 | |
| 1250 | (_, Category::Infinity) => Status::OK.and(Self::ZERO.copy_sign(self)), |
| 1251 | |
| 1252 | (_, Category::Zero) => Status::DIV_BY_ZERO.and(Self::INFINITY.copy_sign(self)), |
| 1253 | |
| 1254 | (Category::Normal, Category::Normal) => { |
| 1255 | self.exp -= rhs.exp; |
| 1256 | let dividend = self.sig[0]; |
| 1257 | let loss = sig::div(&mut self.sig, &mut self.exp, &mut [dividend], &mut [rhs.sig[0]], S::PRECISION); |
| 1258 | let mut status; |
| 1259 | self = unpack!(status=, self.normalize(round, loss)); |
| 1260 | if loss != Loss::ExactlyZero { |
| 1261 | status |= Status::INEXACT; |
| 1262 | } |
| 1263 | status.and(self) |
| 1264 | } |
| 1265 | } |
| 1266 | } |
| 1267 | |
| 1268 | fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> { |
| 1269 | match (self.category(), rhs.category()) { |
| 1270 | (Category::NaN, _) | (_, Category::NaN) => { |
| 1271 | IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) |
| 1272 | } |
| 1273 | |
| 1274 | (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN), |
| 1275 | |
| 1276 | (Category::Zero, _) | (_, Category::Infinity) => Status::OK.and(self), |
| 1277 | |
| 1278 | (Category::Normal, Category::Normal) => { |
| 1279 | let mut status; |
| 1280 | |
| 1281 | let mut x = self; |
| 1282 | let mut p = rhs; |
| 1283 | |
| 1284 | // Make sure the current value is less than twice the denom. If the addition |
| 1285 | // did not succeed (an overflow has happened), which means that the finite |
| 1286 | // value we currently posses must be less than twice the denom (as we are |
| 1287 | // using the same semantics). |
| 1288 | let p2 = unpack!(status=, p + p); |
| 1289 | if status == Status::OK { |
| 1290 | x = unpack!(status=, x.c_fmod(p2)); |
| 1291 | assert_eq!(status, Status::OK); |
| 1292 | } |
| 1293 | |
| 1294 | // Lets work with absolute numbers. |
| 1295 | p = p.abs(); |
| 1296 | x = x.abs(); |
| 1297 | |
| 1298 | // |
| 1299 | // To calculate the remainder we use the following scheme. |
| 1300 | // |
| 1301 | // The remainder is defained as follows: |
| 1302 | // |
| 1303 | // remainder = numer - rquot * denom = x - r * p |
| 1304 | // |
| 1305 | // Where r is the result of: x/p, rounded toward the nearest integral value |
| 1306 | // (with halfway cases rounded toward the even number). |
| 1307 | // |
| 1308 | // Currently, (after x mod 2p): |
| 1309 | // r is the number of 2p's present inside x, which is inherently, an even |
| 1310 | // number of p's. |
| 1311 | // |
| 1312 | // We may split the remaining calculation into 4 options: |
| 1313 | // - if x < 0.5p then we round to the nearest number with is 0, and are done. |
| 1314 | // - if x == 0.5p then we round to the nearest even number which is 0, and we |
| 1315 | // are done as well. |
| 1316 | // - if 0.5p < x < p then we round to nearest number which is 1, and we have |
| 1317 | // to subtract 1p at least once. |
| 1318 | // - if x >= p then we must subtract p at least once, as x must be a |
| 1319 | // remainder. |
| 1320 | // |
| 1321 | // By now, we were done, or we added 1 to r, which in turn, now an odd number. |
| 1322 | // |
| 1323 | // We can now split the remaining calculation to the following 3 options: |
| 1324 | // - if x < 0.5p then we round to the nearest number with is 0, and are done. |
| 1325 | // - if x == 0.5p then we round to the nearest even number. As r is odd, we |
| 1326 | // must round up to the next even number. so we must subtract p once more. |
| 1327 | // - if x > 0.5p (and inherently x < p) then we must round r up to the next |
| 1328 | // integral, and subtract p once more. |
| 1329 | // |
| 1330 | |
| 1331 | // Return `x * 2` at no loss of precision (i.e. no overflow). |
| 1332 | // |
| 1333 | // HACK(eddyb) this may seem a bit sketchy because it can return |
| 1334 | // values that `normalize` would've replaced with `overflow_result` |
| 1335 | // (e.g. overflowing to infinity), but the result is only used for |
| 1336 | // comparisons, where both sides of such comparison can be seen |
| 1337 | // as transiently having a larger *effective* exponent range. |
| 1338 | let lossless_2x = |mut x: Self| { |
| 1339 | x.exp += 1; |
| 1340 | |
| 1341 | if x.exp >= Self::MAX_EXP { |
| 1342 | // HACK(eddyb) skip lossy `normalize` (see above). |
| 1343 | } else { |
| 1344 | let status; |
| 1345 | x = unpack!(status=, x.normalize(Round::NearestTiesToEven, Loss::ExactlyZero)); |
| 1346 | assert_eq!(status, Status::OK); |
| 1347 | } |
| 1348 | |
| 1349 | x |
| 1350 | }; |
| 1351 | |
| 1352 | if lossless_2x(x) > p { |
| 1353 | x = unpack!(status=, x - p); |
| 1354 | assert_eq!(status, Status::OK); |
| 1355 | |
| 1356 | if lossless_2x(x) >= p { |
| 1357 | x = unpack!(status=, x - p); |
| 1358 | assert_eq!(status, Status::OK); |
| 1359 | } |
| 1360 | } |
| 1361 | |
| 1362 | if x.is_zero() { |
| 1363 | Status::OK.and(x.copy_sign(self)) // IEEE754 requires this |
| 1364 | } else { |
| 1365 | Status::OK.and(x.negate_if(self.is_negative())) |
| 1366 | } |
| 1367 | } |
| 1368 | } |
| 1369 | } |
| 1370 | |
| 1371 | fn c_fmod(mut self, rhs: Self) -> StatusAnd<Self> { |
| 1372 | match (self.category(), rhs.category()) { |
| 1373 | (Category::NaN, _) | (_, Category::NaN) => { |
| 1374 | IeeeDefaultExceptionHandling::binop_result_from_either_nan(self, rhs) |
| 1375 | } |
| 1376 | |
| 1377 | (Category::Infinity, _) | (_, Category::Zero) => Status::INVALID_OP.and(Self::NAN), |
| 1378 | |
| 1379 | (Category::Zero, _) | (_, Category::Infinity) => Status::OK.and(self), |
| 1380 | |
| 1381 | (Category::Normal, Category::Normal) => { |
| 1382 | let orig = self; |
| 1383 | |
| 1384 | while self.is_finite_non_zero() |
| 1385 | && rhs.is_finite_non_zero() |
| 1386 | && self.cmp_abs_normal(rhs) != Ordering::Less |
| 1387 | { |
| 1388 | let exp = self.ilogb() - rhs.ilogb(); |
| 1389 | let mut v = rhs.scalbn(exp); |
| 1390 | // `v` can overflow to NaN with `NonfiniteBehavior::NanOnly`, so explicitly |
| 1391 | // check for it. |
| 1392 | if v.is_nan() || self.cmp_abs_normal(v) == Ordering::Less { |
| 1393 | v = rhs.scalbn(exp - 1); |
| 1394 | } |
| 1395 | v = v.copy_sign(self); |
| 1396 | |
| 1397 | let status; |
| 1398 | self = unpack!(status=, self - v); |
| 1399 | assert_eq!(status, Status::OK); |
| 1400 | } |
| 1401 | if self.is_zero() { |
| 1402 | self = self.copy_sign(orig); |
| 1403 | } |
| 1404 | Status::OK.and(self) |
| 1405 | } |
| 1406 | } |
| 1407 | } |
| 1408 | |
| 1409 | fn round_to_integral(self, round: Round) -> StatusAnd<Self> { |
| 1410 | match self.category() { |
| 1411 | Category::NaN => IeeeDefaultExceptionHandling::result_from_nan(self), |
| 1412 | |
| 1413 | // [IEEE Std 754-2008 6.1]: |
| 1414 | // The behavior of infinity in floating-point arithmetic is derived from the |
| 1415 | // limiting cases of real arithmetic with operands of arbitrarily |
| 1416 | // large magnitude, when such a limit exists. |
| 1417 | // ... |
| 1418 | // Operations on infinite operands are usually exact and therefore signal no |
| 1419 | // exceptions ... |
| 1420 | Category::Infinity => Status::OK.and(self), |
| 1421 | |
| 1422 | // [IEEE Std 754-2008 6.3]: |
| 1423 | // ... the sign of the result of conversions, the quantize operation, the |
| 1424 | // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is |
| 1425 | // the sign of the first or only operand. |
| 1426 | Category::Zero => Status::OK.and(self), |
| 1427 | |
| 1428 | Category::Normal => { |
| 1429 | // If the exponent is large enough, we know that this value is already |
| 1430 | // integral, and the arithmetic below would potentially cause it to saturate |
| 1431 | // to +/-Inf. Bail out early instead. |
| 1432 | if self.exp + 1 >= S::PRECISION as ExpInt { |
| 1433 | return Status::OK.and(self); |
| 1434 | } |
| 1435 | |
| 1436 | // The algorithm here is quite simple: we add 2^(p-1), where p is the |
| 1437 | // precision of our format, and then subtract it back off again. The choice |
| 1438 | // of rounding modes for the addition/subtraction determines the rounding mode |
| 1439 | // for our integral rounding as well. |
| 1440 | // NOTE: When the input value is negative, we do subtraction followed by |
| 1441 | // addition instead. |
| 1442 | assert!(S::PRECISION <= 128); |
| 1443 | let mut status; |
| 1444 | let magic_const = unpack!(status=, Self::from_u128(1 << (S::PRECISION - 1))); |
| 1445 | assert_eq!(status, Status::OK); |
| 1446 | let magic_const = magic_const.copy_sign(self); |
| 1447 | |
| 1448 | let mut r = self; |
| 1449 | r = unpack!(status=, r.add_r(magic_const, round)); |
| 1450 | |
| 1451 | // Current value and 'MagicConstant' are both integers, so the result of the |
| 1452 | // subtraction is always exact according to Sterbenz' lemma. |
| 1453 | r = r.sub_r(magic_const, round).value; |
| 1454 | |
| 1455 | // Restore the input sign to handle the case of zero result |
| 1456 | // correctly. |
| 1457 | status.and(r.copy_sign(self)) |
| 1458 | } |
| 1459 | } |
| 1460 | } |
| 1461 | |
| 1462 | fn next_up(mut self) -> StatusAnd<Self> { |
| 1463 | // Compute nextUp(x), handling each float category separately. |
| 1464 | match self.category() { |
| 1465 | Category::Infinity => { |
| 1466 | if self.is_negative() { |
| 1467 | // nextUp(-inf) = -largest |
| 1468 | Status::OK.and(-Self::largest()) |
| 1469 | } else { |
| 1470 | // nextUp(+inf) = +inf |
| 1471 | Status::OK.and(self) |
| 1472 | } |
| 1473 | } |
| 1474 | Category::NaN => { |
| 1475 | // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. |
| 1476 | // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not |
| 1477 | // change the payload. |
| 1478 | if self.is_signaling() { |
| 1479 | // For consistency, propagate the sign of the sNaN to the qNaN. |
| 1480 | Status::INVALID_OP.and(Self::NAN.copy_sign(self)) |
| 1481 | } else { |
| 1482 | Status::OK.and(self) |
| 1483 | } |
| 1484 | } |
| 1485 | Category::Zero => { |
| 1486 | // nextUp(pm 0) = +smallest |
| 1487 | Status::OK.and(Self::SMALLEST) |
| 1488 | } |
| 1489 | Category::Normal => { |
| 1490 | // nextUp(-smallest) = -0 |
| 1491 | if self.is_smallest() && self.is_negative() { |
| 1492 | return Status::OK.and(-Self::ZERO); |
| 1493 | } |
| 1494 | |
| 1495 | // nextUp(largest) == INFINITY |
| 1496 | if self.is_largest() && !self.is_negative() { |
| 1497 | return Status::OK.and(Self::INFINITY); |
| 1498 | } |
| 1499 | |
| 1500 | // Excluding the integral bit. This allows us to test for binade boundaries. |
| 1501 | let sig_mask = (1 << (S::PRECISION - 1)) - 1; |
| 1502 | |
| 1503 | // nextUp(normal) == normal + inc. |
| 1504 | if self.is_negative() { |
| 1505 | // If we are negative, we need to decrement the significand. |
| 1506 | |
| 1507 | // We only cross a binade boundary that requires adjusting the exponent |
| 1508 | // if: |
| 1509 | // 1. exponent != S::MIN_EXP. This implies we are not in the |
| 1510 | // smallest binade or are dealing with denormals. |
| 1511 | // 2. Our significand excluding the integral bit is all zeros. |
| 1512 | let crossing_binade_boundary = self.exp != S::MIN_EXP && self.sig[0] & sig_mask == 0; |
| 1513 | |
| 1514 | // Decrement the significand. |
| 1515 | // |
| 1516 | // We always do this since: |
| 1517 | // 1. If we are dealing with a non-binade decrement, by definition we |
| 1518 | // just decrement the significand. |
| 1519 | // 2. If we are dealing with a normal -> normal binade decrement, since |
| 1520 | // we have an explicit integral bit the fact that all bits but the |
| 1521 | // integral bit are zero implies that subtracting one will yield a |
| 1522 | // significand with 0 integral bit and 1 in all other spots. Thus we |
| 1523 | // must just adjust the exponent and set the integral bit to 1. |
| 1524 | // 3. If we are dealing with a normal -> denormal binade decrement, |
| 1525 | // since we set the integral bit to 0 when we represent denormals, we |
| 1526 | // just decrement the significand. |
| 1527 | sig::decrement(&mut self.sig); |
| 1528 | |
| 1529 | if crossing_binade_boundary { |
| 1530 | // Our result is a normal number. Do the following: |
| 1531 | // 1. Set the integral bit to 1. |
| 1532 | // 2. Decrement the exponent. |
| 1533 | sig::set_bit(&mut self.sig, S::PRECISION - 1); |
| 1534 | self.exp -= 1; |
| 1535 | } |
| 1536 | } else { |
| 1537 | // If we are positive, we need to increment the significand. |
| 1538 | |
| 1539 | // We only cross a binade boundary that requires adjusting the exponent if |
| 1540 | // the input is not a denormal and all of said input's significand bits |
| 1541 | // are set. If all of said conditions are true: clear the significand, set |
| 1542 | // the integral bit to 1, and increment the exponent. If we have a |
| 1543 | // denormal always increment since moving denormals and the numbers in the |
| 1544 | // smallest normal binade have the same exponent in our representation. |
| 1545 | let crossing_binade_boundary = !self.is_denormal() && self.sig[0] & sig_mask == sig_mask; |
| 1546 | |
| 1547 | if crossing_binade_boundary { |
| 1548 | self.sig = [0]; |
| 1549 | sig::set_bit(&mut self.sig, S::PRECISION - 1); |
| 1550 | assert_ne!( |
| 1551 | self.exp, |
| 1552 | S::MAX_EXP, |
| 1553 | "We can not increment an exponent beyond the MAX_EXP \ |
| 1554 | allowed by the given floating point semantics." |
| 1555 | ); |
| 1556 | self.exp += 1; |
| 1557 | } else { |
| 1558 | sig::increment(&mut self.sig); |
| 1559 | } |
| 1560 | } |
| 1561 | Status::OK.and(self) |
| 1562 | } |
| 1563 | } |
| 1564 | } |
| 1565 | |
| 1566 | fn from_bits(input: u128) -> Self { |
| 1567 | // Dispatch to semantics. |
| 1568 | S::from_bits(input) |
| 1569 | } |
| 1570 | |
| 1571 | fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> { |
| 1572 | IeeeFloat { |
| 1573 | sig: [input], |
| 1574 | exp: S::PRECISION as ExpInt - 1, |
| 1575 | read_only_category_do_not_mutate: Category::Normal, |
| 1576 | read_only_sign_do_not_mutate: false, |
| 1577 | marker: PhantomData, |
| 1578 | } |
| 1579 | .normalize(round, Loss::ExactlyZero) |
| 1580 | } |
| 1581 | |
| 1582 | fn from_str_r(s: &str, mut round: Round) -> Result<StatusAnd<Self>, ParseError> { |
| 1583 | if s.is_empty() { |
| 1584 | return Err(ParseError("Invalid string length" )); |
| 1585 | } |
| 1586 | |
| 1587 | // Handle a leading minus sign. |
| 1588 | let (minus, s) = s.strip_prefix("-" ).map(|s| (true, s)).unwrap_or((false, s)); |
| 1589 | let from_abs = |r: Self| r.negate_if(minus); |
| 1590 | |
| 1591 | // Handle a leading plus sign (mutually exclusive with minus). |
| 1592 | let (explicit_plus, s) = s |
| 1593 | .strip_prefix("+" ) |
| 1594 | .filter(|_| !minus) |
| 1595 | .map(|s| (true, s)) |
| 1596 | .unwrap_or((false, s)); |
| 1597 | |
| 1598 | // Handle special cases. |
| 1599 | let special = match s { |
| 1600 | "Inf" if minus || explicit_plus => Some(Self::INFINITY), |
| 1601 | |
| 1602 | "inf" | "INFINITY" if !explicit_plus => Some(Self::INFINITY), |
| 1603 | |
| 1604 | _ if !explicit_plus => { |
| 1605 | // If we have a 's' (or 'S') prefix, then this is a Signaling NaN. |
| 1606 | let (is_signaling, s) = s.strip_prefix(['s' , 'S' ]).map_or((false, s), |s| (true, s)); |
| 1607 | |
| 1608 | s.strip_prefix("nan" ).or_else(|| s.strip_prefix("NaN" )).and_then(|s| { |
| 1609 | // Allow the payload to be inside parentheses. |
| 1610 | let s = s |
| 1611 | .strip_prefix("(" ) |
| 1612 | .and_then(|s| { |
| 1613 | // Parentheses should be balanced (and not empty). |
| 1614 | s.strip_suffix(")" ).filter(|s| !s.is_empty()) |
| 1615 | }) |
| 1616 | .unwrap_or(s); |
| 1617 | |
| 1618 | let payload = if s.is_empty() { |
| 1619 | // A NaN without payload. |
| 1620 | None |
| 1621 | } else { |
| 1622 | // Determine the payload number's radix. |
| 1623 | let (radix, s) = s |
| 1624 | .strip_prefix("0" ) |
| 1625 | .filter(|s| !s.is_empty()) |
| 1626 | .map(|s| s.strip_prefix(['x' , 'X' ]).map(|s| (16, s)).unwrap_or((8, s))) |
| 1627 | .unwrap_or((10, s)); |
| 1628 | |
| 1629 | // Parse the payload and make the NaN. |
| 1630 | Some(u128::from_str_radix(s, radix).ok()?) |
| 1631 | }; |
| 1632 | |
| 1633 | Some(if is_signaling { |
| 1634 | Self::snan(payload) |
| 1635 | } else { |
| 1636 | Self::qnan(payload) |
| 1637 | }) |
| 1638 | }) |
| 1639 | } |
| 1640 | |
| 1641 | _ => None, |
| 1642 | }; |
| 1643 | if let Some(r) = special { |
| 1644 | return Ok(Status::OK.and(from_abs(r))); |
| 1645 | } |
| 1646 | |
| 1647 | if s.is_empty() { |
| 1648 | return Err(ParseError("String has no digits" )); |
| 1649 | } |
| 1650 | |
| 1651 | // Adjust the rounding mode for the absolute value below. |
| 1652 | round = round.negate_if(minus); |
| 1653 | |
| 1654 | let (is_hex, s) = s |
| 1655 | .strip_prefix("0" ) |
| 1656 | .and_then(|s| s.strip_prefix(['x' , 'X' ])) |
| 1657 | .map(|s| (true, s)) |
| 1658 | .unwrap_or((false, s)); |
| 1659 | |
| 1660 | let r = if is_hex { |
| 1661 | if s.is_empty() { |
| 1662 | return Err(ParseError("Invalid string" )); |
| 1663 | } |
| 1664 | Self::from_hexadecimal_string(s, round)? |
| 1665 | } else { |
| 1666 | Self::from_decimal_string(s, round)? |
| 1667 | }; |
| 1668 | |
| 1669 | Ok(r.map(from_abs)) |
| 1670 | } |
| 1671 | |
| 1672 | fn to_bits(self) -> u128 { |
| 1673 | // Dispatch to semantics. |
| 1674 | S::to_bits(self) |
| 1675 | } |
| 1676 | |
| 1677 | fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> { |
| 1678 | // The result of trying to convert a number too large. |
| 1679 | let overflow = if self.is_negative() { |
| 1680 | // Negative numbers cannot be represented as unsigned. |
| 1681 | 0 |
| 1682 | } else { |
| 1683 | // Largest unsigned integer of the given width. |
| 1684 | !0 >> (128 - width) |
| 1685 | }; |
| 1686 | |
| 1687 | *is_exact = false; |
| 1688 | |
| 1689 | match self.category() { |
| 1690 | Category::NaN => Status::INVALID_OP.and(0), |
| 1691 | |
| 1692 | Category::Infinity => Status::INVALID_OP.and(overflow), |
| 1693 | |
| 1694 | Category::Zero => { |
| 1695 | // Negative zero can't be represented as an int. |
| 1696 | *is_exact = !self.is_negative(); |
| 1697 | Status::OK.and(0) |
| 1698 | } |
| 1699 | |
| 1700 | Category::Normal => { |
| 1701 | let mut r = 0; |
| 1702 | |
| 1703 | // Step 1: place our absolute value, with any fraction truncated, in |
| 1704 | // the destination. |
| 1705 | let truncated_bits = if self.exp < 0 { |
| 1706 | // Our absolute value is less than one; truncate everything. |
| 1707 | // For exponent -1 the integer bit represents .5, look at that. |
| 1708 | // For smaller exponents leftmost truncated bit is 0. |
| 1709 | S::PRECISION - 1 + (-self.exp) as usize |
| 1710 | } else { |
| 1711 | // We want the most significant (exponent + 1) bits; the rest are |
| 1712 | // truncated. |
| 1713 | let bits = self.exp as usize + 1; |
| 1714 | |
| 1715 | // Hopelessly large in magnitude? |
| 1716 | if bits > width { |
| 1717 | return Status::INVALID_OP.and(overflow); |
| 1718 | } |
| 1719 | |
| 1720 | if bits < S::PRECISION { |
| 1721 | // We truncate (S::PRECISION - bits) bits. |
| 1722 | r = self.sig[0] >> (S::PRECISION - bits); |
| 1723 | S::PRECISION - bits |
| 1724 | } else { |
| 1725 | // We want at least as many bits as are available. |
| 1726 | r = self.sig[0] << (bits - S::PRECISION); |
| 1727 | 0 |
| 1728 | } |
| 1729 | }; |
| 1730 | |
| 1731 | // Step 2: work out any lost fraction, and increment the absolute |
| 1732 | // value if we would round away from zero. |
| 1733 | let mut loss = Loss::ExactlyZero; |
| 1734 | if truncated_bits > 0 { |
| 1735 | loss = Loss::through_truncation(&self.sig, truncated_bits); |
| 1736 | if loss != Loss::ExactlyZero && self.round_away_from_zero(round, loss, truncated_bits) { |
| 1737 | r = r.wrapping_add(1); |
| 1738 | if r == 0 { |
| 1739 | return Status::INVALID_OP.and(overflow); // Overflow. |
| 1740 | } |
| 1741 | } |
| 1742 | } |
| 1743 | |
| 1744 | // Step 3: check if we fit in the destination. |
| 1745 | if r > overflow { |
| 1746 | return Status::INVALID_OP.and(overflow); |
| 1747 | } |
| 1748 | |
| 1749 | if loss == Loss::ExactlyZero { |
| 1750 | *is_exact = true; |
| 1751 | Status::OK.and(r) |
| 1752 | } else { |
| 1753 | Status::INEXACT.and(r) |
| 1754 | } |
| 1755 | } |
| 1756 | } |
| 1757 | } |
| 1758 | |
| 1759 | fn cmp_abs_normal(self, rhs: Self) -> Ordering { |
| 1760 | assert!(self.is_finite_non_zero()); |
| 1761 | assert!(rhs.is_finite_non_zero()); |
| 1762 | |
| 1763 | // If exponents are equal, do an unsigned comparison of the significands. |
| 1764 | self.exp.cmp(&rhs.exp).then_with(|| sig::cmp(&self.sig, &rhs.sig)) |
| 1765 | } |
| 1766 | |
| 1767 | fn bitwise_eq(self, rhs: Self) -> bool { |
| 1768 | if self.category() != rhs.category() || self.is_negative() != rhs.is_negative() { |
| 1769 | return false; |
| 1770 | } |
| 1771 | |
| 1772 | if self.category() == Category::Zero || self.category() == Category::Infinity { |
| 1773 | return true; |
| 1774 | } |
| 1775 | |
| 1776 | if self.is_finite_non_zero() && self.exp != rhs.exp { |
| 1777 | return false; |
| 1778 | } |
| 1779 | |
| 1780 | self.sig == rhs.sig |
| 1781 | } |
| 1782 | |
| 1783 | fn is_negative(self) -> bool { |
| 1784 | self.read_only_sign_do_not_mutate |
| 1785 | } |
| 1786 | |
| 1787 | fn is_denormal(self) -> bool { |
| 1788 | self.is_finite_non_zero() && self.exp == S::MIN_EXP && !sig::get_bit(&self.sig, S::PRECISION - 1) |
| 1789 | } |
| 1790 | |
| 1791 | fn is_signaling(self) -> bool { |
| 1792 | // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the |
| 1793 | // first bit of the trailing significand being 0. |
| 1794 | self.is_nan() && self.sig[0] & S::QNAN_SIGNIFICAND != S::QNAN_SIGNIFICAND |
| 1795 | } |
| 1796 | |
| 1797 | fn category(self) -> Category { |
| 1798 | self.read_only_category_do_not_mutate |
| 1799 | } |
| 1800 | |
| 1801 | fn get_exact_inverse(self) -> Option<Self> { |
| 1802 | // Special floats and denormals have no exact inverse. |
| 1803 | if !self.is_finite_non_zero() { |
| 1804 | return None; |
| 1805 | } |
| 1806 | |
| 1807 | // Check that the number is a power of two by making sure that only the |
| 1808 | // integer bit is set in the significand. |
| 1809 | if self.sig != [1 << (S::PRECISION - 1)] { |
| 1810 | return None; |
| 1811 | } |
| 1812 | |
| 1813 | // Get the inverse. |
| 1814 | let mut reciprocal = Self::from_u128(1).value; |
| 1815 | let status; |
| 1816 | reciprocal = unpack!(status=, reciprocal / self); |
| 1817 | if status != Status::OK { |
| 1818 | return None; |
| 1819 | } |
| 1820 | |
| 1821 | // Avoid multiplication with a denormal, it is not safe on all platforms and |
| 1822 | // may be slower than a normal division. |
| 1823 | if reciprocal.is_denormal() { |
| 1824 | return None; |
| 1825 | } |
| 1826 | |
| 1827 | assert!(reciprocal.is_finite_non_zero()); |
| 1828 | assert_eq!(reciprocal.sig, [1 << (S::PRECISION - 1)]); |
| 1829 | |
| 1830 | Some(reciprocal) |
| 1831 | } |
| 1832 | |
| 1833 | fn ilogb(mut self) -> ExpInt { |
| 1834 | if self.is_nan() { |
| 1835 | return IEK_NAN; |
| 1836 | } |
| 1837 | if self.is_zero() { |
| 1838 | return IEK_ZERO; |
| 1839 | } |
| 1840 | if self.is_infinite() { |
| 1841 | return IEK_INF; |
| 1842 | } |
| 1843 | if !self.is_denormal() { |
| 1844 | return self.exp; |
| 1845 | } |
| 1846 | |
| 1847 | let sig_bits = (S::PRECISION - 1) as ExpInt; |
| 1848 | self.exp += sig_bits; |
| 1849 | self = self.normalize(Round::NearestTiesToEven, Loss::ExactlyZero).value; |
| 1850 | self.exp - sig_bits |
| 1851 | } |
| 1852 | |
| 1853 | fn scalbn_r(mut self, exp: ExpInt, round: Round) -> Self { |
| 1854 | // If exp is wildly out-of-scale, simply adding it to self.exp will |
| 1855 | // overflow; clamp it to a safe range before adding, but ensure that the range |
| 1856 | // is large enough that the clamp does not change the result. The range we |
| 1857 | // need to support is the difference between the largest possible exponent and |
| 1858 | // the normalized exponent of half the smallest denormal. |
| 1859 | |
| 1860 | let sig_bits = (S::PRECISION - 1) as i32; |
| 1861 | let max_change = S::MAX_EXP as i32 - (S::MIN_EXP as i32 - sig_bits) + 1; |
| 1862 | |
| 1863 | // Clamp to one past the range ends to let normalize handle overflow. |
| 1864 | let exp_change = cmp::min(cmp::max(exp as i32, -max_change - 1), max_change); |
| 1865 | self.exp = self.exp.saturating_add(exp_change as ExpInt); |
| 1866 | self = self.normalize(round, Loss::ExactlyZero).value; |
| 1867 | if self.is_nan() { |
| 1868 | self = IeeeDefaultExceptionHandling::result_from_nan(self).value; |
| 1869 | } |
| 1870 | self |
| 1871 | } |
| 1872 | |
| 1873 | fn frexp_r(mut self, exp: &mut ExpInt, round: Round) -> Self { |
| 1874 | *exp = self.ilogb(); |
| 1875 | |
| 1876 | // Quiet signalling nans. |
| 1877 | if *exp == IEK_NAN { |
| 1878 | self = IeeeDefaultExceptionHandling::result_from_nan(self).value; |
| 1879 | return self; |
| 1880 | } |
| 1881 | |
| 1882 | if *exp == IEK_INF { |
| 1883 | return self; |
| 1884 | } |
| 1885 | |
| 1886 | // 1 is added because frexp is defined to return a normalized fraction in |
| 1887 | // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0). |
| 1888 | if *exp == IEK_ZERO { |
| 1889 | *exp = 0; |
| 1890 | } else { |
| 1891 | *exp += 1; |
| 1892 | } |
| 1893 | self.scalbn_r(-*exp, round) |
| 1894 | } |
| 1895 | } |
| 1896 | |
| 1897 | impl<S: Semantics, T: Semantics> FloatConvert<IeeeFloat<T>> for IeeeFloat<S> { |
| 1898 | fn convert_r(mut self, round: Round, loses_info: &mut bool) -> StatusAnd<IeeeFloat<T>> { |
| 1899 | // FIXME(eddyb) move this into the return result. |
| 1900 | *loses_info = false; |
| 1901 | |
| 1902 | // x86 has some unusual NaNs which cannot be represented in any other |
| 1903 | // format; note them here. |
| 1904 | fn is_x87_double_extended<S: Semantics>() -> bool { |
| 1905 | S::QNAN_SIGNIFICAND == X87DoubleExtendedS::QNAN_SIGNIFICAND |
| 1906 | } |
| 1907 | let loses_x87_pseudo_nan = is_x87_double_extended::<S>() |
| 1908 | && !is_x87_double_extended::<T>() |
| 1909 | && self.category() == Category::NaN |
| 1910 | && (self.sig[0] & S::QNAN_SIGNIFICAND) != S::QNAN_SIGNIFICAND; |
| 1911 | |
| 1912 | // NOTE(eddyb) this is done early because the target semantics may not |
| 1913 | // actually support encoding the distinction between SNaN and QNaN. |
| 1914 | // |
| 1915 | // Convert of sNaN creates qNaN and raises an exception (invalid op). |
| 1916 | // This also guarantees that a sNaN does not become Inf on a truncation |
| 1917 | // that loses all payload bits. |
| 1918 | let mut status = Status::OK; |
| 1919 | if self.is_nan() { |
| 1920 | self = unpack!(status|=, IeeeDefaultExceptionHandling::result_from_nan(self)); |
| 1921 | } |
| 1922 | |
| 1923 | let Self { mut sig, mut exp, .. } = self; |
| 1924 | |
| 1925 | // If this is a truncation of a denormal number, and the target semantics |
| 1926 | // has larger exponent range than the source semantics (this can happen |
| 1927 | // when truncating from PowerPC double-double to double format), the |
| 1928 | // right shift could lose result mantissa bits. Adjust exponent instead |
| 1929 | // of performing excessive shift. |
| 1930 | // Also do a similar trick in case shifting denormal would produce zero |
| 1931 | // significand as this case isn't handled correctly by normalize. |
| 1932 | let mut shift = T::PRECISION as ExpInt - S::PRECISION as ExpInt; |
| 1933 | if shift < 0 && self.is_finite_non_zero() { |
| 1934 | let omsb = sig::omsb(&sig) as ExpInt; |
| 1935 | let mut exp_change = omsb - S::PRECISION as ExpInt; |
| 1936 | if exp + exp_change < T::MIN_EXP { |
| 1937 | exp_change = T::MIN_EXP - exp; |
| 1938 | } |
| 1939 | if exp_change < shift { |
| 1940 | exp_change = shift; |
| 1941 | } |
| 1942 | if exp_change < 0 { |
| 1943 | shift -= exp_change; |
| 1944 | exp += exp_change; |
| 1945 | } else if omsb <= -shift { |
| 1946 | exp_change = omsb + shift - 1; // leave at least one bit set |
| 1947 | shift -= exp_change; |
| 1948 | exp += exp_change; |
| 1949 | } |
| 1950 | } |
| 1951 | |
| 1952 | // If this is a truncation, perform the shift. |
| 1953 | let mut loss = Loss::ExactlyZero; |
| 1954 | if shift < 0 && (self.is_finite_non_zero() || self.category() == Category::NaN && S::NAN_PAYLOAD_MASK != 0) { |
| 1955 | loss = sig::shift_right(&mut sig, &mut 0, -shift as usize); |
| 1956 | } |
| 1957 | |
| 1958 | // If this is an extension, perform the shift. |
| 1959 | if shift > 0 && (self.is_finite_non_zero() || self.category() == Category::NaN) { |
| 1960 | sig::shift_left(&mut sig, &mut 0, shift as usize); |
| 1961 | } |
| 1962 | |
| 1963 | let r = match self.category() { |
| 1964 | Category::Normal => { |
| 1965 | let r = IeeeFloat::<T> { |
| 1966 | sig, |
| 1967 | exp, |
| 1968 | read_only_category_do_not_mutate: self.category(), |
| 1969 | read_only_sign_do_not_mutate: self.is_negative(), |
| 1970 | marker: PhantomData, |
| 1971 | }; |
| 1972 | unpack!(status|=, r.normalize(round, loss)) |
| 1973 | } |
| 1974 | Category::NaN => { |
| 1975 | *loses_info = loss != Loss::ExactlyZero |
| 1976 | || loses_x87_pseudo_nan |
| 1977 | || S::NAN_PAYLOAD_MASK != 0 && T::NAN_PAYLOAD_MASK == 0; |
| 1978 | |
| 1979 | IeeeFloat::<T>::qnan(Some(sig[0])).with_sign(self.is_negative()) |
| 1980 | } |
| 1981 | Category::Infinity => IeeeFloat::<T>::INFINITY.with_sign(self.is_negative()), |
| 1982 | Category::Zero => IeeeFloat::<T>::ZERO.with_sign(self.is_negative()), |
| 1983 | }; |
| 1984 | |
| 1985 | // NOTE(eddyb) this catches all cases of e.g. ±Inf turning into NaN, |
| 1986 | // because of `T::NONFINITE_BEHAVIOR` not being `IEEE754`. |
| 1987 | if matches!(self.category(), Category::Infinity | Category::Zero) |
| 1988 | && (r.category() != self.category() || r.is_negative() != self.is_negative()) |
| 1989 | { |
| 1990 | status |= Status::INEXACT; |
| 1991 | } |
| 1992 | |
| 1993 | *loses_info |= (status - Status::INVALID_OP) != Status::OK; |
| 1994 | |
| 1995 | status.and(r) |
| 1996 | } |
| 1997 | } |
| 1998 | |
| 1999 | impl<S: Semantics> IeeeFloat<S> { |
| 2000 | /// Handle positive overflow. We either return infinity or |
| 2001 | /// the largest finite number. For negative overflow, |
| 2002 | /// negate the `round` argument before calling. |
| 2003 | fn overflow_result(round: Round) -> StatusAnd<Self> { |
| 2004 | match round { |
| 2005 | // Infinity? |
| 2006 | Round::NearestTiesToEven | Round::NearestTiesToAway | Round::TowardPositive => { |
| 2007 | (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY) |
| 2008 | } |
| 2009 | // Otherwise we become the largest finite number. |
| 2010 | Round::TowardNegative | Round::TowardZero => Status::INEXACT.and(Self::largest()), |
| 2011 | } |
| 2012 | } |
| 2013 | |
| 2014 | /// Returns TRUE if, when truncating the current number, with BIT the |
| 2015 | /// new LSB, with the given lost fraction and rounding mode, the result |
| 2016 | /// would need to be rounded away from zero (i.e., by increasing the |
| 2017 | /// signficand). This routine must work for Category::Zero of both signs, and |
| 2018 | /// Category::Normal numbers. |
| 2019 | fn round_away_from_zero(&self, round: Round, loss: Loss, bit: usize) -> bool { |
| 2020 | // NaNs and infinities should not have lost fractions. |
| 2021 | assert!(self.is_finite_non_zero() || self.is_zero()); |
| 2022 | |
| 2023 | // Current callers never pass this so we don't handle it. |
| 2024 | assert_ne!(loss, Loss::ExactlyZero); |
| 2025 | |
| 2026 | match round { |
| 2027 | Round::NearestTiesToAway => loss == Loss::ExactlyHalf || loss == Loss::MoreThanHalf, |
| 2028 | Round::NearestTiesToEven => { |
| 2029 | if loss == Loss::MoreThanHalf { |
| 2030 | return true; |
| 2031 | } |
| 2032 | |
| 2033 | // Our zeros don't have a significand to test. |
| 2034 | if loss == Loss::ExactlyHalf && self.category() != Category::Zero { |
| 2035 | return sig::get_bit(&self.sig, bit); |
| 2036 | } |
| 2037 | |
| 2038 | false |
| 2039 | } |
| 2040 | Round::TowardZero => false, |
| 2041 | Round::TowardPositive => !self.is_negative(), |
| 2042 | Round::TowardNegative => self.is_negative(), |
| 2043 | } |
| 2044 | } |
| 2045 | |
| 2046 | fn normalize(mut self, round: Round, mut loss: Loss) -> StatusAnd<Self> { |
| 2047 | if !self.is_finite_non_zero() { |
| 2048 | return Status::OK.and(self); |
| 2049 | } |
| 2050 | |
| 2051 | // Before rounding normalize the exponent of Category::Normal numbers. |
| 2052 | let mut omsb = sig::omsb(&self.sig); |
| 2053 | |
| 2054 | if omsb > 0 { |
| 2055 | // OMSB is numbered from 1. We want to place it in the integer |
| 2056 | // bit numbered PRECISION if possible, with a compensating change in |
| 2057 | // the exponent. |
| 2058 | let mut final_exp = self.exp.saturating_add(omsb as ExpInt - S::PRECISION as ExpInt); |
| 2059 | |
| 2060 | // If the resulting exponent is too high, overflow according to |
| 2061 | // the rounding mode. |
| 2062 | if final_exp > S::MAX_EXP { |
| 2063 | let round = round.negate_if(self.is_negative()); |
| 2064 | return Self::overflow_result(round).map(|r| r.copy_sign(self)); |
| 2065 | } |
| 2066 | |
| 2067 | // Subnormal numbers have exponent MIN_EXP, and their MSB |
| 2068 | // is forced based on that. |
| 2069 | if final_exp < S::MIN_EXP { |
| 2070 | final_exp = S::MIN_EXP; |
| 2071 | } |
| 2072 | |
| 2073 | // Shifting left is easy as we don't lose precision. |
| 2074 | if final_exp < self.exp { |
| 2075 | assert_eq!(loss, Loss::ExactlyZero); |
| 2076 | |
| 2077 | let exp_change = (self.exp - final_exp) as usize; |
| 2078 | sig::shift_left(&mut self.sig, &mut self.exp, exp_change); |
| 2079 | |
| 2080 | return Status::OK.and(self); |
| 2081 | } |
| 2082 | |
| 2083 | // Shift right and capture any new lost fraction. |
| 2084 | if final_exp > self.exp { |
| 2085 | let exp_change = (final_exp - self.exp) as usize; |
| 2086 | loss = sig::shift_right(&mut self.sig, &mut self.exp, exp_change).combine(loss); |
| 2087 | |
| 2088 | // Keep OMSB up-to-date. |
| 2089 | omsb = omsb.saturating_sub(exp_change); |
| 2090 | } |
| 2091 | } |
| 2092 | |
| 2093 | // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up |
| 2094 | // the largest significand of `MAX_EXP` (which also has normals), though |
| 2095 | // comparing significands needs to ignore the integer bit `NAN` lacks. |
| 2096 | if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly |
| 2097 | && self.exp == Self::NAN.exp |
| 2098 | && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig |
| 2099 | { |
| 2100 | return Self::overflow_result(round).map(|r| r.copy_sign(self)); |
| 2101 | } |
| 2102 | |
| 2103 | // Now round the number according to round given the lost |
| 2104 | // fraction. |
| 2105 | |
| 2106 | // As specified in IEEE 754, since we do not trap we do not report |
| 2107 | // underflow for exact results. |
| 2108 | if loss == Loss::ExactlyZero { |
| 2109 | // Canonicalize zeros. |
| 2110 | if omsb == 0 { |
| 2111 | self = Self::ZERO.copy_sign(self); |
| 2112 | } |
| 2113 | |
| 2114 | return Status::OK.and(self); |
| 2115 | } |
| 2116 | |
| 2117 | // Increment the significand if we're rounding away from zero. |
| 2118 | if self.round_away_from_zero(round, loss, 0) { |
| 2119 | if omsb == 0 { |
| 2120 | self.exp = S::MIN_EXP; |
| 2121 | } |
| 2122 | |
| 2123 | // We should never overflow. |
| 2124 | assert_eq!(sig::increment(&mut self.sig), 0); |
| 2125 | omsb = sig::omsb(&self.sig); |
| 2126 | |
| 2127 | // Did the significand increment overflow? |
| 2128 | if omsb == S::PRECISION + 1 { |
| 2129 | // Renormalize by incrementing the exponent and shifting our |
| 2130 | // significand right one. However if we already have the |
| 2131 | // maximum exponent we overflow to infinity. |
| 2132 | if self.exp == S::MAX_EXP { |
| 2133 | return (Status::OVERFLOW | Status::INEXACT).and(Self::INFINITY.copy_sign(self)); |
| 2134 | } |
| 2135 | |
| 2136 | let _: Loss = sig::shift_right(&mut self.sig, &mut self.exp, 1); |
| 2137 | |
| 2138 | return Status::INEXACT.and(self); |
| 2139 | } |
| 2140 | |
| 2141 | // NOTE(eddyb) for `NonfiniteBehavior::NanOnly`, the unique `NAN` takes up |
| 2142 | // the largest significand of `MAX_EXP` (which also has normals), though |
| 2143 | // comparing significands needs to ignore the integer bit `NAN` lacks. |
| 2144 | if S::NONFINITE_BEHAVIOR == NonfiniteBehavior::NanOnly |
| 2145 | && self.exp == Self::NAN.exp |
| 2146 | && [self.sig[0] & S::NAN_SIGNIFICAND_BASE] == Self::NAN.sig |
| 2147 | { |
| 2148 | return Self::overflow_result(round).map(|r| r.copy_sign(self)); |
| 2149 | } |
| 2150 | } |
| 2151 | |
| 2152 | // The normal case - we were and are not denormal, and any |
| 2153 | // significand increment above didn't overflow. |
| 2154 | if omsb == S::PRECISION { |
| 2155 | return Status::INEXACT.and(self); |
| 2156 | } |
| 2157 | |
| 2158 | // We have a non-zero denormal. |
| 2159 | assert!(omsb < S::PRECISION); |
| 2160 | |
| 2161 | // Canonicalize zeros. |
| 2162 | if omsb == 0 { |
| 2163 | self = Self::ZERO.copy_sign(self); |
| 2164 | } |
| 2165 | |
| 2166 | // The Category::Zero case is a denormal that underflowed to zero. |
| 2167 | (Status::UNDERFLOW | Status::INEXACT).and(self) |
| 2168 | } |
| 2169 | |
| 2170 | fn from_hexadecimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { |
| 2171 | let mut r = IeeeFloat { |
| 2172 | sig: [0], |
| 2173 | exp: 0, |
| 2174 | read_only_category_do_not_mutate: Category::Normal, |
| 2175 | read_only_sign_do_not_mutate: false, |
| 2176 | marker: PhantomData, |
| 2177 | }; |
| 2178 | |
| 2179 | let mut any_digits = false; |
| 2180 | let mut has_exp = false; |
| 2181 | let mut bit_pos = LIMB_BITS as isize; |
| 2182 | let mut loss = None; |
| 2183 | |
| 2184 | // Without leading or trailing zeros, irrespective of the dot. |
| 2185 | let mut first_sig_digit = None; |
| 2186 | let mut dot = s.len(); |
| 2187 | |
| 2188 | for (p, c) in s.char_indices() { |
| 2189 | // Skip leading zeros and any (hexa)decimal point. |
| 2190 | if c == '.' { |
| 2191 | if dot != s.len() { |
| 2192 | return Err(ParseError("String contains multiple dots" )); |
| 2193 | } |
| 2194 | dot = p; |
| 2195 | } else if let Some(hex_value) = c.to_digit(16) { |
| 2196 | any_digits = true; |
| 2197 | |
| 2198 | if first_sig_digit.is_none() { |
| 2199 | if hex_value == 0 { |
| 2200 | continue; |
| 2201 | } |
| 2202 | first_sig_digit = Some(p); |
| 2203 | } |
| 2204 | |
| 2205 | // Store the number while we have space. |
| 2206 | bit_pos -= 4; |
| 2207 | if bit_pos >= 0 { |
| 2208 | r.sig[0] |= (hex_value as Limb) << bit_pos; |
| 2209 | } else { |
| 2210 | // If zero or one-half (the hexadecimal digit 8) are followed |
| 2211 | // by non-zero, they're a little more than zero or one-half. |
| 2212 | if let Some(ref mut loss) = loss { |
| 2213 | if hex_value != 0 { |
| 2214 | if *loss == Loss::ExactlyZero { |
| 2215 | *loss = Loss::LessThanHalf; |
| 2216 | } |
| 2217 | if *loss == Loss::ExactlyHalf { |
| 2218 | *loss = Loss::MoreThanHalf; |
| 2219 | } |
| 2220 | } |
| 2221 | } else { |
| 2222 | loss = Some(match hex_value { |
| 2223 | 0 => Loss::ExactlyZero, |
| 2224 | 1..=7 => Loss::LessThanHalf, |
| 2225 | 8 => Loss::ExactlyHalf, |
| 2226 | 9..=15 => Loss::MoreThanHalf, |
| 2227 | _ => unreachable!(), |
| 2228 | }); |
| 2229 | } |
| 2230 | } |
| 2231 | } else if c == 'p' || c == 'P' { |
| 2232 | if !any_digits { |
| 2233 | return Err(ParseError("Significand has no digits" )); |
| 2234 | } |
| 2235 | |
| 2236 | if dot == s.len() { |
| 2237 | dot = p; |
| 2238 | } |
| 2239 | |
| 2240 | let mut chars = s[p + 1..].chars().peekable(); |
| 2241 | |
| 2242 | // Adjust for the given exponent. |
| 2243 | let exp_minus = chars.peek() == Some(&'-' ); |
| 2244 | if exp_minus || chars.peek() == Some(&'+' ) { |
| 2245 | chars.next(); |
| 2246 | } |
| 2247 | |
| 2248 | for c in chars { |
| 2249 | if let Some(value) = c.to_digit(10) { |
| 2250 | has_exp = true; |
| 2251 | r.exp = r.exp.saturating_mul(10).saturating_add(value as ExpInt); |
| 2252 | } else { |
| 2253 | return Err(ParseError("Invalid character in exponent" )); |
| 2254 | } |
| 2255 | } |
| 2256 | if !has_exp { |
| 2257 | return Err(ParseError("Exponent has no digits" )); |
| 2258 | } |
| 2259 | |
| 2260 | r.exp = r.exp.negate_if(exp_minus); |
| 2261 | |
| 2262 | break; |
| 2263 | } else { |
| 2264 | return Err(ParseError("Invalid character in significand" )); |
| 2265 | } |
| 2266 | } |
| 2267 | if !any_digits { |
| 2268 | return Err(ParseError("Significand has no digits" )); |
| 2269 | } |
| 2270 | |
| 2271 | // Hex floats require an exponent but not a hexadecimal point. |
| 2272 | if !has_exp { |
| 2273 | return Err(ParseError("Hex strings require an exponent" )); |
| 2274 | } |
| 2275 | |
| 2276 | // Ignore the exponent if we are zero. |
| 2277 | let first_sig_digit = match first_sig_digit { |
| 2278 | Some(p) => p, |
| 2279 | None => return Ok(Status::OK.and(Self::ZERO)), |
| 2280 | }; |
| 2281 | |
| 2282 | // Calculate the exponent adjustment implicit in the number of |
| 2283 | // significant digits and adjust for writing the significand starting |
| 2284 | // at the most significant nibble. |
| 2285 | let exp_adjustment = if dot > first_sig_digit { |
| 2286 | ExpInt::try_from(dot - first_sig_digit).unwrap() |
| 2287 | } else { |
| 2288 | -ExpInt::try_from(first_sig_digit - dot - 1).unwrap() |
| 2289 | }; |
| 2290 | let exp_adjustment = exp_adjustment |
| 2291 | .saturating_mul(4) |
| 2292 | .saturating_sub(1) |
| 2293 | .saturating_add(S::PRECISION as ExpInt) |
| 2294 | .saturating_sub(LIMB_BITS as ExpInt); |
| 2295 | r.exp = r.exp.saturating_add(exp_adjustment); |
| 2296 | |
| 2297 | Ok(r.normalize(round, loss.unwrap_or(Loss::ExactlyZero))) |
| 2298 | } |
| 2299 | |
| 2300 | fn from_decimal_string(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { |
| 2301 | // Given a normal decimal floating point number of the form |
| 2302 | // |
| 2303 | // dddd.dddd[eE][+-]ddd |
| 2304 | // |
| 2305 | // where the decimal point and exponent are optional, fill out the |
| 2306 | // variables below. Exponent is appropriate if the significand is |
| 2307 | // treated as an integer, and normalized_exp if the significand |
| 2308 | // is taken to have the decimal point after a single leading |
| 2309 | // non-zero digit. |
| 2310 | // |
| 2311 | // If the value is zero, first_sig_digit is None. |
| 2312 | |
| 2313 | let mut any_digits = false; |
| 2314 | let mut dec_exp = 0i32; |
| 2315 | |
| 2316 | // Without leading or trailing zeros, irrespective of the dot. |
| 2317 | let mut first_sig_digit = None; |
| 2318 | let mut last_sig_digit = 0; |
| 2319 | let mut dot = s.len(); |
| 2320 | |
| 2321 | for (p, c) in s.char_indices() { |
| 2322 | if c == '.' { |
| 2323 | if dot != s.len() { |
| 2324 | return Err(ParseError("String contains multiple dots" )); |
| 2325 | } |
| 2326 | dot = p; |
| 2327 | } else if let Some(dec_value) = c.to_digit(10) { |
| 2328 | any_digits = true; |
| 2329 | |
| 2330 | if dec_value != 0 { |
| 2331 | if first_sig_digit.is_none() { |
| 2332 | first_sig_digit = Some(p); |
| 2333 | } |
| 2334 | last_sig_digit = p; |
| 2335 | } |
| 2336 | } else if c == 'e' || c == 'E' { |
| 2337 | if !any_digits { |
| 2338 | return Err(ParseError("Significand has no digits" )); |
| 2339 | } |
| 2340 | |
| 2341 | if dot == s.len() { |
| 2342 | dot = p; |
| 2343 | } |
| 2344 | |
| 2345 | let mut chars = s[p + 1..].chars().peekable(); |
| 2346 | |
| 2347 | // Adjust for the given exponent. |
| 2348 | let exp_minus = chars.peek() == Some(&'-' ); |
| 2349 | if exp_minus || chars.peek() == Some(&'+' ) { |
| 2350 | chars.next(); |
| 2351 | } |
| 2352 | |
| 2353 | let mut any_exp_digits = false; |
| 2354 | for c in chars { |
| 2355 | if let Some(value) = c.to_digit(10) { |
| 2356 | any_exp_digits = true; |
| 2357 | dec_exp = dec_exp.saturating_mul(10).saturating_add(value as i32); |
| 2358 | } else { |
| 2359 | return Err(ParseError("Invalid character in exponent" )); |
| 2360 | } |
| 2361 | } |
| 2362 | // Treat no exponent as 0 to match binutils |
| 2363 | if !any_exp_digits { |
| 2364 | assert_eq!(dec_exp, 0); |
| 2365 | } |
| 2366 | |
| 2367 | dec_exp = dec_exp.negate_if(exp_minus); |
| 2368 | |
| 2369 | break; |
| 2370 | } else { |
| 2371 | return Err(ParseError("Invalid character in significand" )); |
| 2372 | } |
| 2373 | } |
| 2374 | if !any_digits { |
| 2375 | return Err(ParseError("Significand has no digits" )); |
| 2376 | } |
| 2377 | |
| 2378 | // Test if we have a zero number allowing for non-zero exponents. |
| 2379 | let first_sig_digit = match first_sig_digit { |
| 2380 | Some(p) => p, |
| 2381 | None => return Ok(Status::OK.and(Self::ZERO)), |
| 2382 | }; |
| 2383 | |
| 2384 | // Adjust the exponents for any decimal point. |
| 2385 | if dot > last_sig_digit { |
| 2386 | dec_exp = dec_exp.saturating_add((dot - last_sig_digit - 1) as i32); |
| 2387 | } else { |
| 2388 | dec_exp = dec_exp.saturating_sub((last_sig_digit - dot) as i32); |
| 2389 | } |
| 2390 | let significand_digits = |
| 2391 | last_sig_digit - first_sig_digit + 1 - (dot > first_sig_digit && dot < last_sig_digit) as usize; |
| 2392 | let normalized_exp = dec_exp.saturating_add(significand_digits as i32 - 1); |
| 2393 | |
| 2394 | // Handle the cases where exponents are obviously too large or too |
| 2395 | // small. Writing L for log 10 / log 2, a number d.ddddd*10^dec_exp |
| 2396 | // definitely overflows if |
| 2397 | // |
| 2398 | // (dec_exp - 1) * L >= MAX_EXP |
| 2399 | // |
| 2400 | // and definitely underflows to zero where |
| 2401 | // |
| 2402 | // (dec_exp + 1) * L <= MIN_EXP - PRECISION |
| 2403 | // |
| 2404 | // With integer arithmetic the tightest bounds for L are |
| 2405 | // |
| 2406 | // 93/28 < L < 196/59 [ numerator <= 256 ] |
| 2407 | // 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] |
| 2408 | |
| 2409 | // Check for MAX_EXP. |
| 2410 | if normalized_exp.saturating_sub(1).saturating_mul(42039) >= 12655 * S::MAX_EXP as i32 { |
| 2411 | // Overflow and round. |
| 2412 | return Ok(Self::overflow_result(round)); |
| 2413 | } |
| 2414 | |
| 2415 | // Check for MIN_EXP. |
| 2416 | if normalized_exp.saturating_add(1).saturating_mul(28738) <= 8651 * (S::MIN_EXP as i32 - S::PRECISION as i32) { |
| 2417 | // Underflow to zero and round. |
| 2418 | let r = if round == Round::TowardPositive { |
| 2419 | IeeeFloat::SMALLEST |
| 2420 | } else { |
| 2421 | IeeeFloat::ZERO |
| 2422 | }; |
| 2423 | return Ok((Status::UNDERFLOW | Status::INEXACT).and(r)); |
| 2424 | } |
| 2425 | |
| 2426 | // A tight upper bound on number of bits required to hold an |
| 2427 | // N-digit decimal integer is N * 196 / 59. Allocate enough space |
| 2428 | // to hold the full significand, and an extra limb required by |
| 2429 | // tcMultiplyPart. |
| 2430 | let max_limbs = limbs_for_bits(1 + 196 * significand_digits / 59); |
| 2431 | let mut dec_sig = DynPrecisionLimbVec::with_capacity(max_limbs); |
| 2432 | |
| 2433 | // Convert to binary efficiently - we do almost all multiplication |
| 2434 | // in a Limb. When this would overflow do we do a single |
| 2435 | // bignum multiplication, and then revert again to multiplication |
| 2436 | // in a Limb. |
| 2437 | let mut chars = s[first_sig_digit..last_sig_digit + 1].chars(); |
| 2438 | loop { |
| 2439 | let mut val = 0; |
| 2440 | let mut multiplier = 1; |
| 2441 | |
| 2442 | loop { |
| 2443 | let dec_value = match chars.next() { |
| 2444 | Some('.' ) => continue, |
| 2445 | Some(c) => c.to_digit(10).unwrap(), |
| 2446 | None => break, |
| 2447 | }; |
| 2448 | |
| 2449 | multiplier *= 10; |
| 2450 | val = val * 10 + dec_value as Limb; |
| 2451 | |
| 2452 | // The maximum number that can be multiplied by ten with any |
| 2453 | // digit added without overflowing a Limb. |
| 2454 | if multiplier > (!0 - 9) / 10 { |
| 2455 | break; |
| 2456 | } |
| 2457 | } |
| 2458 | |
| 2459 | // If we've consumed no digits, we're done. |
| 2460 | if multiplier == 1 { |
| 2461 | break; |
| 2462 | } |
| 2463 | |
| 2464 | // Multiply out the current limb. |
| 2465 | let mut carry = val; |
| 2466 | for x in &mut dec_sig { |
| 2467 | let [low, mut high] = sig::widening_mul(*x, multiplier); |
| 2468 | |
| 2469 | // Now add carry. |
| 2470 | let (low, overflow) = low.overflowing_add(carry); |
| 2471 | high += overflow as Limb; |
| 2472 | |
| 2473 | *x = low; |
| 2474 | carry = high; |
| 2475 | } |
| 2476 | |
| 2477 | // If we had carry, we need another limb (likely but not guaranteed). |
| 2478 | if carry > 0 { |
| 2479 | dec_sig.push(carry); |
| 2480 | } |
| 2481 | } |
| 2482 | |
| 2483 | // Calculate pow(5, abs(dec_exp)) into `pow5_full`. |
| 2484 | // The *_calc Vec's are reused scratch space, as an optimization. |
| 2485 | let (pow5_full, mut pow5_calc, mut sig_calc, mut sig_scratch_calc) = { |
| 2486 | let mut power = dec_exp.abs() as usize; |
| 2487 | |
| 2488 | const FIRST_EIGHT_POWERS: [Limb; 8] = [1, 5, 25, 125, 625, 3125, 15625, 78125]; |
| 2489 | |
| 2490 | let mut p5_scratch = DynPrecisionLimbVec::new(); |
| 2491 | let mut p5: DynPrecisionLimbVec = [FIRST_EIGHT_POWERS[4]].into_iter().collect(); |
| 2492 | |
| 2493 | let mut r_scratch = DynPrecisionLimbVec::new(); |
| 2494 | let mut r: DynPrecisionLimbVec = [FIRST_EIGHT_POWERS[power & 7]].into_iter().collect(); |
| 2495 | power >>= 3; |
| 2496 | |
| 2497 | while power > 0 { |
| 2498 | // Calculate pow(5,pow(2,n+3)). |
| 2499 | p5_scratch.resize(p5.len() * 2, 0); |
| 2500 | let _: Loss = sig::mul(&mut p5_scratch, &mut 0, &p5, &p5, p5.len() * 2 * LIMB_BITS); |
| 2501 | while p5_scratch.last() == Some(&0) { |
| 2502 | p5_scratch.pop(); |
| 2503 | } |
| 2504 | mem::swap(&mut p5, &mut p5_scratch); |
| 2505 | |
| 2506 | if power & 1 != 0 { |
| 2507 | r_scratch.resize(r.len() + p5.len(), 0); |
| 2508 | let _: Loss = sig::mul(&mut r_scratch, &mut 0, &r, &p5, (r.len() + p5.len()) * LIMB_BITS); |
| 2509 | while r_scratch.last() == Some(&0) { |
| 2510 | r_scratch.pop(); |
| 2511 | } |
| 2512 | mem::swap(&mut r, &mut r_scratch); |
| 2513 | } |
| 2514 | |
| 2515 | power >>= 1; |
| 2516 | } |
| 2517 | |
| 2518 | (r, r_scratch, p5, p5_scratch) |
| 2519 | }; |
| 2520 | |
| 2521 | // Attempt dec_sig * 10^dec_exp with increasing precision. |
| 2522 | let mut attempt = 0; |
| 2523 | loop { |
| 2524 | let calc_precision = (LIMB_BITS << attempt) - 1; |
| 2525 | attempt += 1; |
| 2526 | |
| 2527 | let calc_normal_from_limbs = |sig: &mut DynPrecisionLimbVec, limbs: &[Limb]| -> StatusAnd<ExpInt> { |
| 2528 | sig.resize(limbs_for_bits(calc_precision), 0); |
| 2529 | let (mut loss, mut exp) = sig::from_limbs(sig, limbs, calc_precision); |
| 2530 | |
| 2531 | // Before rounding normalize the exponent of Category::Normal numbers. |
| 2532 | let mut omsb = sig::omsb(sig); |
| 2533 | |
| 2534 | assert_ne!(omsb, 0); |
| 2535 | |
| 2536 | // OMSB is numbered from 1. We want to place it in the integer |
| 2537 | // bit numbered PRECISION if possible, with a compensating change in |
| 2538 | // the exponent. |
| 2539 | let final_exp = exp.saturating_add(omsb as ExpInt - calc_precision as ExpInt); |
| 2540 | |
| 2541 | // Shifting left is easy as we don't lose precision. |
| 2542 | if final_exp < exp { |
| 2543 | assert_eq!(loss, Loss::ExactlyZero); |
| 2544 | |
| 2545 | let exp_change = (exp - final_exp) as usize; |
| 2546 | sig::shift_left(sig, &mut exp, exp_change); |
| 2547 | |
| 2548 | return Status::OK.and(exp); |
| 2549 | } |
| 2550 | |
| 2551 | // Shift right and capture any new lost fraction. |
| 2552 | if final_exp > exp { |
| 2553 | let exp_change = (final_exp - exp) as usize; |
| 2554 | loss = sig::shift_right(sig, &mut exp, exp_change).combine(loss); |
| 2555 | |
| 2556 | // Keep OMSB up-to-date. |
| 2557 | omsb = omsb.saturating_sub(exp_change); |
| 2558 | } |
| 2559 | |
| 2560 | assert_eq!(omsb, calc_precision); |
| 2561 | |
| 2562 | // Now round the number according to round given the lost |
| 2563 | // fraction. |
| 2564 | |
| 2565 | // As specified in IEEE 754, since we do not trap we do not report |
| 2566 | // underflow for exact results. |
| 2567 | if loss == Loss::ExactlyZero { |
| 2568 | return Status::OK.and(exp); |
| 2569 | } |
| 2570 | |
| 2571 | // Increment the significand if we're rounding away from zero. |
| 2572 | if loss == Loss::MoreThanHalf || loss == Loss::ExactlyHalf && sig::get_bit(sig, 0) { |
| 2573 | // We should never overflow. |
| 2574 | assert_eq!(sig::increment(sig), 0); |
| 2575 | omsb = sig::omsb(sig); |
| 2576 | |
| 2577 | // Did the significand increment overflow? |
| 2578 | if omsb == calc_precision + 1 { |
| 2579 | let _: Loss = sig::shift_right(sig, &mut exp, 1); |
| 2580 | |
| 2581 | return Status::INEXACT.and(exp); |
| 2582 | } |
| 2583 | } |
| 2584 | |
| 2585 | // The normal case - we were and are not denormal, and any |
| 2586 | // significand increment above didn't overflow. |
| 2587 | Status::INEXACT.and(exp) |
| 2588 | }; |
| 2589 | |
| 2590 | let status; |
| 2591 | let mut exp = unpack!(status=, |
| 2592 | calc_normal_from_limbs(&mut sig_calc, &dec_sig)); |
| 2593 | let pow5_status; |
| 2594 | let pow5_exp = unpack!(pow5_status=, |
| 2595 | calc_normal_from_limbs(&mut pow5_calc, &pow5_full)); |
| 2596 | |
| 2597 | // Add dec_exp, as 10^n = 5^n * 2^n. |
| 2598 | exp += dec_exp as ExpInt; |
| 2599 | |
| 2600 | let mut used_bits = S::PRECISION; |
| 2601 | let mut truncated_bits = calc_precision - used_bits; |
| 2602 | |
| 2603 | let half_ulp_err1 = (status != Status::OK) as Limb; |
| 2604 | let (calc_loss, half_ulp_err2); |
| 2605 | if dec_exp >= 0 { |
| 2606 | exp += pow5_exp; |
| 2607 | |
| 2608 | sig_scratch_calc.resize(sig_calc.len() + pow5_calc.len(), 0); |
| 2609 | calc_loss = sig::mul(&mut sig_scratch_calc, &mut exp, &sig_calc, &pow5_calc, calc_precision); |
| 2610 | mem::swap(&mut sig_calc, &mut sig_scratch_calc); |
| 2611 | |
| 2612 | half_ulp_err2 = (pow5_status != Status::OK) as Limb; |
| 2613 | } else { |
| 2614 | exp -= pow5_exp; |
| 2615 | |
| 2616 | sig_scratch_calc.resize(sig_calc.len(), 0); |
| 2617 | calc_loss = sig::div(&mut sig_scratch_calc, &mut exp, &mut sig_calc, &mut pow5_calc, calc_precision); |
| 2618 | mem::swap(&mut sig_calc, &mut sig_scratch_calc); |
| 2619 | |
| 2620 | // Denormal numbers have less precision. |
| 2621 | if exp < S::MIN_EXP { |
| 2622 | truncated_bits += (S::MIN_EXP - exp) as usize; |
| 2623 | used_bits = calc_precision.saturating_sub(truncated_bits); |
| 2624 | } |
| 2625 | // Extra half-ulp lost in reciprocal of exponent. |
| 2626 | half_ulp_err2 = 2 * (pow5_status != Status::OK || calc_loss != Loss::ExactlyZero) as Limb; |
| 2627 | } |
| 2628 | |
| 2629 | // Both sig::mul and sig::div return the |
| 2630 | // result with the integer bit set. |
| 2631 | assert!(sig::get_bit(&sig_calc, calc_precision - 1)); |
| 2632 | |
| 2633 | // The error from the true value, in half-ulps, on multiplying two |
| 2634 | // floating point numbers, which differ from the value they |
| 2635 | // approximate by at most half_ulp_err1 and half_ulp_err2 half-ulps, is strictly less |
| 2636 | // than the returned value. |
| 2637 | // |
| 2638 | // See "How to Read Floating Point Numbers Accurately" by William D Clinger. |
| 2639 | assert!(half_ulp_err1 < 2 || half_ulp_err2 < 2 || (half_ulp_err1 + half_ulp_err2 < 8)); |
| 2640 | |
| 2641 | let inexact = (calc_loss != Loss::ExactlyZero) as Limb; |
| 2642 | let half_ulp_err = if half_ulp_err1 + half_ulp_err2 == 0 { |
| 2643 | inexact * 2 // <= inexact half-ulps. |
| 2644 | } else { |
| 2645 | inexact + 2 * (half_ulp_err1 + half_ulp_err2) |
| 2646 | }; |
| 2647 | |
| 2648 | let ulps_from_boundary = { |
| 2649 | let bits = calc_precision - used_bits - 1; |
| 2650 | |
| 2651 | let i = bits / LIMB_BITS; |
| 2652 | let limb = sig_calc[i] & (!0 >> (LIMB_BITS - 1 - bits % LIMB_BITS)); |
| 2653 | let boundary = match round { |
| 2654 | Round::NearestTiesToEven | Round::NearestTiesToAway => 1 << (bits % LIMB_BITS), |
| 2655 | _ => 0, |
| 2656 | }; |
| 2657 | if i == 0 { |
| 2658 | let delta = limb.wrapping_sub(boundary); |
| 2659 | cmp::min(delta, delta.wrapping_neg()) |
| 2660 | } else if limb == boundary { |
| 2661 | if !sig::is_all_zeros(&sig_calc[1..i]) { |
| 2662 | !0 // A lot. |
| 2663 | } else { |
| 2664 | sig_calc[0] |
| 2665 | } |
| 2666 | } else if limb == boundary.wrapping_sub(1) { |
| 2667 | if sig_calc[1..i].iter().any(|&x| x.wrapping_neg() != 1) { |
| 2668 | !0 // A lot. |
| 2669 | } else { |
| 2670 | sig_calc[0].wrapping_neg() |
| 2671 | } |
| 2672 | } else { |
| 2673 | !0 // A lot. |
| 2674 | } |
| 2675 | }; |
| 2676 | |
| 2677 | // Are we guaranteed to round correctly if we truncate? |
| 2678 | if ulps_from_boundary.saturating_mul(2) >= half_ulp_err { |
| 2679 | let mut r = IeeeFloat { |
| 2680 | sig: [0], |
| 2681 | exp, |
| 2682 | read_only_category_do_not_mutate: Category::Normal, |
| 2683 | read_only_sign_do_not_mutate: false, |
| 2684 | marker: PhantomData, |
| 2685 | }; |
| 2686 | sig::extract(&mut r.sig, &sig_calc, used_bits, calc_precision - used_bits); |
| 2687 | // If we extracted less bits above we must adjust our exponent |
| 2688 | // to compensate for the implicit right shift. |
| 2689 | r.exp += (S::PRECISION - used_bits) as ExpInt; |
| 2690 | let loss = Loss::through_truncation(&sig_calc, truncated_bits); |
| 2691 | return Ok(r.normalize(round, loss)); |
| 2692 | } |
| 2693 | } |
| 2694 | } |
| 2695 | } |
| 2696 | |
| 2697 | impl Loss { |
| 2698 | /// Combine the effect of two lost fractions. |
| 2699 | #[inline ] |
| 2700 | fn combine(self, less_significant: Loss) -> Loss { |
| 2701 | let mut more_significant = self; |
| 2702 | if less_significant != Loss::ExactlyZero { |
| 2703 | if more_significant == Loss::ExactlyZero { |
| 2704 | more_significant = Loss::LessThanHalf; |
| 2705 | } else if more_significant == Loss::ExactlyHalf { |
| 2706 | more_significant = Loss::MoreThanHalf; |
| 2707 | } |
| 2708 | } |
| 2709 | |
| 2710 | more_significant |
| 2711 | } |
| 2712 | |
| 2713 | /// Return the fraction lost were a bignum truncated losing the least |
| 2714 | /// significant `bits` bits. |
| 2715 | #[inline ] |
| 2716 | fn through_truncation(limbs: &[Limb], bits: usize) -> Loss { |
| 2717 | if bits == 0 { |
| 2718 | return Loss::ExactlyZero; |
| 2719 | } |
| 2720 | |
| 2721 | let half_bit = bits - 1; |
| 2722 | let half_limb = half_bit / LIMB_BITS; |
| 2723 | let (half_limb, rest) = if half_limb < limbs.len() { |
| 2724 | (limbs[half_limb], &limbs[..half_limb]) |
| 2725 | } else { |
| 2726 | (0, limbs) |
| 2727 | }; |
| 2728 | let half = 1 << (half_bit % LIMB_BITS); |
| 2729 | let has_half = half_limb & half != 0; |
| 2730 | let has_rest = half_limb & (half - 1) != 0 || !sig::is_all_zeros(rest); |
| 2731 | |
| 2732 | match (has_half, has_rest) { |
| 2733 | (false, false) => Loss::ExactlyZero, |
| 2734 | (false, true) => Loss::LessThanHalf, |
| 2735 | (true, false) => Loss::ExactlyHalf, |
| 2736 | (true, true) => Loss::MoreThanHalf, |
| 2737 | } |
| 2738 | } |
| 2739 | } |
| 2740 | |
| 2741 | /// Implementation details of IeeeFloat significands, such as big integer arithmetic. |
| 2742 | /// As a rule of thumb, no functions in this module should dynamically allocate. |
| 2743 | mod sig { |
| 2744 | use super::{limbs_for_bits, ExpInt, Limb, Loss, LIMB_BITS}; |
| 2745 | use core::cmp::Ordering; |
| 2746 | use core::mem; |
| 2747 | |
| 2748 | #[inline ] |
| 2749 | pub(super) fn is_all_zeros(limbs: &[Limb]) -> bool { |
| 2750 | limbs.iter().all(|&l| l == 0) |
| 2751 | } |
| 2752 | |
| 2753 | /// One, not zero, based LSB. That is, returns 0 for a zeroed significand. |
| 2754 | #[inline ] |
| 2755 | pub(super) fn olsb(limbs: &[Limb]) -> usize { |
| 2756 | for i in 0..limbs.len() { |
| 2757 | if limbs[i] != 0 { |
| 2758 | return i * LIMB_BITS + limbs[i].trailing_zeros() as usize + 1; |
| 2759 | } |
| 2760 | } |
| 2761 | |
| 2762 | 0 |
| 2763 | } |
| 2764 | |
| 2765 | /// One, not zero, based MSB. That is, returns 0 for a zeroed significand. |
| 2766 | #[inline ] |
| 2767 | pub(super) fn omsb(limbs: &[Limb]) -> usize { |
| 2768 | for i in (0..limbs.len()).rev() { |
| 2769 | if limbs[i] != 0 { |
| 2770 | return (i + 1) * LIMB_BITS - limbs[i].leading_zeros() as usize; |
| 2771 | } |
| 2772 | } |
| 2773 | |
| 2774 | 0 |
| 2775 | } |
| 2776 | |
| 2777 | /// Comparison (unsigned) of two significands. |
| 2778 | #[inline ] |
| 2779 | pub(super) fn cmp(a: &[Limb], b: &[Limb]) -> Ordering { |
| 2780 | assert_eq!(a.len(), b.len()); |
| 2781 | for (a, b) in a.iter().zip(b).rev() { |
| 2782 | match a.cmp(b) { |
| 2783 | Ordering::Equal => {} |
| 2784 | o => return o, |
| 2785 | } |
| 2786 | } |
| 2787 | |
| 2788 | Ordering::Equal |
| 2789 | } |
| 2790 | |
| 2791 | /// Extract the given bit. |
| 2792 | #[inline ] |
| 2793 | pub(super) fn get_bit(limbs: &[Limb], bit: usize) -> bool { |
| 2794 | limbs[bit / LIMB_BITS] & (1 << (bit % LIMB_BITS)) != 0 |
| 2795 | } |
| 2796 | |
| 2797 | /// Set the given bit. |
| 2798 | #[inline ] |
| 2799 | pub(super) fn set_bit(limbs: &mut [Limb], bit: usize) { |
| 2800 | limbs[bit / LIMB_BITS] |= 1 << (bit % LIMB_BITS); |
| 2801 | } |
| 2802 | |
| 2803 | /// Shift `dst` left `bits` bits, subtract `bits` from its exponent. |
| 2804 | #[inline ] |
| 2805 | pub(super) fn shift_left(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) { |
| 2806 | if bits > 0 { |
| 2807 | // Our exponent should not underflow. |
| 2808 | *exp = exp.checked_sub(bits as ExpInt).unwrap(); |
| 2809 | |
| 2810 | // Jump is the inter-limb jump; shift is is intra-limb shift. |
| 2811 | let jump = bits / LIMB_BITS; |
| 2812 | let shift = bits % LIMB_BITS; |
| 2813 | |
| 2814 | for i in (0..dst.len()).rev() { |
| 2815 | let mut limb; |
| 2816 | |
| 2817 | if i < jump { |
| 2818 | limb = 0; |
| 2819 | } else { |
| 2820 | // dst[i] comes from the two limbs src[i - jump] and, if we have |
| 2821 | // an intra-limb shift, src[i - jump - 1]. |
| 2822 | limb = dst[i - jump]; |
| 2823 | if shift > 0 { |
| 2824 | limb <<= shift; |
| 2825 | if i >= jump + 1 { |
| 2826 | limb |= dst[i - jump - 1] >> (LIMB_BITS - shift); |
| 2827 | } |
| 2828 | } |
| 2829 | } |
| 2830 | |
| 2831 | dst[i] = limb; |
| 2832 | } |
| 2833 | } |
| 2834 | } |
| 2835 | |
| 2836 | /// Shift `dst` right `bits` bits noting lost fraction. |
| 2837 | #[inline ] |
| 2838 | pub(super) fn shift_right(dst: &mut [Limb], exp: &mut ExpInt, bits: usize) -> Loss { |
| 2839 | let loss = Loss::through_truncation(dst, bits); |
| 2840 | |
| 2841 | if bits > 0 { |
| 2842 | // Our exponent should not overflow. |
| 2843 | *exp = exp.checked_add(bits as ExpInt).unwrap(); |
| 2844 | |
| 2845 | // Jump is the inter-limb jump; shift is is intra-limb shift. |
| 2846 | let jump = bits / LIMB_BITS; |
| 2847 | let shift = bits % LIMB_BITS; |
| 2848 | |
| 2849 | // Perform the shift. This leaves the most significant `bits` bits |
| 2850 | // of the result at zero. |
| 2851 | for i in 0..dst.len() { |
| 2852 | let mut limb; |
| 2853 | |
| 2854 | if i + jump >= dst.len() { |
| 2855 | limb = 0; |
| 2856 | } else { |
| 2857 | limb = dst[i + jump]; |
| 2858 | if shift > 0 { |
| 2859 | limb >>= shift; |
| 2860 | if i + jump + 1 < dst.len() { |
| 2861 | limb |= dst[i + jump + 1] << (LIMB_BITS - shift); |
| 2862 | } |
| 2863 | } |
| 2864 | } |
| 2865 | |
| 2866 | dst[i] = limb; |
| 2867 | } |
| 2868 | } |
| 2869 | |
| 2870 | loss |
| 2871 | } |
| 2872 | |
| 2873 | /// Copy the bit vector of width `src_bits` from `src`, starting at bit SRC_LSB, |
| 2874 | /// to `dst`, such that the bit SRC_LSB becomes the least significant bit of `dst`. |
| 2875 | /// All high bits above `src_bits` in `dst` are zero-filled. |
| 2876 | #[inline ] |
| 2877 | pub(super) fn extract(dst: &mut [Limb], src: &[Limb], src_bits: usize, src_lsb: usize) { |
| 2878 | if src_bits == 0 { |
| 2879 | return; |
| 2880 | } |
| 2881 | |
| 2882 | let dst_limbs = limbs_for_bits(src_bits); |
| 2883 | assert!(dst_limbs <= dst.len()); |
| 2884 | |
| 2885 | let src = &src[src_lsb / LIMB_BITS..]; |
| 2886 | dst[..dst_limbs].copy_from_slice(&src[..dst_limbs]); |
| 2887 | |
| 2888 | let shift = src_lsb % LIMB_BITS; |
| 2889 | let _: Loss = shift_right(&mut dst[..dst_limbs], &mut 0, shift); |
| 2890 | |
| 2891 | // We now have (dst_limbs * LIMB_BITS - shift) bits from `src` |
| 2892 | // in `dst`. If this is less that src_bits, append the rest, else |
| 2893 | // clear the high bits. |
| 2894 | let n = dst_limbs * LIMB_BITS - shift; |
| 2895 | if n < src_bits { |
| 2896 | let mask = (1 << (src_bits - n)) - 1; |
| 2897 | dst[dst_limbs - 1] |= (src[dst_limbs] & mask) << n % LIMB_BITS; |
| 2898 | } else if n > src_bits && src_bits % LIMB_BITS > 0 { |
| 2899 | dst[dst_limbs - 1] &= (1 << (src_bits % LIMB_BITS)) - 1; |
| 2900 | } |
| 2901 | |
| 2902 | // Clear high limbs. |
| 2903 | for x in &mut dst[dst_limbs..] { |
| 2904 | *x = 0; |
| 2905 | } |
| 2906 | } |
| 2907 | |
| 2908 | /// We want the most significant PRECISION bits of `src`. There may not |
| 2909 | /// be that many; extract what we can. |
| 2910 | #[inline ] |
| 2911 | pub(super) fn from_limbs(dst: &mut [Limb], src: &[Limb], precision: usize) -> (Loss, ExpInt) { |
| 2912 | let omsb = omsb(src); |
| 2913 | |
| 2914 | if precision <= omsb { |
| 2915 | extract(dst, src, precision, omsb - precision); |
| 2916 | (Loss::through_truncation(src, omsb - precision), omsb as ExpInt - 1) |
| 2917 | } else { |
| 2918 | extract(dst, src, omsb, 0); |
| 2919 | (Loss::ExactlyZero, precision as ExpInt - 1) |
| 2920 | } |
| 2921 | } |
| 2922 | |
| 2923 | /// For every consecutive chunk of `bits` bits from `limbs`, |
| 2924 | /// going from most significant to the least significant bits, |
| 2925 | /// call `f` to transform those bits and store the result back. |
| 2926 | #[inline ] |
| 2927 | pub(super) fn each_chunk<F: FnMut(Limb) -> Limb>(limbs: &mut [Limb], bits: usize, mut f: F) { |
| 2928 | assert_eq!(LIMB_BITS % bits, 0); |
| 2929 | for limb in limbs.iter_mut().rev() { |
| 2930 | let mut r = 0; |
| 2931 | for i in (0..LIMB_BITS / bits).rev() { |
| 2932 | r |= f((*limb >> (i * bits)) & ((1 << bits) - 1)) << (i * bits); |
| 2933 | } |
| 2934 | *limb = r; |
| 2935 | } |
| 2936 | } |
| 2937 | |
| 2938 | /// Increment in-place, return the carry flag. |
| 2939 | #[inline ] |
| 2940 | pub(super) fn increment(dst: &mut [Limb]) -> Limb { |
| 2941 | for x in dst { |
| 2942 | *x = x.wrapping_add(1); |
| 2943 | if *x != 0 { |
| 2944 | return 0; |
| 2945 | } |
| 2946 | } |
| 2947 | |
| 2948 | 1 |
| 2949 | } |
| 2950 | |
| 2951 | /// Decrement in-place, return the borrow flag. |
| 2952 | #[inline ] |
| 2953 | pub(super) fn decrement(dst: &mut [Limb]) -> Limb { |
| 2954 | for x in dst { |
| 2955 | *x = x.wrapping_sub(1); |
| 2956 | if *x != !0 { |
| 2957 | return 0; |
| 2958 | } |
| 2959 | } |
| 2960 | |
| 2961 | 1 |
| 2962 | } |
| 2963 | |
| 2964 | /// `a += b + c` where `c` is zero or one. Returns the carry flag. |
| 2965 | #[inline ] |
| 2966 | pub(super) fn add(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb { |
| 2967 | assert!(c <= 1); |
| 2968 | |
| 2969 | for (a, &b) in a.iter_mut().zip(b) { |
| 2970 | let (r, overflow) = a.overflowing_add(b); |
| 2971 | let (r, overflow2) = r.overflowing_add(c); |
| 2972 | *a = r; |
| 2973 | c = (overflow | overflow2) as Limb; |
| 2974 | } |
| 2975 | |
| 2976 | c |
| 2977 | } |
| 2978 | |
| 2979 | /// `a -= b + c` where `c` is zero or one. Returns the borrow flag. |
| 2980 | #[inline ] |
| 2981 | pub(super) fn sub(a: &mut [Limb], b: &[Limb], mut c: Limb) -> Limb { |
| 2982 | assert!(c <= 1); |
| 2983 | |
| 2984 | for (a, &b) in a.iter_mut().zip(b) { |
| 2985 | let (r, overflow) = a.overflowing_sub(b); |
| 2986 | let (r, overflow2) = r.overflowing_sub(c); |
| 2987 | *a = r; |
| 2988 | c = (overflow | overflow2) as Limb; |
| 2989 | } |
| 2990 | |
| 2991 | c |
| 2992 | } |
| 2993 | |
| 2994 | /// `a += b` or `a -= b`. Does not preserve `b`. |
| 2995 | #[inline ] |
| 2996 | pub(super) fn add_or_sub( |
| 2997 | a_sig: &mut [Limb], |
| 2998 | a_exp: &mut ExpInt, |
| 2999 | a_sign: &mut bool, |
| 3000 | b_sig: &mut [Limb], |
| 3001 | b_exp: ExpInt, |
| 3002 | b_sign: bool, |
| 3003 | ) -> Loss { |
| 3004 | // Are we bigger exponent-wise than the RHS? |
| 3005 | let bits = *a_exp - b_exp; |
| 3006 | |
| 3007 | // Determine if the operation on the absolute values is effectively |
| 3008 | // an addition or subtraction. |
| 3009 | // Subtraction is more subtle than one might naively expect. |
| 3010 | if *a_sign ^ b_sign { |
| 3011 | let loss; |
| 3012 | |
| 3013 | if bits == 0 { |
| 3014 | loss = Loss::ExactlyZero; |
| 3015 | } else if bits > 0 { |
| 3016 | loss = shift_right(b_sig, &mut 0, (bits - 1) as usize); |
| 3017 | shift_left(a_sig, a_exp, 1); |
| 3018 | } else { |
| 3019 | loss = shift_right(a_sig, a_exp, (-bits - 1) as usize); |
| 3020 | shift_left(b_sig, &mut 0, 1); |
| 3021 | } |
| 3022 | |
| 3023 | let borrow = (loss != Loss::ExactlyZero) as Limb; |
| 3024 | |
| 3025 | // Should we reverse the subtraction. |
| 3026 | if cmp(a_sig, b_sig) == Ordering::Less { |
| 3027 | // The code above is intended to ensure that no borrow is necessary. |
| 3028 | assert_eq!(sub(b_sig, a_sig, borrow), 0); |
| 3029 | a_sig.copy_from_slice(b_sig); |
| 3030 | *a_sign = !*a_sign; |
| 3031 | } else { |
| 3032 | // The code above is intended to ensure that no borrow is necessary. |
| 3033 | assert_eq!(sub(a_sig, b_sig, borrow), 0); |
| 3034 | } |
| 3035 | |
| 3036 | // Invert the lost fraction - it was on the RHS and subtracted. |
| 3037 | match loss { |
| 3038 | Loss::LessThanHalf => Loss::MoreThanHalf, |
| 3039 | Loss::MoreThanHalf => Loss::LessThanHalf, |
| 3040 | _ => loss, |
| 3041 | } |
| 3042 | } else { |
| 3043 | let loss = if bits > 0 { |
| 3044 | shift_right(b_sig, &mut 0, bits as usize) |
| 3045 | } else { |
| 3046 | shift_right(a_sig, a_exp, -bits as usize) |
| 3047 | }; |
| 3048 | // We have a guard bit; generating a carry cannot happen. |
| 3049 | assert_eq!(add(a_sig, b_sig, 0), 0); |
| 3050 | loss |
| 3051 | } |
| 3052 | } |
| 3053 | |
| 3054 | /// `[low, high] = a * b`. |
| 3055 | /// |
| 3056 | /// This cannot overflow, because |
| 3057 | /// |
| 3058 | /// `(n - 1) * (n - 1) + 2 * (n - 1) == (n - 1) * (n + 1)` |
| 3059 | /// |
| 3060 | /// which is less than n^2. |
| 3061 | #[inline ] |
| 3062 | pub(super) fn widening_mul(a: Limb, b: Limb) -> [Limb; 2] { |
| 3063 | let mut wide = [0, 0]; |
| 3064 | |
| 3065 | if a == 0 || b == 0 { |
| 3066 | return wide; |
| 3067 | } |
| 3068 | |
| 3069 | const HALF_BITS: usize = LIMB_BITS / 2; |
| 3070 | |
| 3071 | let select = |limb, i| (limb >> (i * HALF_BITS)) & ((1 << HALF_BITS) - 1); |
| 3072 | for i in 0..2 { |
| 3073 | for j in 0..2 { |
| 3074 | let mut x = [select(a, i) * select(b, j), 0]; |
| 3075 | shift_left(&mut x, &mut 0, (i + j) * HALF_BITS); |
| 3076 | assert_eq!(add(&mut wide, &x, 0), 0); |
| 3077 | } |
| 3078 | } |
| 3079 | |
| 3080 | wide |
| 3081 | } |
| 3082 | |
| 3083 | /// `dst = a * b` (for normal `a` and `b`). Returns the lost fraction. |
| 3084 | #[inline ] |
| 3085 | pub(super) fn mul<'a>( |
| 3086 | dst: &mut [Limb], |
| 3087 | exp: &mut ExpInt, |
| 3088 | mut a: &'a [Limb], |
| 3089 | mut b: &'a [Limb], |
| 3090 | precision: usize, |
| 3091 | ) -> Loss { |
| 3092 | // Put the narrower number on the `a` for less loops below. |
| 3093 | if a.len() > b.len() { |
| 3094 | mem::swap(&mut a, &mut b); |
| 3095 | } |
| 3096 | |
| 3097 | for x in &mut dst[..b.len()] { |
| 3098 | *x = 0; |
| 3099 | } |
| 3100 | |
| 3101 | for i in 0..a.len() { |
| 3102 | let mut carry = 0; |
| 3103 | for j in 0..b.len() { |
| 3104 | let [low, mut high] = widening_mul(a[i], b[j]); |
| 3105 | |
| 3106 | // Now add carry. |
| 3107 | let (low, overflow) = low.overflowing_add(carry); |
| 3108 | high += overflow as Limb; |
| 3109 | |
| 3110 | // And now `dst[i + j]`, and store the new low part there. |
| 3111 | let (low, overflow) = low.overflowing_add(dst[i + j]); |
| 3112 | high += overflow as Limb; |
| 3113 | |
| 3114 | dst[i + j] = low; |
| 3115 | carry = high; |
| 3116 | } |
| 3117 | dst[i + b.len()] = carry; |
| 3118 | } |
| 3119 | |
| 3120 | // Assume the operands involved in the multiplication are single-precision |
| 3121 | // FP, and the two multiplicants are: |
| 3122 | // a = a23 . a22 ... a0 * 2^e1 |
| 3123 | // b = b23 . b22 ... b0 * 2^e2 |
| 3124 | // the result of multiplication is: |
| 3125 | // dst = c48 c47 c46 . c45 ... c0 * 2^(e1+e2) |
| 3126 | // Note that there are three significant bits at the left-hand side of the |
| 3127 | // radix point: two for the multiplication, and an overflow bit for the |
| 3128 | // addition (that will always be zero at this point). Move the radix point |
| 3129 | // toward left by two bits, and adjust exponent accordingly. |
| 3130 | *exp += 2; |
| 3131 | |
| 3132 | // Convert the result having "2 * precision" significant-bits back to the one |
| 3133 | // having "precision" significant-bits. First, move the radix point from |
| 3134 | // poision "2*precision - 1" to "precision - 1". The exponent need to be |
| 3135 | // adjusted by "2*precision - 1" - "precision - 1" = "precision". |
| 3136 | *exp -= precision as ExpInt + 1; |
| 3137 | |
| 3138 | // In case MSB resides at the left-hand side of radix point, shift the |
| 3139 | // mantissa right by some amount to make sure the MSB reside right before |
| 3140 | // the radix point (i.e. "MSB . rest-significant-bits"). |
| 3141 | // |
| 3142 | // Note that the result is not normalized when "omsb < precision". So, the |
| 3143 | // caller needs to call IeeeFloat::normalize() if normalized value is |
| 3144 | // expected. |
| 3145 | let omsb = omsb(dst); |
| 3146 | if omsb <= precision { |
| 3147 | Loss::ExactlyZero |
| 3148 | } else { |
| 3149 | shift_right(dst, exp, omsb - precision) |
| 3150 | } |
| 3151 | } |
| 3152 | |
| 3153 | /// `quotient = dividend / divisor`. Returns the lost fraction. |
| 3154 | /// Does not preserve `dividend` or `divisor`. |
| 3155 | #[inline ] |
| 3156 | pub(super) fn div( |
| 3157 | quotient: &mut [Limb], |
| 3158 | exp: &mut ExpInt, |
| 3159 | dividend: &mut [Limb], |
| 3160 | divisor: &mut [Limb], |
| 3161 | precision: usize, |
| 3162 | ) -> Loss { |
| 3163 | // Normalize the divisor. |
| 3164 | let bits = precision - omsb(divisor); |
| 3165 | shift_left(divisor, &mut 0, bits); |
| 3166 | *exp += bits as ExpInt; |
| 3167 | |
| 3168 | // Normalize the dividend. |
| 3169 | let bits = precision - omsb(dividend); |
| 3170 | shift_left(dividend, exp, bits); |
| 3171 | |
| 3172 | // Division by 1. |
| 3173 | let olsb_divisor = olsb(divisor); |
| 3174 | if olsb_divisor == precision { |
| 3175 | quotient.copy_from_slice(dividend); |
| 3176 | return Loss::ExactlyZero; |
| 3177 | } |
| 3178 | |
| 3179 | // Ensure the dividend >= divisor initially for the loop below. |
| 3180 | // Incidentally, this means that the division loop below is |
| 3181 | // guaranteed to set the integer bit to one. |
| 3182 | if cmp(dividend, divisor) == Ordering::Less { |
| 3183 | shift_left(dividend, exp, 1); |
| 3184 | assert_ne!(cmp(dividend, divisor), Ordering::Less) |
| 3185 | } |
| 3186 | |
| 3187 | // Helper for figuring out the lost fraction. |
| 3188 | let lost_fraction = |dividend: &[Limb], divisor: &[Limb]| match cmp(dividend, divisor) { |
| 3189 | Ordering::Greater => Loss::MoreThanHalf, |
| 3190 | Ordering::Equal => Loss::ExactlyHalf, |
| 3191 | Ordering::Less => { |
| 3192 | if is_all_zeros(dividend) { |
| 3193 | Loss::ExactlyZero |
| 3194 | } else { |
| 3195 | Loss::LessThanHalf |
| 3196 | } |
| 3197 | } |
| 3198 | }; |
| 3199 | |
| 3200 | // Try to perform a (much faster) short division for small divisors. |
| 3201 | let divisor_bits = precision - (olsb_divisor - 1); |
| 3202 | macro_rules! try_short_div { |
| 3203 | ($W:ty, $H:ty, $half:expr) => { |
| 3204 | if divisor_bits * 2 <= $half { |
| 3205 | // Extract the small divisor. |
| 3206 | let _: Loss = shift_right(divisor, &mut 0, olsb_divisor - 1); |
| 3207 | let divisor = divisor[0] as $H as $W; |
| 3208 | |
| 3209 | // Shift the dividend to produce a quotient with the unit bit set. |
| 3210 | let top_limb = *dividend.last().unwrap(); |
| 3211 | let mut rem = (top_limb >> (LIMB_BITS - (divisor_bits - 1))) as $H; |
| 3212 | shift_left(dividend, &mut 0, divisor_bits - 1); |
| 3213 | |
| 3214 | // Apply short division in place on $H (of $half bits) chunks. |
| 3215 | each_chunk(dividend, $half, |chunk| { |
| 3216 | let chunk = chunk as $H; |
| 3217 | let combined = ((rem as $W) << $half) | (chunk as $W); |
| 3218 | rem = (combined % divisor) as $H; |
| 3219 | (combined / divisor) as $H as Limb |
| 3220 | }); |
| 3221 | quotient.copy_from_slice(dividend); |
| 3222 | |
| 3223 | return lost_fraction(&[(rem as Limb) << 1], &[divisor as Limb]); |
| 3224 | } |
| 3225 | }; |
| 3226 | } |
| 3227 | |
| 3228 | try_short_div!(u32, u16, 16); |
| 3229 | try_short_div!(u64, u32, 32); |
| 3230 | try_short_div!(u128, u64, 64); |
| 3231 | |
| 3232 | // Zero the quotient before setting bits in it. |
| 3233 | for x in &mut quotient[..limbs_for_bits(precision)] { |
| 3234 | *x = 0; |
| 3235 | } |
| 3236 | |
| 3237 | // Long division. |
| 3238 | for bit in (0..precision).rev() { |
| 3239 | if cmp(dividend, divisor) != Ordering::Less { |
| 3240 | sub(dividend, divisor, 0); |
| 3241 | set_bit(quotient, bit); |
| 3242 | } |
| 3243 | shift_left(dividend, &mut 0, 1); |
| 3244 | } |
| 3245 | |
| 3246 | lost_fraction(dividend, divisor) |
| 3247 | } |
| 3248 | } |
| 3249 | |