| 1 | //! Support for floating point types that use PowerPC semantics. |
| 2 | |
| 3 | use crate::ieee; |
| 4 | use crate::{Category, ExpInt, Float, FloatConvert, ParseError, Round, Status, StatusAnd}; |
| 5 | |
| 6 | use core::cmp::Ordering; |
| 7 | use core::fmt; |
| 8 | use core::ops::Neg; |
| 9 | |
| 10 | /// A larger floating point number represented by two smaller floats. |
| 11 | #[must_use ] |
| 12 | #[derive (Copy, Clone, PartialEq, PartialOrd, Debug)] |
| 13 | pub struct DoubleFloat<F>(F, F); |
| 14 | |
| 15 | /// 128-bit floating point number comprised of two IEEE [`Double`](ieee::Double) values. |
| 16 | /// |
| 17 | /// This is the "IBM Extended Double" format, described at |
| 18 | /// <https://www.ibm.com/docs/en/aix/7.3?topic=sepl-128-bit-long-double-floating-point-data-type>. |
| 19 | pub type DoubleDouble = DoubleFloat<ieee::Double>; |
| 20 | |
| 21 | // These are legacy semantics for the Fallback, inaccrurate implementation of |
| 22 | // IBM double-double, if the accurate DoubleDouble doesn't handle the |
| 23 | // operation. It's equivalent to having an IEEE number with consecutive 106 |
| 24 | // bits of mantissa and 11 bits of exponent. |
| 25 | // |
| 26 | // It's not equivalent to IBM double-double. For example, a legit IBM |
| 27 | // double-double, 1 + epsilon: |
| 28 | // |
| 29 | // 1 + epsilon = 1 + (1 >> 1076) |
| 30 | // |
| 31 | // is not representable by a consecutive 106 bits of mantissa. |
| 32 | // |
| 33 | // Currently, these semantics are used in the following way: |
| 34 | // |
| 35 | // DoubleDouble -> (Double, Double) -> |
| 36 | // DoubleDouble's Fallback -> IEEE operations |
| 37 | // |
| 38 | // FIXME: Implement all operations in DoubleDouble, and delete these |
| 39 | // semantics. |
| 40 | // FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds. |
| 41 | pub struct FallbackS<F>(F); |
| 42 | type Fallback<F> = ieee::IeeeFloat<FallbackS<F>>; |
| 43 | impl<F: Float> ieee::Semantics for FallbackS<F> { |
| 44 | // Forbid any conversion to/from bits. |
| 45 | const BITS: usize = 0; |
| 46 | const EXP_BITS: usize = 0; |
| 47 | |
| 48 | const PRECISION: usize = F::PRECISION * 2; |
| 49 | const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; |
| 50 | const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt + F::PRECISION as ExpInt; |
| 51 | } |
| 52 | |
| 53 | // Convert number to F. To avoid spurious underflows, we re- |
| 54 | // normalize against the F exponent range first, and only *then* |
| 55 | // truncate the mantissa. The result of that second conversion |
| 56 | // may be inexact, but should never underflow. |
| 57 | // FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds. |
| 58 | pub struct FallbackExtendedS<F>(F); |
| 59 | type FallbackExtended<F> = ieee::IeeeFloat<FallbackExtendedS<F>>; |
| 60 | impl<F: Float> ieee::Semantics for FallbackExtendedS<F> { |
| 61 | // Forbid any conversion to/from bits. |
| 62 | const BITS: usize = 0; |
| 63 | const EXP_BITS: usize = 0; |
| 64 | |
| 65 | const PRECISION: usize = Fallback::<F>::PRECISION; |
| 66 | const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; |
| 67 | const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt; |
| 68 | } |
| 69 | |
| 70 | impl<F: Float> From<Fallback<F>> for DoubleFloat<F> |
| 71 | where |
| 72 | F: FloatConvert<FallbackExtended<F>>, |
| 73 | FallbackExtended<F>: FloatConvert<F>, |
| 74 | { |
| 75 | fn from(x: Fallback<F>) -> Self { |
| 76 | let mut status; |
| 77 | let mut loses_info = false; |
| 78 | |
| 79 | let extended: FallbackExtended<F> = unpack!(status=, x.convert(&mut loses_info)); |
| 80 | assert_eq!((status, loses_info), (Status::OK, false)); |
| 81 | |
| 82 | let a = unpack!(status=, extended.convert(&mut loses_info)); |
| 83 | assert_eq!(status - Status::INEXACT, Status::OK); |
| 84 | |
| 85 | // If conversion was exact or resulted in a special case, we're done; |
| 86 | // just set the second double to zero. Otherwise, re-convert back to |
| 87 | // the extended format and compute the difference. This now should |
| 88 | // convert exactly to double. |
| 89 | let b = if a.is_finite_non_zero() && loses_info { |
| 90 | let u: FallbackExtended<F> = unpack!(status=, a.convert(&mut loses_info)); |
| 91 | assert_eq!((status, loses_info), (Status::OK, false)); |
| 92 | let v = unpack!(status=, extended - u); |
| 93 | assert_eq!(status, Status::OK); |
| 94 | let v = unpack!(status=, v.convert(&mut loses_info)); |
| 95 | assert_eq!((status, loses_info), (Status::OK, false)); |
| 96 | v |
| 97 | } else { |
| 98 | F::ZERO |
| 99 | }; |
| 100 | |
| 101 | DoubleFloat(a, b) |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | impl<F: FloatConvert<Self>> From<DoubleFloat<F>> for Fallback<F> { |
| 106 | fn from(DoubleFloat(a: F, b: F): DoubleFloat<F>) -> Self { |
| 107 | let mut status: Status; |
| 108 | let mut loses_info: bool = false; |
| 109 | |
| 110 | // Get the first F and convert to our format. |
| 111 | let a: IeeeFloat> = unpack!(status=, a.convert(&mut loses_info)); |
| 112 | assert_eq!((status, loses_info), (Status::OK, false)); |
| 113 | |
| 114 | // Unless we have a special case, add in second F. |
| 115 | if a.is_finite_non_zero() { |
| 116 | let b: IeeeFloat> = unpack!(status=, b.convert(&mut loses_info)); |
| 117 | assert_eq!((status, loses_info), (Status::OK, false)); |
| 118 | |
| 119 | (a + b).value |
| 120 | } else { |
| 121 | a |
| 122 | } |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | float_common_impls!(DoubleFloat<F>); |
| 127 | |
| 128 | impl<F: Float> Neg for DoubleFloat<F> { |
| 129 | type Output = Self; |
| 130 | fn neg(self) -> Self { |
| 131 | if self.1.is_finite_non_zero() { |
| 132 | DoubleFloat(-self.0, -self.1) |
| 133 | } else { |
| 134 | DoubleFloat(-self.0, self.1) |
| 135 | } |
| 136 | } |
| 137 | } |
| 138 | |
| 139 | impl<F: FloatConvert<Fallback<F>>> fmt::Display for DoubleFloat<F> { |
| 140 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| 141 | fmt::Display::fmt(&Fallback::from(*self), f) |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | impl<F: FloatConvert<Fallback<F>>> Float for DoubleFloat<F> |
| 146 | where |
| 147 | Self: From<Fallback<F>>, |
| 148 | { |
| 149 | const BITS: usize = F::BITS * 2; |
| 150 | const PRECISION: usize = Fallback::<F>::PRECISION; |
| 151 | const MAX_EXP: ExpInt = Fallback::<F>::MAX_EXP; |
| 152 | const MIN_EXP: ExpInt = Fallback::<F>::MIN_EXP; |
| 153 | |
| 154 | const ZERO: Self = DoubleFloat(F::ZERO, F::ZERO); |
| 155 | |
| 156 | const INFINITY: Self = DoubleFloat(F::INFINITY, F::ZERO); |
| 157 | |
| 158 | // FIXME(eddyb) remove when qnan becomes const fn. |
| 159 | const NAN: Self = DoubleFloat(F::NAN, F::ZERO); |
| 160 | |
| 161 | fn qnan(payload: Option<u128>) -> Self { |
| 162 | DoubleFloat(F::qnan(payload), F::ZERO) |
| 163 | } |
| 164 | |
| 165 | fn snan(payload: Option<u128>) -> Self { |
| 166 | DoubleFloat(F::snan(payload), F::ZERO) |
| 167 | } |
| 168 | |
| 169 | fn largest() -> Self { |
| 170 | let status; |
| 171 | let mut r = DoubleFloat(F::largest(), F::largest()); |
| 172 | r.1 = r.1.scalbn(-(F::PRECISION as ExpInt + 1)); |
| 173 | r.1 = unpack!(status=, r.1.next_down()); |
| 174 | assert_eq!(status, Status::OK); |
| 175 | r |
| 176 | } |
| 177 | |
| 178 | const SMALLEST: Self = DoubleFloat(F::SMALLEST, F::ZERO); |
| 179 | |
| 180 | fn smallest_normalized() -> Self { |
| 181 | DoubleFloat(F::smallest_normalized().scalbn(F::PRECISION as ExpInt), F::ZERO) |
| 182 | } |
| 183 | |
| 184 | // Implement addition, subtraction, multiplication and division based on: |
| 185 | // "Software for Doubled-Precision Floating-Point Computations", |
| 186 | // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. |
| 187 | |
| 188 | fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
| 189 | match (self.category(), rhs.category()) { |
| 190 | (Category::Infinity, Category::Infinity) => { |
| 191 | if self.is_negative() != rhs.is_negative() { |
| 192 | Status::INVALID_OP.and(Self::NAN.copy_sign(self)) |
| 193 | } else { |
| 194 | Status::OK.and(self) |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => Status::OK.and(self), |
| 199 | |
| 200 | (Category::Zero, _) | (_, Category::NaN) | (_, Category::Infinity) => Status::OK.and(rhs), |
| 201 | |
| 202 | (Category::Normal, Category::Normal) => { |
| 203 | let mut status = Status::OK; |
| 204 | let (a, aa, c, cc) = (self.0, self.1, rhs.0, rhs.1); |
| 205 | let mut z = a; |
| 206 | z = unpack!(status|=, z.add_r(c, round)); |
| 207 | if !z.is_finite() { |
| 208 | if !z.is_infinite() { |
| 209 | return status.and(DoubleFloat(z, F::ZERO)); |
| 210 | } |
| 211 | status = Status::OK; |
| 212 | let a_cmp_c = a.cmp_abs_normal(c); |
| 213 | z = cc; |
| 214 | z = unpack!(status|=, z.add_r(aa, round)); |
| 215 | if a_cmp_c == Ordering::Greater { |
| 216 | // z = cc + aa + c + a; |
| 217 | z = unpack!(status|=, z.add_r(c, round)); |
| 218 | z = unpack!(status|=, z.add_r(a, round)); |
| 219 | } else { |
| 220 | // z = cc + aa + a + c; |
| 221 | z = unpack!(status|=, z.add_r(a, round)); |
| 222 | z = unpack!(status|=, z.add_r(c, round)); |
| 223 | } |
| 224 | if !z.is_finite() { |
| 225 | return status.and(DoubleFloat(z, F::ZERO)); |
| 226 | } |
| 227 | self.0 = z; |
| 228 | let mut zz = aa; |
| 229 | zz = unpack!(status|=, zz.add_r(cc, round)); |
| 230 | if a_cmp_c == Ordering::Greater { |
| 231 | // self.1 = a - z + c + zz; |
| 232 | self.1 = a; |
| 233 | self.1 = unpack!(status|=, self.1.sub_r(z, round)); |
| 234 | self.1 = unpack!(status|=, self.1.add_r(c, round)); |
| 235 | self.1 = unpack!(status|=, self.1.add_r(zz, round)); |
| 236 | } else { |
| 237 | // self.1 = c - z + a + zz; |
| 238 | self.1 = c; |
| 239 | self.1 = unpack!(status|=, self.1.sub_r(z, round)); |
| 240 | self.1 = unpack!(status|=, self.1.add_r(a, round)); |
| 241 | self.1 = unpack!(status|=, self.1.add_r(zz, round)); |
| 242 | } |
| 243 | } else { |
| 244 | // q = a - z; |
| 245 | let mut q = a; |
| 246 | q = unpack!(status|=, q.sub_r(z, round)); |
| 247 | |
| 248 | // zz = q + c + (a - (q + z)) + aa + cc; |
| 249 | // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. |
| 250 | let mut zz = q; |
| 251 | zz = unpack!(status|=, zz.add_r(c, round)); |
| 252 | q = unpack!(status|=, q.add_r(z, round)); |
| 253 | q = unpack!(status|=, q.sub_r(a, round)); |
| 254 | q = -q; |
| 255 | zz = unpack!(status|=, zz.add_r(q, round)); |
| 256 | zz = unpack!(status|=, zz.add_r(aa, round)); |
| 257 | zz = unpack!(status|=, zz.add_r(cc, round)); |
| 258 | if zz.is_zero() && !zz.is_negative() { |
| 259 | return Status::OK.and(DoubleFloat(z, F::ZERO)); |
| 260 | } |
| 261 | self.0 = z; |
| 262 | self.0 = unpack!(status|=, self.0.add_r(zz, round)); |
| 263 | if !self.0.is_finite() { |
| 264 | self.1 = F::ZERO; |
| 265 | return status.and(self); |
| 266 | } |
| 267 | self.1 = z; |
| 268 | self.1 = unpack!(status|=, self.1.sub_r(self.0, round)); |
| 269 | self.1 = unpack!(status|=, self.1.add_r(zz, round)); |
| 270 | } |
| 271 | status.and(self) |
| 272 | } |
| 273 | } |
| 274 | } |
| 275 | |
| 276 | fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
| 277 | // Interesting observation: For special categories, finding the lowest |
| 278 | // common ancestor of the following layered graph gives the correct |
| 279 | // return category: |
| 280 | // |
| 281 | // NaN |
| 282 | // / \ |
| 283 | // Zero Inf |
| 284 | // \ / |
| 285 | // Normal |
| 286 | // |
| 287 | // e.g. NaN * NaN = NaN |
| 288 | // Zero * Inf = NaN |
| 289 | // Normal * Zero = Zero |
| 290 | // Normal * Inf = Inf |
| 291 | match (self.category(), rhs.category()) { |
| 292 | (Category::NaN, _) => Status::OK.and(self), |
| 293 | |
| 294 | (_, Category::NaN) => Status::OK.and(rhs), |
| 295 | |
| 296 | (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => Status::OK.and(Self::NAN), |
| 297 | |
| 298 | (Category::Zero, _) | (Category::Infinity, _) => Status::OK.and(self), |
| 299 | |
| 300 | (_, Category::Zero) | (_, Category::Infinity) => Status::OK.and(rhs), |
| 301 | |
| 302 | (Category::Normal, Category::Normal) => { |
| 303 | let mut status = Status::OK; |
| 304 | let (a, b, c, d) = (self.0, self.1, rhs.0, rhs.1); |
| 305 | // t = a * c |
| 306 | let mut t = a; |
| 307 | t = unpack!(status|=, t.mul_r(c, round)); |
| 308 | if !t.is_finite_non_zero() { |
| 309 | return status.and(DoubleFloat(t, F::ZERO)); |
| 310 | } |
| 311 | |
| 312 | // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). |
| 313 | let mut tau = a; |
| 314 | tau = unpack!(status|=, tau.mul_add_r(c, -t, round)); |
| 315 | // v = a * d |
| 316 | let mut v = a; |
| 317 | v = unpack!(status|=, v.mul_r(d, round)); |
| 318 | // w = b * c |
| 319 | let mut w = b; |
| 320 | w = unpack!(status|=, w.mul_r(c, round)); |
| 321 | v = unpack!(status|=, v.add_r(w, round)); |
| 322 | // tau += v + w |
| 323 | tau = unpack!(status|=, tau.add_r(v, round)); |
| 324 | // u = t + tau |
| 325 | let mut u = t; |
| 326 | u = unpack!(status|=, u.add_r(tau, round)); |
| 327 | |
| 328 | self.0 = u; |
| 329 | if !u.is_finite() { |
| 330 | self.1 = F::ZERO; |
| 331 | } else { |
| 332 | // self.1 = (t - u) + tau |
| 333 | t = unpack!(status|=, t.sub_r(u, round)); |
| 334 | t = unpack!(status|=, t.add_r(tau, round)); |
| 335 | self.1 = t; |
| 336 | } |
| 337 | status.and(self) |
| 338 | } |
| 339 | } |
| 340 | } |
| 341 | |
| 342 | fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> { |
| 343 | Fallback::from(self) |
| 344 | .mul_add_r(Fallback::from(multiplicand), Fallback::from(addend), round) |
| 345 | .map(Self::from) |
| 346 | } |
| 347 | |
| 348 | fn div_r(self, rhs: Self, round: Round) -> StatusAnd<Self> { |
| 349 | Fallback::from(self).div_r(Fallback::from(rhs), round).map(Self::from) |
| 350 | } |
| 351 | |
| 352 | fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> { |
| 353 | Fallback::from(self).ieee_rem(Fallback::from(rhs)).map(Self::from) |
| 354 | } |
| 355 | |
| 356 | fn c_fmod(self, rhs: Self) -> StatusAnd<Self> { |
| 357 | Fallback::from(self).c_fmod(Fallback::from(rhs)).map(Self::from) |
| 358 | } |
| 359 | |
| 360 | fn round_to_integral(self, round: Round) -> StatusAnd<Self> { |
| 361 | Fallback::from(self).round_to_integral(round).map(Self::from) |
| 362 | } |
| 363 | |
| 364 | fn next_up(self) -> StatusAnd<Self> { |
| 365 | Fallback::from(self).next_up().map(Self::from) |
| 366 | } |
| 367 | |
| 368 | fn from_bits(input: u128) -> Self { |
| 369 | let (a, b) = (input, input >> F::BITS); |
| 370 | DoubleFloat(F::from_bits(a & ((1 << F::BITS) - 1)), F::from_bits(b & ((1 << F::BITS) - 1))) |
| 371 | } |
| 372 | |
| 373 | fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> { |
| 374 | Fallback::from_u128_r(input, round).map(Self::from) |
| 375 | } |
| 376 | |
| 377 | fn from_str_r(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { |
| 378 | Fallback::from_str_r(s, round).map(|r| r.map(Self::from)) |
| 379 | } |
| 380 | |
| 381 | fn to_bits(self) -> u128 { |
| 382 | self.0.to_bits() | (self.1.to_bits() << F::BITS) |
| 383 | } |
| 384 | |
| 385 | fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> { |
| 386 | Fallback::from(self).to_u128_r(width, round, is_exact) |
| 387 | } |
| 388 | |
| 389 | fn cmp_abs_normal(self, rhs: Self) -> Ordering { |
| 390 | self.0.cmp_abs_normal(rhs.0).then_with(|| { |
| 391 | let result = self.1.cmp_abs_normal(rhs.1); |
| 392 | if result != Ordering::Equal { |
| 393 | let against = self.0.is_negative() ^ self.1.is_negative(); |
| 394 | let rhs_against = rhs.0.is_negative() ^ rhs.1.is_negative(); |
| 395 | (!against) |
| 396 | .cmp(&!rhs_against) |
| 397 | .then_with(|| if against { result.reverse() } else { result }) |
| 398 | } else { |
| 399 | result |
| 400 | } |
| 401 | }) |
| 402 | } |
| 403 | |
| 404 | fn bitwise_eq(self, rhs: Self) -> bool { |
| 405 | self.0.bitwise_eq(rhs.0) && self.1.bitwise_eq(rhs.1) |
| 406 | } |
| 407 | |
| 408 | fn is_negative(self) -> bool { |
| 409 | self.0.is_negative() |
| 410 | } |
| 411 | |
| 412 | fn is_denormal(self) -> bool { |
| 413 | self.category() == Category::Normal |
| 414 | && (self.0.is_denormal() || self.0.is_denormal() || |
| 415 | // (double)(Hi + Lo) == Hi defines a normal number. |
| 416 | self.0 != (self.0 + self.1).value) |
| 417 | } |
| 418 | |
| 419 | fn is_signaling(self) -> bool { |
| 420 | self.0.is_signaling() |
| 421 | } |
| 422 | |
| 423 | fn category(self) -> Category { |
| 424 | self.0.category() |
| 425 | } |
| 426 | |
| 427 | fn is_integer(self) -> bool { |
| 428 | self.0.is_integer() && self.1.is_integer() |
| 429 | } |
| 430 | |
| 431 | fn get_exact_inverse(self) -> Option<Self> { |
| 432 | Fallback::from(self).get_exact_inverse().map(Self::from) |
| 433 | } |
| 434 | |
| 435 | fn ilogb(self) -> ExpInt { |
| 436 | self.0.ilogb() |
| 437 | } |
| 438 | |
| 439 | fn scalbn_r(self, exp: ExpInt, round: Round) -> Self { |
| 440 | DoubleFloat(self.0.scalbn_r(exp, round), self.1.scalbn_r(exp, round)) |
| 441 | } |
| 442 | |
| 443 | fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self { |
| 444 | let a = self.0.frexp_r(exp, round); |
| 445 | let mut b = self.1; |
| 446 | if self.category() == Category::Normal { |
| 447 | b = b.scalbn_r(-*exp, round); |
| 448 | } |
| 449 | DoubleFloat(a, b) |
| 450 | } |
| 451 | } |
| 452 | |
| 453 | // HACK(eddyb) this is here instead of in `tests/ppc.rs` because `DoubleFloat` |
| 454 | // has private fields, and it's not worth it to make them public just for testing. |
| 455 | #[test ] |
| 456 | fn is_integer() { |
| 457 | let double_from_f64 = |f: f64| ieee::Double::from_bits(f.to_bits().into()); |
| 458 | assert!(DoubleFloat(double_from_f64(-0.0), double_from_f64(-0.0)).is_integer()); |
| 459 | assert!(!DoubleFloat(double_from_f64(3.14159), double_from_f64(-0.0)).is_integer()); |
| 460 | assert!(!DoubleFloat(double_from_f64(-0.0), double_from_f64(3.14159)).is_integer()); |
| 461 | } |
| 462 | |