1use crate::ieee;
2use crate::{Category, ExpInt, Float, FloatConvert, ParseError, Round, Status, StatusAnd};
3
4use core::cmp::Ordering;
5use core::fmt;
6use core::ops::Neg;
7
8#[must_use]
9#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)]
10pub struct DoubleFloat<F>(F, F);
11pub type DoubleDouble = DoubleFloat<ieee::Double>;
12
13// These are legacy semantics for the Fallback, inaccrurate implementation of
14// IBM double-double, if the accurate DoubleDouble doesn't handle the
15// operation. It's equivalent to having an IEEE number with consecutive 106
16// bits of mantissa and 11 bits of exponent.
17//
18// It's not equivalent to IBM double-double. For example, a legit IBM
19// double-double, 1 + epsilon:
20//
21// 1 + epsilon = 1 + (1 >> 1076)
22//
23// is not representable by a consecutive 106 bits of mantissa.
24//
25// Currently, these semantics are used in the following way:
26//
27// DoubleDouble -> (Double, Double) ->
28// DoubleDouble's Fallback -> IEEE operations
29//
30// FIXME: Implement all operations in DoubleDouble, and delete these
31// semantics.
32// FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds.
33pub struct FallbackS<F>(F);
34type Fallback<F> = ieee::IeeeFloat<FallbackS<F>>;
35impl<F: Float> ieee::Semantics for FallbackS<F> {
36 // Forbid any conversion to/from bits.
37 const BITS: usize = 0;
38 const EXP_BITS: usize = 0;
39
40 const PRECISION: usize = F::PRECISION * 2;
41 const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt;
42 const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt + F::PRECISION as ExpInt;
43}
44
45// Convert number to F. To avoid spurious underflows, we re-
46// normalize against the F exponent range first, and only *then*
47// truncate the mantissa. The result of that second conversion
48// may be inexact, but should never underflow.
49// FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds.
50pub struct FallbackExtendedS<F>(F);
51type FallbackExtended<F> = ieee::IeeeFloat<FallbackExtendedS<F>>;
52impl<F: Float> ieee::Semantics for FallbackExtendedS<F> {
53 // Forbid any conversion to/from bits.
54 const BITS: usize = 0;
55 const EXP_BITS: usize = 0;
56
57 const PRECISION: usize = Fallback::<F>::PRECISION;
58 const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt;
59 const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt;
60}
61
62impl<F: Float> From<Fallback<F>> for DoubleFloat<F>
63where
64 F: FloatConvert<FallbackExtended<F>>,
65 FallbackExtended<F>: FloatConvert<F>,
66{
67 fn from(x: Fallback<F>) -> Self {
68 let mut status;
69 let mut loses_info = false;
70
71 let extended: FallbackExtended<F> = unpack!(status=, x.convert(&mut loses_info));
72 assert_eq!((status, loses_info), (Status::OK, false));
73
74 let a = unpack!(status=, extended.convert(&mut loses_info));
75 assert_eq!(status - Status::INEXACT, Status::OK);
76
77 // If conversion was exact or resulted in a special case, we're done;
78 // just set the second double to zero. Otherwise, re-convert back to
79 // the extended format and compute the difference. This now should
80 // convert exactly to double.
81 let b = if a.is_finite_non_zero() && loses_info {
82 let u: FallbackExtended<F> = unpack!(status=, a.convert(&mut loses_info));
83 assert_eq!((status, loses_info), (Status::OK, false));
84 let v = unpack!(status=, extended - u);
85 assert_eq!(status, Status::OK);
86 let v = unpack!(status=, v.convert(&mut loses_info));
87 assert_eq!((status, loses_info), (Status::OK, false));
88 v
89 } else {
90 F::ZERO
91 };
92
93 DoubleFloat(a, b)
94 }
95}
96
97impl<F: FloatConvert<Self>> From<DoubleFloat<F>> for Fallback<F> {
98 fn from(DoubleFloat(a: F, b: F): DoubleFloat<F>) -> Self {
99 let mut status: Status;
100 let mut loses_info: bool = false;
101
102 // Get the first F and convert to our format.
103 let a: IeeeFloat> = unpack!(status=, a.convert(&mut loses_info));
104 assert_eq!((status, loses_info), (Status::OK, false));
105
106 // Unless we have a special case, add in second F.
107 if a.is_finite_non_zero() {
108 let b: IeeeFloat> = unpack!(status=, b.convert(&mut loses_info));
109 assert_eq!((status, loses_info), (Status::OK, false));
110
111 (a + b).value
112 } else {
113 a
114 }
115 }
116}
117
118float_common_impls!(DoubleFloat<F>);
119
120impl<F: Float> Neg for DoubleFloat<F> {
121 type Output = Self;
122 fn neg(self) -> Self {
123 if self.1.is_finite_non_zero() {
124 DoubleFloat(-self.0, -self.1)
125 } else {
126 DoubleFloat(-self.0, self.1)
127 }
128 }
129}
130
131impl<F: FloatConvert<Fallback<F>>> fmt::Display for DoubleFloat<F> {
132 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
133 fmt::Display::fmt(&Fallback::from(*self), f)
134 }
135}
136
137impl<F: FloatConvert<Fallback<F>>> Float for DoubleFloat<F>
138where
139 Self: From<Fallback<F>>,
140{
141 const BITS: usize = F::BITS * 2;
142 const PRECISION: usize = Fallback::<F>::PRECISION;
143 const MAX_EXP: ExpInt = Fallback::<F>::MAX_EXP;
144 const MIN_EXP: ExpInt = Fallback::<F>::MIN_EXP;
145
146 const ZERO: Self = DoubleFloat(F::ZERO, F::ZERO);
147
148 const INFINITY: Self = DoubleFloat(F::INFINITY, F::ZERO);
149
150 // FIXME(eddyb) remove when qnan becomes const fn.
151 const NAN: Self = DoubleFloat(F::NAN, F::ZERO);
152
153 fn qnan(payload: Option<u128>) -> Self {
154 DoubleFloat(F::qnan(payload), F::ZERO)
155 }
156
157 fn snan(payload: Option<u128>) -> Self {
158 DoubleFloat(F::snan(payload), F::ZERO)
159 }
160
161 fn largest() -> Self {
162 let status;
163 let mut r = DoubleFloat(F::largest(), F::largest());
164 r.1 = r.1.scalbn(-(F::PRECISION as ExpInt + 1));
165 r.1 = unpack!(status=, r.1.next_down());
166 assert_eq!(status, Status::OK);
167 r
168 }
169
170 const SMALLEST: Self = DoubleFloat(F::SMALLEST, F::ZERO);
171
172 fn smallest_normalized() -> Self {
173 DoubleFloat(F::smallest_normalized().scalbn(F::PRECISION as ExpInt), F::ZERO)
174 }
175
176 // Implement addition, subtraction, multiplication and division based on:
177 // "Software for Doubled-Precision Floating-Point Computations",
178 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
179
180 fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
181 match (self.category(), rhs.category()) {
182 (Category::Infinity, Category::Infinity) => {
183 if self.is_negative() != rhs.is_negative() {
184 Status::INVALID_OP.and(Self::NAN.copy_sign(self))
185 } else {
186 Status::OK.and(self)
187 }
188 }
189
190 (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => Status::OK.and(self),
191
192 (Category::Zero, _) | (_, Category::NaN) | (_, Category::Infinity) => Status::OK.and(rhs),
193
194 (Category::Normal, Category::Normal) => {
195 let mut status = Status::OK;
196 let (a, aa, c, cc) = (self.0, self.1, rhs.0, rhs.1);
197 let mut z = a;
198 z = unpack!(status|=, z.add_r(c, round));
199 if !z.is_finite() {
200 if !z.is_infinite() {
201 return status.and(DoubleFloat(z, F::ZERO));
202 }
203 status = Status::OK;
204 let a_cmp_c = a.cmp_abs_normal(c);
205 z = cc;
206 z = unpack!(status|=, z.add_r(aa, round));
207 if a_cmp_c == Ordering::Greater {
208 // z = cc + aa + c + a;
209 z = unpack!(status|=, z.add_r(c, round));
210 z = unpack!(status|=, z.add_r(a, round));
211 } else {
212 // z = cc + aa + a + c;
213 z = unpack!(status|=, z.add_r(a, round));
214 z = unpack!(status|=, z.add_r(c, round));
215 }
216 if !z.is_finite() {
217 return status.and(DoubleFloat(z, F::ZERO));
218 }
219 self.0 = z;
220 let mut zz = aa;
221 zz = unpack!(status|=, zz.add_r(cc, round));
222 if a_cmp_c == Ordering::Greater {
223 // self.1 = a - z + c + zz;
224 self.1 = a;
225 self.1 = unpack!(status|=, self.1.sub_r(z, round));
226 self.1 = unpack!(status|=, self.1.add_r(c, round));
227 self.1 = unpack!(status|=, self.1.add_r(zz, round));
228 } else {
229 // self.1 = c - z + a + zz;
230 self.1 = c;
231 self.1 = unpack!(status|=, self.1.sub_r(z, round));
232 self.1 = unpack!(status|=, self.1.add_r(a, round));
233 self.1 = unpack!(status|=, self.1.add_r(zz, round));
234 }
235 } else {
236 // q = a - z;
237 let mut q = a;
238 q = unpack!(status|=, q.sub_r(z, round));
239
240 // zz = q + c + (a - (q + z)) + aa + cc;
241 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
242 let mut zz = q;
243 zz = unpack!(status|=, zz.add_r(c, round));
244 q = unpack!(status|=, q.add_r(z, round));
245 q = unpack!(status|=, q.sub_r(a, round));
246 q = -q;
247 zz = unpack!(status|=, zz.add_r(q, round));
248 zz = unpack!(status|=, zz.add_r(aa, round));
249 zz = unpack!(status|=, zz.add_r(cc, round));
250 if zz.is_zero() && !zz.is_negative() {
251 return Status::OK.and(DoubleFloat(z, F::ZERO));
252 }
253 self.0 = z;
254 self.0 = unpack!(status|=, self.0.add_r(zz, round));
255 if !self.0.is_finite() {
256 self.1 = F::ZERO;
257 return status.and(self);
258 }
259 self.1 = z;
260 self.1 = unpack!(status|=, self.1.sub_r(self.0, round));
261 self.1 = unpack!(status|=, self.1.add_r(zz, round));
262 }
263 status.and(self)
264 }
265 }
266 }
267
268 fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> {
269 // Interesting observation: For special categories, finding the lowest
270 // common ancestor of the following layered graph gives the correct
271 // return category:
272 //
273 // NaN
274 // / \
275 // Zero Inf
276 // \ /
277 // Normal
278 //
279 // e.g. NaN * NaN = NaN
280 // Zero * Inf = NaN
281 // Normal * Zero = Zero
282 // Normal * Inf = Inf
283 match (self.category(), rhs.category()) {
284 (Category::NaN, _) => Status::OK.and(self),
285
286 (_, Category::NaN) => Status::OK.and(rhs),
287
288 (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => Status::OK.and(Self::NAN),
289
290 (Category::Zero, _) | (Category::Infinity, _) => Status::OK.and(self),
291
292 (_, Category::Zero) | (_, Category::Infinity) => Status::OK.and(rhs),
293
294 (Category::Normal, Category::Normal) => {
295 let mut status = Status::OK;
296 let (a, b, c, d) = (self.0, self.1, rhs.0, rhs.1);
297 // t = a * c
298 let mut t = a;
299 t = unpack!(status|=, t.mul_r(c, round));
300 if !t.is_finite_non_zero() {
301 return status.and(DoubleFloat(t, F::ZERO));
302 }
303
304 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
305 let mut tau = a;
306 tau = unpack!(status|=, tau.mul_add_r(c, -t, round));
307 // v = a * d
308 let mut v = a;
309 v = unpack!(status|=, v.mul_r(d, round));
310 // w = b * c
311 let mut w = b;
312 w = unpack!(status|=, w.mul_r(c, round));
313 v = unpack!(status|=, v.add_r(w, round));
314 // tau += v + w
315 tau = unpack!(status|=, tau.add_r(v, round));
316 // u = t + tau
317 let mut u = t;
318 u = unpack!(status|=, u.add_r(tau, round));
319
320 self.0 = u;
321 if !u.is_finite() {
322 self.1 = F::ZERO;
323 } else {
324 // self.1 = (t - u) + tau
325 t = unpack!(status|=, t.sub_r(u, round));
326 t = unpack!(status|=, t.add_r(tau, round));
327 self.1 = t;
328 }
329 status.and(self)
330 }
331 }
332 }
333
334 fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> {
335 Fallback::from(self)
336 .mul_add_r(Fallback::from(multiplicand), Fallback::from(addend), round)
337 .map(Self::from)
338 }
339
340 fn div_r(self, rhs: Self, round: Round) -> StatusAnd<Self> {
341 Fallback::from(self).div_r(Fallback::from(rhs), round).map(Self::from)
342 }
343
344 fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> {
345 Fallback::from(self).ieee_rem(Fallback::from(rhs)).map(Self::from)
346 }
347
348 fn c_fmod(self, rhs: Self) -> StatusAnd<Self> {
349 Fallback::from(self).c_fmod(Fallback::from(rhs)).map(Self::from)
350 }
351
352 fn round_to_integral(self, round: Round) -> StatusAnd<Self> {
353 Fallback::from(self).round_to_integral(round).map(Self::from)
354 }
355
356 fn next_up(self) -> StatusAnd<Self> {
357 Fallback::from(self).next_up().map(Self::from)
358 }
359
360 fn from_bits(input: u128) -> Self {
361 let (a, b) = (input, input >> F::BITS);
362 DoubleFloat(F::from_bits(a & ((1 << F::BITS) - 1)), F::from_bits(b & ((1 << F::BITS) - 1)))
363 }
364
365 fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> {
366 Fallback::from_u128_r(input, round).map(Self::from)
367 }
368
369 fn from_str_r(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> {
370 Fallback::from_str_r(s, round).map(|r| r.map(Self::from))
371 }
372
373 fn to_bits(self) -> u128 {
374 self.0.to_bits() | (self.1.to_bits() << F::BITS)
375 }
376
377 fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> {
378 Fallback::from(self).to_u128_r(width, round, is_exact)
379 }
380
381 fn cmp_abs_normal(self, rhs: Self) -> Ordering {
382 self.0.cmp_abs_normal(rhs.0).then_with(|| {
383 let result = self.1.cmp_abs_normal(rhs.1);
384 if result != Ordering::Equal {
385 let against = self.0.is_negative() ^ self.1.is_negative();
386 let rhs_against = rhs.0.is_negative() ^ rhs.1.is_negative();
387 (!against)
388 .cmp(&!rhs_against)
389 .then_with(|| if against { result.reverse() } else { result })
390 } else {
391 result
392 }
393 })
394 }
395
396 fn bitwise_eq(self, rhs: Self) -> bool {
397 self.0.bitwise_eq(rhs.0) && self.1.bitwise_eq(rhs.1)
398 }
399
400 fn is_negative(self) -> bool {
401 self.0.is_negative()
402 }
403
404 fn is_denormal(self) -> bool {
405 self.category() == Category::Normal
406 && (self.0.is_denormal() || self.0.is_denormal() ||
407 // (double)(Hi + Lo) == Hi defines a normal number.
408 self.0 != (self.0 + self.1).value)
409 }
410
411 fn is_signaling(self) -> bool {
412 self.0.is_signaling()
413 }
414
415 fn category(self) -> Category {
416 self.0.category()
417 }
418
419 fn is_integer(self) -> bool {
420 self.0.is_integer() && self.1.is_integer()
421 }
422
423 fn get_exact_inverse(self) -> Option<Self> {
424 Fallback::from(self).get_exact_inverse().map(Self::from)
425 }
426
427 fn ilogb(self) -> ExpInt {
428 self.0.ilogb()
429 }
430
431 fn scalbn_r(self, exp: ExpInt, round: Round) -> Self {
432 DoubleFloat(self.0.scalbn_r(exp, round), self.1.scalbn_r(exp, round))
433 }
434
435 fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self {
436 let a = self.0.frexp_r(exp, round);
437 let mut b = self.1;
438 if self.category() == Category::Normal {
439 b = b.scalbn_r(-*exp, round);
440 }
441 DoubleFloat(a, b)
442 }
443}
444
445// HACK(eddyb) this is here instead of in `tests/ppc.rs` because `DoubleFloat`
446// has private fields, and it's not worth it to make them public just for testing.
447#[test]
448fn is_integer() {
449 let double_from_f64: impl Fn(f64) -> IeeeFloat<…> = |f: f64| ieee::Double::from_bits(input:f.to_bits().into());
450 assert!(DoubleFloat(double_from_f64(-0.0), double_from_f64(-0.0)).is_integer());
451 assert!(!DoubleFloat(double_from_f64(3.14159), double_from_f64(-0.0)).is_integer());
452 assert!(!DoubleFloat(double_from_f64(-0.0), double_from_f64(3.14159)).is_integer());
453}
454