1 | use crate::ieee; |
2 | use crate::{Category, ExpInt, Float, FloatConvert, ParseError, Round, Status, StatusAnd}; |
3 | |
4 | use core::cmp::Ordering; |
5 | use core::fmt; |
6 | use core::ops::Neg; |
7 | |
8 | #[must_use ] |
9 | #[derive (Copy, Clone, PartialEq, PartialOrd, Debug)] |
10 | pub struct DoubleFloat<F>(F, F); |
11 | pub type DoubleDouble = DoubleFloat<ieee::Double>; |
12 | |
13 | // These are legacy semantics for the Fallback, inaccrurate implementation of |
14 | // IBM double-double, if the accurate DoubleDouble doesn't handle the |
15 | // operation. It's equivalent to having an IEEE number with consecutive 106 |
16 | // bits of mantissa and 11 bits of exponent. |
17 | // |
18 | // It's not equivalent to IBM double-double. For example, a legit IBM |
19 | // double-double, 1 + epsilon: |
20 | // |
21 | // 1 + epsilon = 1 + (1 >> 1076) |
22 | // |
23 | // is not representable by a consecutive 106 bits of mantissa. |
24 | // |
25 | // Currently, these semantics are used in the following way: |
26 | // |
27 | // DoubleDouble -> (Double, Double) -> |
28 | // DoubleDouble's Fallback -> IEEE operations |
29 | // |
30 | // FIXME: Implement all operations in DoubleDouble, and delete these |
31 | // semantics. |
32 | // FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds. |
33 | pub struct FallbackS<F>(F); |
34 | type Fallback<F> = ieee::IeeeFloat<FallbackS<F>>; |
35 | impl<F: Float> ieee::Semantics for FallbackS<F> { |
36 | // Forbid any conversion to/from bits. |
37 | const BITS: usize = 0; |
38 | const EXP_BITS: usize = 0; |
39 | |
40 | const PRECISION: usize = F::PRECISION * 2; |
41 | const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; |
42 | const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt + F::PRECISION as ExpInt; |
43 | } |
44 | |
45 | // Convert number to F. To avoid spurious underflows, we re- |
46 | // normalize against the F exponent range first, and only *then* |
47 | // truncate the mantissa. The result of that second conversion |
48 | // may be inexact, but should never underflow. |
49 | // FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds. |
50 | pub struct FallbackExtendedS<F>(F); |
51 | type FallbackExtended<F> = ieee::IeeeFloat<FallbackExtendedS<F>>; |
52 | impl<F: Float> ieee::Semantics for FallbackExtendedS<F> { |
53 | // Forbid any conversion to/from bits. |
54 | const BITS: usize = 0; |
55 | const EXP_BITS: usize = 0; |
56 | |
57 | const PRECISION: usize = Fallback::<F>::PRECISION; |
58 | const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; |
59 | const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt; |
60 | } |
61 | |
62 | impl<F: Float> From<Fallback<F>> for DoubleFloat<F> |
63 | where |
64 | F: FloatConvert<FallbackExtended<F>>, |
65 | FallbackExtended<F>: FloatConvert<F>, |
66 | { |
67 | fn from(x: Fallback<F>) -> Self { |
68 | let mut status; |
69 | let mut loses_info = false; |
70 | |
71 | let extended: FallbackExtended<F> = unpack!(status=, x.convert(&mut loses_info)); |
72 | assert_eq!((status, loses_info), (Status::OK, false)); |
73 | |
74 | let a = unpack!(status=, extended.convert(&mut loses_info)); |
75 | assert_eq!(status - Status::INEXACT, Status::OK); |
76 | |
77 | // If conversion was exact or resulted in a special case, we're done; |
78 | // just set the second double to zero. Otherwise, re-convert back to |
79 | // the extended format and compute the difference. This now should |
80 | // convert exactly to double. |
81 | let b = if a.is_finite_non_zero() && loses_info { |
82 | let u: FallbackExtended<F> = unpack!(status=, a.convert(&mut loses_info)); |
83 | assert_eq!((status, loses_info), (Status::OK, false)); |
84 | let v = unpack!(status=, extended - u); |
85 | assert_eq!(status, Status::OK); |
86 | let v = unpack!(status=, v.convert(&mut loses_info)); |
87 | assert_eq!((status, loses_info), (Status::OK, false)); |
88 | v |
89 | } else { |
90 | F::ZERO |
91 | }; |
92 | |
93 | DoubleFloat(a, b) |
94 | } |
95 | } |
96 | |
97 | impl<F: FloatConvert<Self>> From<DoubleFloat<F>> for Fallback<F> { |
98 | fn from(DoubleFloat(a: F, b: F): DoubleFloat<F>) -> Self { |
99 | let mut status: Status; |
100 | let mut loses_info: bool = false; |
101 | |
102 | // Get the first F and convert to our format. |
103 | let a: IeeeFloat> = unpack!(status=, a.convert(&mut loses_info)); |
104 | assert_eq!((status, loses_info), (Status::OK, false)); |
105 | |
106 | // Unless we have a special case, add in second F. |
107 | if a.is_finite_non_zero() { |
108 | let b: IeeeFloat> = unpack!(status=, b.convert(&mut loses_info)); |
109 | assert_eq!((status, loses_info), (Status::OK, false)); |
110 | |
111 | (a + b).value |
112 | } else { |
113 | a |
114 | } |
115 | } |
116 | } |
117 | |
118 | float_common_impls!(DoubleFloat<F>); |
119 | |
120 | impl<F: Float> Neg for DoubleFloat<F> { |
121 | type Output = Self; |
122 | fn neg(self) -> Self { |
123 | if self.1.is_finite_non_zero() { |
124 | DoubleFloat(-self.0, -self.1) |
125 | } else { |
126 | DoubleFloat(-self.0, self.1) |
127 | } |
128 | } |
129 | } |
130 | |
131 | impl<F: FloatConvert<Fallback<F>>> fmt::Display for DoubleFloat<F> { |
132 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
133 | fmt::Display::fmt(&Fallback::from(*self), f) |
134 | } |
135 | } |
136 | |
137 | impl<F: FloatConvert<Fallback<F>>> Float for DoubleFloat<F> |
138 | where |
139 | Self: From<Fallback<F>>, |
140 | { |
141 | const BITS: usize = F::BITS * 2; |
142 | const PRECISION: usize = Fallback::<F>::PRECISION; |
143 | const MAX_EXP: ExpInt = Fallback::<F>::MAX_EXP; |
144 | const MIN_EXP: ExpInt = Fallback::<F>::MIN_EXP; |
145 | |
146 | const ZERO: Self = DoubleFloat(F::ZERO, F::ZERO); |
147 | |
148 | const INFINITY: Self = DoubleFloat(F::INFINITY, F::ZERO); |
149 | |
150 | // FIXME(eddyb) remove when qnan becomes const fn. |
151 | const NAN: Self = DoubleFloat(F::NAN, F::ZERO); |
152 | |
153 | fn qnan(payload: Option<u128>) -> Self { |
154 | DoubleFloat(F::qnan(payload), F::ZERO) |
155 | } |
156 | |
157 | fn snan(payload: Option<u128>) -> Self { |
158 | DoubleFloat(F::snan(payload), F::ZERO) |
159 | } |
160 | |
161 | fn largest() -> Self { |
162 | let status; |
163 | let mut r = DoubleFloat(F::largest(), F::largest()); |
164 | r.1 = r.1.scalbn(-(F::PRECISION as ExpInt + 1)); |
165 | r.1 = unpack!(status=, r.1.next_down()); |
166 | assert_eq!(status, Status::OK); |
167 | r |
168 | } |
169 | |
170 | const SMALLEST: Self = DoubleFloat(F::SMALLEST, F::ZERO); |
171 | |
172 | fn smallest_normalized() -> Self { |
173 | DoubleFloat(F::smallest_normalized().scalbn(F::PRECISION as ExpInt), F::ZERO) |
174 | } |
175 | |
176 | // Implement addition, subtraction, multiplication and division based on: |
177 | // "Software for Doubled-Precision Floating-Point Computations", |
178 | // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. |
179 | |
180 | fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
181 | match (self.category(), rhs.category()) { |
182 | (Category::Infinity, Category::Infinity) => { |
183 | if self.is_negative() != rhs.is_negative() { |
184 | Status::INVALID_OP.and(Self::NAN.copy_sign(self)) |
185 | } else { |
186 | Status::OK.and(self) |
187 | } |
188 | } |
189 | |
190 | (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => Status::OK.and(self), |
191 | |
192 | (Category::Zero, _) | (_, Category::NaN) | (_, Category::Infinity) => Status::OK.and(rhs), |
193 | |
194 | (Category::Normal, Category::Normal) => { |
195 | let mut status = Status::OK; |
196 | let (a, aa, c, cc) = (self.0, self.1, rhs.0, rhs.1); |
197 | let mut z = a; |
198 | z = unpack!(status|=, z.add_r(c, round)); |
199 | if !z.is_finite() { |
200 | if !z.is_infinite() { |
201 | return status.and(DoubleFloat(z, F::ZERO)); |
202 | } |
203 | status = Status::OK; |
204 | let a_cmp_c = a.cmp_abs_normal(c); |
205 | z = cc; |
206 | z = unpack!(status|=, z.add_r(aa, round)); |
207 | if a_cmp_c == Ordering::Greater { |
208 | // z = cc + aa + c + a; |
209 | z = unpack!(status|=, z.add_r(c, round)); |
210 | z = unpack!(status|=, z.add_r(a, round)); |
211 | } else { |
212 | // z = cc + aa + a + c; |
213 | z = unpack!(status|=, z.add_r(a, round)); |
214 | z = unpack!(status|=, z.add_r(c, round)); |
215 | } |
216 | if !z.is_finite() { |
217 | return status.and(DoubleFloat(z, F::ZERO)); |
218 | } |
219 | self.0 = z; |
220 | let mut zz = aa; |
221 | zz = unpack!(status|=, zz.add_r(cc, round)); |
222 | if a_cmp_c == Ordering::Greater { |
223 | // self.1 = a - z + c + zz; |
224 | self.1 = a; |
225 | self.1 = unpack!(status|=, self.1.sub_r(z, round)); |
226 | self.1 = unpack!(status|=, self.1.add_r(c, round)); |
227 | self.1 = unpack!(status|=, self.1.add_r(zz, round)); |
228 | } else { |
229 | // self.1 = c - z + a + zz; |
230 | self.1 = c; |
231 | self.1 = unpack!(status|=, self.1.sub_r(z, round)); |
232 | self.1 = unpack!(status|=, self.1.add_r(a, round)); |
233 | self.1 = unpack!(status|=, self.1.add_r(zz, round)); |
234 | } |
235 | } else { |
236 | // q = a - z; |
237 | let mut q = a; |
238 | q = unpack!(status|=, q.sub_r(z, round)); |
239 | |
240 | // zz = q + c + (a - (q + z)) + aa + cc; |
241 | // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. |
242 | let mut zz = q; |
243 | zz = unpack!(status|=, zz.add_r(c, round)); |
244 | q = unpack!(status|=, q.add_r(z, round)); |
245 | q = unpack!(status|=, q.sub_r(a, round)); |
246 | q = -q; |
247 | zz = unpack!(status|=, zz.add_r(q, round)); |
248 | zz = unpack!(status|=, zz.add_r(aa, round)); |
249 | zz = unpack!(status|=, zz.add_r(cc, round)); |
250 | if zz.is_zero() && !zz.is_negative() { |
251 | return Status::OK.and(DoubleFloat(z, F::ZERO)); |
252 | } |
253 | self.0 = z; |
254 | self.0 = unpack!(status|=, self.0.add_r(zz, round)); |
255 | if !self.0.is_finite() { |
256 | self.1 = F::ZERO; |
257 | return status.and(self); |
258 | } |
259 | self.1 = z; |
260 | self.1 = unpack!(status|=, self.1.sub_r(self.0, round)); |
261 | self.1 = unpack!(status|=, self.1.add_r(zz, round)); |
262 | } |
263 | status.and(self) |
264 | } |
265 | } |
266 | } |
267 | |
268 | fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
269 | // Interesting observation: For special categories, finding the lowest |
270 | // common ancestor of the following layered graph gives the correct |
271 | // return category: |
272 | // |
273 | // NaN |
274 | // / \ |
275 | // Zero Inf |
276 | // \ / |
277 | // Normal |
278 | // |
279 | // e.g. NaN * NaN = NaN |
280 | // Zero * Inf = NaN |
281 | // Normal * Zero = Zero |
282 | // Normal * Inf = Inf |
283 | match (self.category(), rhs.category()) { |
284 | (Category::NaN, _) => Status::OK.and(self), |
285 | |
286 | (_, Category::NaN) => Status::OK.and(rhs), |
287 | |
288 | (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => Status::OK.and(Self::NAN), |
289 | |
290 | (Category::Zero, _) | (Category::Infinity, _) => Status::OK.and(self), |
291 | |
292 | (_, Category::Zero) | (_, Category::Infinity) => Status::OK.and(rhs), |
293 | |
294 | (Category::Normal, Category::Normal) => { |
295 | let mut status = Status::OK; |
296 | let (a, b, c, d) = (self.0, self.1, rhs.0, rhs.1); |
297 | // t = a * c |
298 | let mut t = a; |
299 | t = unpack!(status|=, t.mul_r(c, round)); |
300 | if !t.is_finite_non_zero() { |
301 | return status.and(DoubleFloat(t, F::ZERO)); |
302 | } |
303 | |
304 | // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). |
305 | let mut tau = a; |
306 | tau = unpack!(status|=, tau.mul_add_r(c, -t, round)); |
307 | // v = a * d |
308 | let mut v = a; |
309 | v = unpack!(status|=, v.mul_r(d, round)); |
310 | // w = b * c |
311 | let mut w = b; |
312 | w = unpack!(status|=, w.mul_r(c, round)); |
313 | v = unpack!(status|=, v.add_r(w, round)); |
314 | // tau += v + w |
315 | tau = unpack!(status|=, tau.add_r(v, round)); |
316 | // u = t + tau |
317 | let mut u = t; |
318 | u = unpack!(status|=, u.add_r(tau, round)); |
319 | |
320 | self.0 = u; |
321 | if !u.is_finite() { |
322 | self.1 = F::ZERO; |
323 | } else { |
324 | // self.1 = (t - u) + tau |
325 | t = unpack!(status|=, t.sub_r(u, round)); |
326 | t = unpack!(status|=, t.add_r(tau, round)); |
327 | self.1 = t; |
328 | } |
329 | status.and(self) |
330 | } |
331 | } |
332 | } |
333 | |
334 | fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> { |
335 | Fallback::from(self) |
336 | .mul_add_r(Fallback::from(multiplicand), Fallback::from(addend), round) |
337 | .map(Self::from) |
338 | } |
339 | |
340 | fn div_r(self, rhs: Self, round: Round) -> StatusAnd<Self> { |
341 | Fallback::from(self).div_r(Fallback::from(rhs), round).map(Self::from) |
342 | } |
343 | |
344 | fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> { |
345 | Fallback::from(self).ieee_rem(Fallback::from(rhs)).map(Self::from) |
346 | } |
347 | |
348 | fn c_fmod(self, rhs: Self) -> StatusAnd<Self> { |
349 | Fallback::from(self).c_fmod(Fallback::from(rhs)).map(Self::from) |
350 | } |
351 | |
352 | fn round_to_integral(self, round: Round) -> StatusAnd<Self> { |
353 | Fallback::from(self).round_to_integral(round).map(Self::from) |
354 | } |
355 | |
356 | fn next_up(self) -> StatusAnd<Self> { |
357 | Fallback::from(self).next_up().map(Self::from) |
358 | } |
359 | |
360 | fn from_bits(input: u128) -> Self { |
361 | let (a, b) = (input, input >> F::BITS); |
362 | DoubleFloat(F::from_bits(a & ((1 << F::BITS) - 1)), F::from_bits(b & ((1 << F::BITS) - 1))) |
363 | } |
364 | |
365 | fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> { |
366 | Fallback::from_u128_r(input, round).map(Self::from) |
367 | } |
368 | |
369 | fn from_str_r(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { |
370 | Fallback::from_str_r(s, round).map(|r| r.map(Self::from)) |
371 | } |
372 | |
373 | fn to_bits(self) -> u128 { |
374 | self.0.to_bits() | (self.1.to_bits() << F::BITS) |
375 | } |
376 | |
377 | fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> { |
378 | Fallback::from(self).to_u128_r(width, round, is_exact) |
379 | } |
380 | |
381 | fn cmp_abs_normal(self, rhs: Self) -> Ordering { |
382 | self.0.cmp_abs_normal(rhs.0).then_with(|| { |
383 | let result = self.1.cmp_abs_normal(rhs.1); |
384 | if result != Ordering::Equal { |
385 | let against = self.0.is_negative() ^ self.1.is_negative(); |
386 | let rhs_against = rhs.0.is_negative() ^ rhs.1.is_negative(); |
387 | (!against) |
388 | .cmp(&!rhs_against) |
389 | .then_with(|| if against { result.reverse() } else { result }) |
390 | } else { |
391 | result |
392 | } |
393 | }) |
394 | } |
395 | |
396 | fn bitwise_eq(self, rhs: Self) -> bool { |
397 | self.0.bitwise_eq(rhs.0) && self.1.bitwise_eq(rhs.1) |
398 | } |
399 | |
400 | fn is_negative(self) -> bool { |
401 | self.0.is_negative() |
402 | } |
403 | |
404 | fn is_denormal(self) -> bool { |
405 | self.category() == Category::Normal |
406 | && (self.0.is_denormal() || self.0.is_denormal() || |
407 | // (double)(Hi + Lo) == Hi defines a normal number. |
408 | self.0 != (self.0 + self.1).value) |
409 | } |
410 | |
411 | fn is_signaling(self) -> bool { |
412 | self.0.is_signaling() |
413 | } |
414 | |
415 | fn category(self) -> Category { |
416 | self.0.category() |
417 | } |
418 | |
419 | fn is_integer(self) -> bool { |
420 | self.0.is_integer() && self.1.is_integer() |
421 | } |
422 | |
423 | fn get_exact_inverse(self) -> Option<Self> { |
424 | Fallback::from(self).get_exact_inverse().map(Self::from) |
425 | } |
426 | |
427 | fn ilogb(self) -> ExpInt { |
428 | self.0.ilogb() |
429 | } |
430 | |
431 | fn scalbn_r(self, exp: ExpInt, round: Round) -> Self { |
432 | DoubleFloat(self.0.scalbn_r(exp, round), self.1.scalbn_r(exp, round)) |
433 | } |
434 | |
435 | fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self { |
436 | let a = self.0.frexp_r(exp, round); |
437 | let mut b = self.1; |
438 | if self.category() == Category::Normal { |
439 | b = b.scalbn_r(-*exp, round); |
440 | } |
441 | DoubleFloat(a, b) |
442 | } |
443 | } |
444 | |
445 | // HACK(eddyb) this is here instead of in `tests/ppc.rs` because `DoubleFloat` |
446 | // has private fields, and it's not worth it to make them public just for testing. |
447 | #[test ] |
448 | fn is_integer() { |
449 | let double_from_f64: impl Fn(f64) -> IeeeFloat<…> = |f: f64| ieee::Double::from_bits(input:f.to_bits().into()); |
450 | assert!(DoubleFloat(double_from_f64(-0.0), double_from_f64(-0.0)).is_integer()); |
451 | assert!(!DoubleFloat(double_from_f64(3.14159), double_from_f64(-0.0)).is_integer()); |
452 | assert!(!DoubleFloat(double_from_f64(-0.0), double_from_f64(3.14159)).is_integer()); |
453 | } |
454 | |