1 | //! Support for floating point types that use PowerPC semantics. |
---|---|
2 | |
3 | use crate::ieee; |
4 | use crate::{Category, ExpInt, Float, FloatConvert, ParseError, Round, Status, StatusAnd}; |
5 | |
6 | use core::cmp::Ordering; |
7 | use core::fmt; |
8 | use core::ops::Neg; |
9 | |
10 | /// A larger floating point number represented by two smaller floats. |
11 | #[must_use] |
12 | #[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] |
13 | pub struct DoubleFloat<F>(F, F); |
14 | |
15 | /// 128-bit floating point number comprised of two IEEE [`Double`](ieee::Double) values. |
16 | /// |
17 | /// This is the "IBM Extended Double" format, described at |
18 | /// <https://www.ibm.com/docs/en/aix/7.3?topic=sepl-128-bit-long-double-floating-point-data-type>. |
19 | pub type DoubleDouble = DoubleFloat<ieee::Double>; |
20 | |
21 | // These are legacy semantics for the Fallback, inaccrurate implementation of |
22 | // IBM double-double, if the accurate DoubleDouble doesn't handle the |
23 | // operation. It's equivalent to having an IEEE number with consecutive 106 |
24 | // bits of mantissa and 11 bits of exponent. |
25 | // |
26 | // It's not equivalent to IBM double-double. For example, a legit IBM |
27 | // double-double, 1 + epsilon: |
28 | // |
29 | // 1 + epsilon = 1 + (1 >> 1076) |
30 | // |
31 | // is not representable by a consecutive 106 bits of mantissa. |
32 | // |
33 | // Currently, these semantics are used in the following way: |
34 | // |
35 | // DoubleDouble -> (Double, Double) -> |
36 | // DoubleDouble's Fallback -> IEEE operations |
37 | // |
38 | // FIXME: Implement all operations in DoubleDouble, and delete these |
39 | // semantics. |
40 | // FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds. |
41 | pub struct FallbackS<F>(F); |
42 | type Fallback<F> = ieee::IeeeFloat<FallbackS<F>>; |
43 | impl<F: Float> ieee::Semantics for FallbackS<F> { |
44 | // Forbid any conversion to/from bits. |
45 | const BITS: usize = 0; |
46 | const EXP_BITS: usize = 0; |
47 | |
48 | const PRECISION: usize = F::PRECISION * 2; |
49 | const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; |
50 | const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt + F::PRECISION as ExpInt; |
51 | } |
52 | |
53 | // Convert number to F. To avoid spurious underflows, we re- |
54 | // normalize against the F exponent range first, and only *then* |
55 | // truncate the mantissa. The result of that second conversion |
56 | // may be inexact, but should never underflow. |
57 | // FIXME(eddyb) This shouldn't need to be `pub`, it's only used in bounds. |
58 | pub struct FallbackExtendedS<F>(F); |
59 | type FallbackExtended<F> = ieee::IeeeFloat<FallbackExtendedS<F>>; |
60 | impl<F: Float> ieee::Semantics for FallbackExtendedS<F> { |
61 | // Forbid any conversion to/from bits. |
62 | const BITS: usize = 0; |
63 | const EXP_BITS: usize = 0; |
64 | |
65 | const PRECISION: usize = Fallback::<F>::PRECISION; |
66 | const MAX_EXP: ExpInt = F::MAX_EXP as ExpInt; |
67 | const MIN_EXP: ExpInt = F::MIN_EXP as ExpInt; |
68 | } |
69 | |
70 | impl<F: Float> From<Fallback<F>> for DoubleFloat<F> |
71 | where |
72 | F: FloatConvert<FallbackExtended<F>>, |
73 | FallbackExtended<F>: FloatConvert<F>, |
74 | { |
75 | fn from(x: Fallback<F>) -> Self { |
76 | let mut status; |
77 | let mut loses_info = false; |
78 | |
79 | let extended: FallbackExtended<F> = unpack!(status=, x.convert(&mut loses_info)); |
80 | assert_eq!((status, loses_info), (Status::OK, false)); |
81 | |
82 | let a = unpack!(status=, extended.convert(&mut loses_info)); |
83 | assert_eq!(status - Status::INEXACT, Status::OK); |
84 | |
85 | // If conversion was exact or resulted in a special case, we're done; |
86 | // just set the second double to zero. Otherwise, re-convert back to |
87 | // the extended format and compute the difference. This now should |
88 | // convert exactly to double. |
89 | let b = if a.is_finite_non_zero() && loses_info { |
90 | let u: FallbackExtended<F> = unpack!(status=, a.convert(&mut loses_info)); |
91 | assert_eq!((status, loses_info), (Status::OK, false)); |
92 | let v = unpack!(status=, extended - u); |
93 | assert_eq!(status, Status::OK); |
94 | let v = unpack!(status=, v.convert(&mut loses_info)); |
95 | assert_eq!((status, loses_info), (Status::OK, false)); |
96 | v |
97 | } else { |
98 | F::ZERO |
99 | }; |
100 | |
101 | DoubleFloat(a, b) |
102 | } |
103 | } |
104 | |
105 | impl<F: FloatConvert<Self>> From<DoubleFloat<F>> for Fallback<F> { |
106 | fn from(DoubleFloat(a: F, b: F): DoubleFloat<F>) -> Self { |
107 | let mut status: Status; |
108 | let mut loses_info: bool = false; |
109 | |
110 | // Get the first F and convert to our format. |
111 | let a: IeeeFloat |
112 | assert_eq!((status, loses_info), (Status::OK, false)); |
113 | |
114 | // Unless we have a special case, add in second F. |
115 | if a.is_finite_non_zero() { |
116 | let b: IeeeFloat |
117 | assert_eq!((status, loses_info), (Status::OK, false)); |
118 | |
119 | (a + b).value |
120 | } else { |
121 | a |
122 | } |
123 | } |
124 | } |
125 | |
126 | float_common_impls!(DoubleFloat<F>); |
127 | |
128 | impl<F: Float> Neg for DoubleFloat<F> { |
129 | type Output = Self; |
130 | fn neg(self) -> Self { |
131 | if self.1.is_finite_non_zero() { |
132 | DoubleFloat(-self.0, -self.1) |
133 | } else { |
134 | DoubleFloat(-self.0, self.1) |
135 | } |
136 | } |
137 | } |
138 | |
139 | impl<F: FloatConvert<Fallback<F>>> fmt::Display for DoubleFloat<F> { |
140 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
141 | fmt::Display::fmt(&Fallback::from(*self), f) |
142 | } |
143 | } |
144 | |
145 | impl<F: FloatConvert<Fallback<F>>> Float for DoubleFloat<F> |
146 | where |
147 | Self: From<Fallback<F>>, |
148 | { |
149 | const BITS: usize = F::BITS * 2; |
150 | const PRECISION: usize = Fallback::<F>::PRECISION; |
151 | const MAX_EXP: ExpInt = Fallback::<F>::MAX_EXP; |
152 | const MIN_EXP: ExpInt = Fallback::<F>::MIN_EXP; |
153 | |
154 | const ZERO: Self = DoubleFloat(F::ZERO, F::ZERO); |
155 | |
156 | const INFINITY: Self = DoubleFloat(F::INFINITY, F::ZERO); |
157 | |
158 | // FIXME(eddyb) remove when qnan becomes const fn. |
159 | const NAN: Self = DoubleFloat(F::NAN, F::ZERO); |
160 | |
161 | fn qnan(payload: Option<u128>) -> Self { |
162 | DoubleFloat(F::qnan(payload), F::ZERO) |
163 | } |
164 | |
165 | fn snan(payload: Option<u128>) -> Self { |
166 | DoubleFloat(F::snan(payload), F::ZERO) |
167 | } |
168 | |
169 | fn largest() -> Self { |
170 | let status; |
171 | let mut r = DoubleFloat(F::largest(), F::largest()); |
172 | r.1 = r.1.scalbn(-(F::PRECISION as ExpInt + 1)); |
173 | r.1 = unpack!(status=, r.1.next_down()); |
174 | assert_eq!(status, Status::OK); |
175 | r |
176 | } |
177 | |
178 | const SMALLEST: Self = DoubleFloat(F::SMALLEST, F::ZERO); |
179 | |
180 | fn smallest_normalized() -> Self { |
181 | DoubleFloat(F::smallest_normalized().scalbn(F::PRECISION as ExpInt), F::ZERO) |
182 | } |
183 | |
184 | // Implement addition, subtraction, multiplication and division based on: |
185 | // "Software for Doubled-Precision Floating-Point Computations", |
186 | // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283. |
187 | |
188 | fn add_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
189 | match (self.category(), rhs.category()) { |
190 | (Category::Infinity, Category::Infinity) => { |
191 | if self.is_negative() != rhs.is_negative() { |
192 | Status::INVALID_OP.and(Self::NAN.copy_sign(self)) |
193 | } else { |
194 | Status::OK.and(self) |
195 | } |
196 | } |
197 | |
198 | (_, Category::Zero) | (Category::NaN, _) | (Category::Infinity, Category::Normal) => Status::OK.and(self), |
199 | |
200 | (Category::Zero, _) | (_, Category::NaN) | (_, Category::Infinity) => Status::OK.and(rhs), |
201 | |
202 | (Category::Normal, Category::Normal) => { |
203 | let mut status = Status::OK; |
204 | let (a, aa, c, cc) = (self.0, self.1, rhs.0, rhs.1); |
205 | let mut z = a; |
206 | z = unpack!(status|=, z.add_r(c, round)); |
207 | if !z.is_finite() { |
208 | if !z.is_infinite() { |
209 | return status.and(DoubleFloat(z, F::ZERO)); |
210 | } |
211 | status = Status::OK; |
212 | let a_cmp_c = a.cmp_abs_normal(c); |
213 | z = cc; |
214 | z = unpack!(status|=, z.add_r(aa, round)); |
215 | if a_cmp_c == Ordering::Greater { |
216 | // z = cc + aa + c + a; |
217 | z = unpack!(status|=, z.add_r(c, round)); |
218 | z = unpack!(status|=, z.add_r(a, round)); |
219 | } else { |
220 | // z = cc + aa + a + c; |
221 | z = unpack!(status|=, z.add_r(a, round)); |
222 | z = unpack!(status|=, z.add_r(c, round)); |
223 | } |
224 | if !z.is_finite() { |
225 | return status.and(DoubleFloat(z, F::ZERO)); |
226 | } |
227 | self.0 = z; |
228 | let mut zz = aa; |
229 | zz = unpack!(status|=, zz.add_r(cc, round)); |
230 | if a_cmp_c == Ordering::Greater { |
231 | // self.1 = a - z + c + zz; |
232 | self.1 = a; |
233 | self.1 = unpack!(status|=, self.1.sub_r(z, round)); |
234 | self.1 = unpack!(status|=, self.1.add_r(c, round)); |
235 | self.1 = unpack!(status|=, self.1.add_r(zz, round)); |
236 | } else { |
237 | // self.1 = c - z + a + zz; |
238 | self.1 = c; |
239 | self.1 = unpack!(status|=, self.1.sub_r(z, round)); |
240 | self.1 = unpack!(status|=, self.1.add_r(a, round)); |
241 | self.1 = unpack!(status|=, self.1.add_r(zz, round)); |
242 | } |
243 | } else { |
244 | // q = a - z; |
245 | let mut q = a; |
246 | q = unpack!(status|=, q.sub_r(z, round)); |
247 | |
248 | // zz = q + c + (a - (q + z)) + aa + cc; |
249 | // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies. |
250 | let mut zz = q; |
251 | zz = unpack!(status|=, zz.add_r(c, round)); |
252 | q = unpack!(status|=, q.add_r(z, round)); |
253 | q = unpack!(status|=, q.sub_r(a, round)); |
254 | q = -q; |
255 | zz = unpack!(status|=, zz.add_r(q, round)); |
256 | zz = unpack!(status|=, zz.add_r(aa, round)); |
257 | zz = unpack!(status|=, zz.add_r(cc, round)); |
258 | if zz.is_zero() && !zz.is_negative() { |
259 | return Status::OK.and(DoubleFloat(z, F::ZERO)); |
260 | } |
261 | self.0 = z; |
262 | self.0 = unpack!(status|=, self.0.add_r(zz, round)); |
263 | if !self.0.is_finite() { |
264 | self.1 = F::ZERO; |
265 | return status.and(self); |
266 | } |
267 | self.1 = z; |
268 | self.1 = unpack!(status|=, self.1.sub_r(self.0, round)); |
269 | self.1 = unpack!(status|=, self.1.add_r(zz, round)); |
270 | } |
271 | status.and(self) |
272 | } |
273 | } |
274 | } |
275 | |
276 | fn mul_r(mut self, rhs: Self, round: Round) -> StatusAnd<Self> { |
277 | // Interesting observation: For special categories, finding the lowest |
278 | // common ancestor of the following layered graph gives the correct |
279 | // return category: |
280 | // |
281 | // NaN |
282 | // / \ |
283 | // Zero Inf |
284 | // \ / |
285 | // Normal |
286 | // |
287 | // e.g. NaN * NaN = NaN |
288 | // Zero * Inf = NaN |
289 | // Normal * Zero = Zero |
290 | // Normal * Inf = Inf |
291 | match (self.category(), rhs.category()) { |
292 | (Category::NaN, _) => Status::OK.and(self), |
293 | |
294 | (_, Category::NaN) => Status::OK.and(rhs), |
295 | |
296 | (Category::Zero, Category::Infinity) | (Category::Infinity, Category::Zero) => Status::OK.and(Self::NAN), |
297 | |
298 | (Category::Zero, _) | (Category::Infinity, _) => Status::OK.and(self), |
299 | |
300 | (_, Category::Zero) | (_, Category::Infinity) => Status::OK.and(rhs), |
301 | |
302 | (Category::Normal, Category::Normal) => { |
303 | let mut status = Status::OK; |
304 | let (a, b, c, d) = (self.0, self.1, rhs.0, rhs.1); |
305 | // t = a * c |
306 | let mut t = a; |
307 | t = unpack!(status|=, t.mul_r(c, round)); |
308 | if !t.is_finite_non_zero() { |
309 | return status.and(DoubleFloat(t, F::ZERO)); |
310 | } |
311 | |
312 | // tau = fmsub(a, c, t), that is -fmadd(-a, c, t). |
313 | let mut tau = a; |
314 | tau = unpack!(status|=, tau.mul_add_r(c, -t, round)); |
315 | // v = a * d |
316 | let mut v = a; |
317 | v = unpack!(status|=, v.mul_r(d, round)); |
318 | // w = b * c |
319 | let mut w = b; |
320 | w = unpack!(status|=, w.mul_r(c, round)); |
321 | v = unpack!(status|=, v.add_r(w, round)); |
322 | // tau += v + w |
323 | tau = unpack!(status|=, tau.add_r(v, round)); |
324 | // u = t + tau |
325 | let mut u = t; |
326 | u = unpack!(status|=, u.add_r(tau, round)); |
327 | |
328 | self.0 = u; |
329 | if !u.is_finite() { |
330 | self.1 = F::ZERO; |
331 | } else { |
332 | // self.1 = (t - u) + tau |
333 | t = unpack!(status|=, t.sub_r(u, round)); |
334 | t = unpack!(status|=, t.add_r(tau, round)); |
335 | self.1 = t; |
336 | } |
337 | status.and(self) |
338 | } |
339 | } |
340 | } |
341 | |
342 | fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self> { |
343 | Fallback::from(self) |
344 | .mul_add_r(Fallback::from(multiplicand), Fallback::from(addend), round) |
345 | .map(Self::from) |
346 | } |
347 | |
348 | fn div_r(self, rhs: Self, round: Round) -> StatusAnd<Self> { |
349 | Fallback::from(self).div_r(Fallback::from(rhs), round).map(Self::from) |
350 | } |
351 | |
352 | fn ieee_rem(self, rhs: Self) -> StatusAnd<Self> { |
353 | Fallback::from(self).ieee_rem(Fallback::from(rhs)).map(Self::from) |
354 | } |
355 | |
356 | fn c_fmod(self, rhs: Self) -> StatusAnd<Self> { |
357 | Fallback::from(self).c_fmod(Fallback::from(rhs)).map(Self::from) |
358 | } |
359 | |
360 | fn round_to_integral(self, round: Round) -> StatusAnd<Self> { |
361 | Fallback::from(self).round_to_integral(round).map(Self::from) |
362 | } |
363 | |
364 | fn next_up(self) -> StatusAnd<Self> { |
365 | Fallback::from(self).next_up().map(Self::from) |
366 | } |
367 | |
368 | fn from_bits(input: u128) -> Self { |
369 | let (a, b) = (input, input >> F::BITS); |
370 | DoubleFloat(F::from_bits(a & ((1 << F::BITS) - 1)), F::from_bits(b & ((1 << F::BITS) - 1))) |
371 | } |
372 | |
373 | fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self> { |
374 | Fallback::from_u128_r(input, round).map(Self::from) |
375 | } |
376 | |
377 | fn from_str_r(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError> { |
378 | Fallback::from_str_r(s, round).map(|r| r.map(Self::from)) |
379 | } |
380 | |
381 | fn to_bits(self) -> u128 { |
382 | self.0.to_bits() | (self.1.to_bits() << F::BITS) |
383 | } |
384 | |
385 | fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128> { |
386 | Fallback::from(self).to_u128_r(width, round, is_exact) |
387 | } |
388 | |
389 | fn cmp_abs_normal(self, rhs: Self) -> Ordering { |
390 | self.0.cmp_abs_normal(rhs.0).then_with(|| { |
391 | let result = self.1.cmp_abs_normal(rhs.1); |
392 | if result != Ordering::Equal { |
393 | let against = self.0.is_negative() ^ self.1.is_negative(); |
394 | let rhs_against = rhs.0.is_negative() ^ rhs.1.is_negative(); |
395 | (!against) |
396 | .cmp(&!rhs_against) |
397 | .then_with(|| if against { result.reverse() } else { result }) |
398 | } else { |
399 | result |
400 | } |
401 | }) |
402 | } |
403 | |
404 | fn bitwise_eq(self, rhs: Self) -> bool { |
405 | self.0.bitwise_eq(rhs.0) && self.1.bitwise_eq(rhs.1) |
406 | } |
407 | |
408 | fn is_negative(self) -> bool { |
409 | self.0.is_negative() |
410 | } |
411 | |
412 | fn is_denormal(self) -> bool { |
413 | self.category() == Category::Normal |
414 | && (self.0.is_denormal() || self.0.is_denormal() || |
415 | // (double)(Hi + Lo) == Hi defines a normal number. |
416 | self.0 != (self.0 + self.1).value) |
417 | } |
418 | |
419 | fn is_signaling(self) -> bool { |
420 | self.0.is_signaling() |
421 | } |
422 | |
423 | fn category(self) -> Category { |
424 | self.0.category() |
425 | } |
426 | |
427 | fn is_integer(self) -> bool { |
428 | self.0.is_integer() && self.1.is_integer() |
429 | } |
430 | |
431 | fn get_exact_inverse(self) -> Option<Self> { |
432 | Fallback::from(self).get_exact_inverse().map(Self::from) |
433 | } |
434 | |
435 | fn ilogb(self) -> ExpInt { |
436 | self.0.ilogb() |
437 | } |
438 | |
439 | fn scalbn_r(self, exp: ExpInt, round: Round) -> Self { |
440 | DoubleFloat(self.0.scalbn_r(exp, round), self.1.scalbn_r(exp, round)) |
441 | } |
442 | |
443 | fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self { |
444 | let a = self.0.frexp_r(exp, round); |
445 | let mut b = self.1; |
446 | if self.category() == Category::Normal { |
447 | b = b.scalbn_r(-*exp, round); |
448 | } |
449 | DoubleFloat(a, b) |
450 | } |
451 | } |
452 | |
453 | // HACK(eddyb) this is here instead of in `tests/ppc.rs` because `DoubleFloat` |
454 | // has private fields, and it's not worth it to make them public just for testing. |
455 | #[test] |
456 | fn is_integer() { |
457 | let double_from_f64 = |f: f64| ieee::Double::from_bits(f.to_bits().into()); |
458 | assert!(DoubleFloat(double_from_f64(-0.0), double_from_f64(-0.0)).is_integer()); |
459 | assert!(!DoubleFloat(double_from_f64(3.14159), double_from_f64(-0.0)).is_integer()); |
460 | assert!(!DoubleFloat(double_from_f64(-0.0), double_from_f64(3.14159)).is_integer()); |
461 | } |
462 |
Definitions
- DoubleFloat
- DoubleDouble
- FallbackS
- Fallback
- FallbackExtendedS
- FallbackExtended
- from
- from
- Output
- neg
- fmt
- qnan
- snan
- largest
- smallest_normalized
- add_r
- mul_r
- mul_add_r
- div_r
- ieee_rem
- c_fmod
- round_to_integral
- next_up
- from_bits
- from_u128_r
- from_str_r
- to_bits
- to_u128_r
- cmp_abs_normal
- bitwise_eq
- is_negative
- is_denormal
- is_signaling
- category
- is_integer
- get_exact_inverse
- ilogb
- scalbn_r
Learn Rust with the experts
Find out more