//! Port of LLVM's APFloat software floating-point implementation from the
//! following C++ sources (please update commit hash when backporting):
//! <https://github.com/llvm/llvm-project/commit/462a31f5a5abb905869ea93cc49b096079b11aa4>
//! * `llvm/include/llvm/ADT/APFloat.h` -> `Float` and `FloatConvert` traits
//! * `llvm/lib/Support/APFloat.cpp` -> `ieee` and `ppc` modules
//! * `llvm/unittests/ADT/APFloatTest.cpp` -> `tests` directory
//!
//! The port contains no unsafe code, global state, or side-effects in general,
//! and the only allocations are in the conversion to/from decimal strings.
//!
//! Most of the API and the testcases are intact in some form or another,
//! with some ergonomic changes, such as idiomatic short names, returning
//! new values instead of mutating the receiver, and having separate method
//! variants that take a non-default rounding mode (with the suffix `_r`).
//! Comments have been preserved where possible, only slightly adapted.
//!
//! Instead of keeping a pointer to a configuration struct and inspecting it
//! dynamically on every operation, types (e.g. `ieee::Double`), traits
//! (e.g. `ieee::Semantics`) and associated constants are employed for
//! increased type safety and performance.
//!
//! On-heap bigints are replaced everywhere (except in decimal conversion)
//! with short arrays of `type Limb = u128` elements (instead of `u64`).
//! This allows fitting the largest supported significands in one integer
//! (`ieee::Quad` and `ppc::Fallback` use slightly less than 128 bits).
//! All of the functions in the `ieee::sig` module operate on slices.
//!
//! # Note
//!
//! This API is completely unstable and subject to change.
31 | |
32 | #![no_std] |
33 | #![deny(warnings)] |
34 | #![forbid(unsafe_code)] |
35 | |
36 | #[macro_use] |
37 | extern crate bitflags; |
38 | |
39 | extern crate alloc; |
40 | |
41 | use core::cmp::Ordering; |
42 | use core::fmt; |
43 | use core::ops::{Add, Div, Mul, Neg, Rem, Sub}; |
44 | use core::ops::{AddAssign, DivAssign, MulAssign, RemAssign, SubAssign}; |
45 | use core::str::FromStr; |
46 | |
47 | bitflags! { |
48 | /// IEEE-754R 7: Default exception handling. |
49 | /// |
50 | /// UNDERFLOW or OVERFLOW are always returned or-ed with INEXACT. |
51 | /// |
52 | /// APFloat models this behavior specified by IEEE-754: |
53 | /// "For operations producing results in floating-point format, the default |
54 | /// result of an operation that signals the invalid operation exception |
55 | /// shall be a quiet NaN." |
56 | #[must_use] |
57 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] |
58 | pub struct Status: u8 { |
59 | const OK = 0x00; |
60 | const INVALID_OP = 0x01; |
61 | const DIV_BY_ZERO = 0x02; |
62 | const OVERFLOW = 0x04; |
63 | const UNDERFLOW = 0x08; |
64 | const INEXACT = 0x10; |
65 | } |
66 | } |
67 | |
68 | /// The result of a computation consisting of the output value and the exceptions, if any. |
69 | #[must_use] |
70 | #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] |
71 | pub struct StatusAnd<T> { |
72 | pub status: Status, |
73 | pub value: T, |
74 | } |
75 | |
76 | impl Status { |
77 | /// Add a value to this status to create a [`StatusAnd`]. |
78 | pub fn and<T>(self, value: T) -> StatusAnd<T> { |
79 | StatusAnd { status: self, value } |
80 | } |
81 | } |
82 | |
83 | impl<T> StatusAnd<T> { |
84 | /// Keep the existing status but apply a transformation to `value`. |
85 | pub fn map<F: FnOnce(T) -> U, U>(self, f: F) -> StatusAnd<U> { |
86 | StatusAnd { |
87 | status: self.status, |
88 | value: f(self.value), |
89 | } |
90 | } |
91 | } |
92 | |
93 | impl<T: core::fmt::Debug> StatusAnd<T> { |
94 | /// Extract the inner value if there were no errors. If there were errors, panic. |
95 | pub fn unwrap(self) -> T { |
96 | assert_eq!(self.status, Status::OK, "called `StatusAnd::unwrap()` on an error value. Value:{:?} ", self.value); |
97 | self.value |
98 | } |
99 | } |
100 | |
/// Destructures a [`StatusAnd`] expression and evaluates to its `value`,
/// while recording its `status` in the named variable.
///
/// * `unpack!(status|=, expr)` or-assigns the new status into `status`.
/// * `unpack!(status=, expr)` assigns the new status to `status`
///   (this also works for a declared-but-uninitialized `let status;`).
#[macro_export]
macro_rules! unpack {
    ($status:ident|=, $e:expr) => {
        match $e {
            $crate::StatusAnd { status, value } => {
                $status |= status;
                value
            }
        }
    };
    ($status:ident=, $e:expr) => {
        match $e {
            $crate::StatusAnd { status, value } => {
                $status = status;
                value
            }
        }
    };
}
120 | |
/// Category of internally-represented number.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Category {
    /// Positive or negative infinity.
    Infinity,
    /// Not a Number (quiet or signaling).
    NaN,
    /// A finite, non-zero number.
    Normal,
    /// Positive or negative zero.
    Zero,
}
129 | |
/// IEEE-754R 4.3: Rounding-direction attributes.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Round {
    /// Round to nearest, ties to even.
    NearestTiesToEven,
    /// Round toward positive infinity.
    TowardPositive,
    /// Round toward negative infinity.
    TowardNegative,
    /// Round toward zero.
    TowardZero,
    /// Round to nearest, ties away from zero.
    NearestTiesToAway,
}

/// Mirrors a rounding direction across zero.
///
/// `TowardPositive` and `TowardNegative` are swapped; the remaining modes are
/// symmetric about zero and are returned unchanged. This is what callers need
/// when they perform an operation on a negated operand and negate the result
/// (as `Float::from_i128_r` and `Float::to_i128_r` do).
impl Neg for Round {
    type Output = Round;
    #[inline]
    fn neg(self) -> Round {
        match self {
            Round::TowardPositive => Round::TowardNegative,
            Round::TowardNegative => Round::TowardPositive,
            Round::NearestTiesToEven | Round::TowardZero | Round::NearestTiesToAway => self,
        }
    }
}
151 | |
/// A signed type to represent a floating point number's unbiased exponent.
pub type ExpInt = i32;

// `ilogb` error results.

/// Value returned by `ilogb` for infinities.
pub const IEK_INF: ExpInt = ExpInt::MAX;
/// Value returned by `ilogb` for NaNs.
pub const IEK_NAN: ExpInt = ExpInt::MIN;
/// Value returned by `ilogb` for zeros.
pub const IEK_ZERO: ExpInt = ExpInt::MIN + 1;
159 | |
/// An error which can occur when parsing a floating point number from a string.
///
/// The wrapped string is a static, human-readable description of the failure.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct ParseError(pub &'static str);
163 | |
164 | /// A self-contained host- and target-independent arbitrary-precision |
165 | /// floating-point software implementation. |
166 | /// |
167 | /// `apfloat` uses significand bignum integer arithmetic as provided by functions |
168 | /// in the `ieee::sig`. |
169 | /// |
170 | /// Written for clarity rather than speed, in particular with a view to use in |
171 | /// the front-end of a cross compiler so that target arithmetic can be correctly |
172 | /// performed on the host. Performance should nonetheless be reasonable, |
173 | /// particularly for its intended use. It may be useful as a base |
174 | /// implementation for a run-time library during development of a faster |
175 | /// target-specific one. |
176 | /// |
177 | /// All 5 rounding modes in the IEEE-754R draft are handled correctly for all |
178 | /// implemented operations. Currently implemented operations are add, subtract, |
179 | /// multiply, divide, fused-multiply-add, conversion-to-float, |
180 | /// conversion-to-integer and conversion-from-integer. New rounding modes |
181 | /// (e.g. away from zero) can be added with three or four lines of code. |
182 | /// |
183 | /// Four formats are built-in: IEEE single precision, double precision, |
184 | /// quadruple precision, and x87 80-bit extended double (when operating with |
185 | /// full extended precision). Adding a new format that obeys IEEE semantics |
186 | /// only requires adding two lines of code: a declaration and definition of the |
187 | /// format. |
188 | /// |
189 | /// All operations return the status of that operation as an exception bit-mask, |
190 | /// so multiple operations can be done consecutively with their results or-ed |
191 | /// together. The returned status can be useful for compiler diagnostics; e.g., |
192 | /// inexact, underflow and overflow can be easily diagnosed on constant folding, |
193 | /// and compiler optimizers can determine what exceptions would be raised by |
194 | /// folding operations and optimize, or perhaps not optimize, accordingly. |
195 | /// |
196 | /// At present, underflow tininess is detected after rounding; it should be |
197 | /// straight forward to add support for the before-rounding case too. |
198 | /// |
199 | /// The library reads hexadecimal floating point numbers as per C99, and |
200 | /// correctly rounds if necessary according to the specified rounding mode. |
201 | /// Syntax is required to have been validated by the caller. |
202 | /// |
203 | /// It also reads decimal floating point numbers and correctly rounds according |
204 | /// to the specified rounding mode. |
205 | /// |
206 | /// Non-zero finite numbers are represented internally as a sign bit, a 16-bit |
207 | /// signed exponent, and the significand as an array of integer limbs. After |
208 | /// normalization of a number of precision P the exponent is within the range of |
209 | /// the format, and if the number is not denormal the P-th bit of the |
210 | /// significand is set as an explicit integer bit. For denormals the most |
211 | /// significant bit is shifted right so that the exponent is maintained at the |
212 | /// format's minimum, so that the smallest denormal has just the least |
213 | /// significant bit of the significand set. The sign of zeros and infinities |
214 | /// is significant; the exponent and significand of such numbers is not stored, |
215 | /// but has a known implicit (deterministic) value: 0 for the significands, 0 |
216 | /// for zero exponent, all 1 bits for infinity exponent. For NaNs the sign and |
217 | /// significand are deterministic, although not really meaningful, and preserved |
218 | /// in non-conversion operations. The exponent is implicitly all 1 bits. |
219 | /// |
220 | /// `apfloat` does not provide any exception handling beyond default exception |
221 | /// handling. We represent Signaling NaNs via IEEE-754R 2008 6.2.1 should clause |
222 | /// by encoding Signaling NaNs with the first bit of its trailing significand as |
223 | /// 0. |
224 | /// |
225 | /// Future work |
226 | /// =========== |
227 | /// |
228 | /// Some features that may or may not be worth adding: |
229 | /// |
230 | /// Optional ability to detect underflow tininess before rounding. |
231 | /// |
232 | /// New formats: x87 in single and double precision mode (IEEE apart from |
233 | /// extended exponent range) (hard). |
234 | /// |
235 | /// New operations: sqrt, nexttoward. |
236 | /// |
237 | pub trait Float: |
238 | Copy |
239 | + Default |
240 | + FromStr<Err = ParseError> |
241 | + PartialOrd |
242 | + fmt::Display |
243 | + Neg<Output = Self> |
244 | + AddAssign |
245 | + SubAssign |
246 | + MulAssign |
247 | + DivAssign |
248 | + RemAssign |
249 | + Add<Output = StatusAnd<Self>> |
250 | + Sub<Output = StatusAnd<Self>> |
251 | + Mul<Output = StatusAnd<Self>> |
252 | + Div<Output = StatusAnd<Self>> |
253 | + Rem<Output = StatusAnd<Self>> |
254 | { |
255 | /// Total number of bits in the in-memory format. |
256 | const BITS: usize; |
257 | |
258 | /// Number of bits in the significand. This includes the integer bit. |
259 | const PRECISION: usize; |
260 | |
261 | /// The largest E such that 2^E is representable; this matches the |
262 | /// definition of IEEE 754. |
263 | const MAX_EXP: ExpInt; |
264 | |
265 | /// The smallest E such that 2^E is a normalized number; this |
266 | /// matches the definition of IEEE 754. |
267 | const MIN_EXP: ExpInt; |
268 | |
269 | /// Positive Zero. |
270 | const ZERO: Self; |
271 | |
272 | /// Positive Infinity. |
273 | const INFINITY: Self; |
274 | |
275 | /// NaN (Not a Number). |
276 | // FIXME(eddyb) provide a default when qnan becomes const fn. |
277 | const NAN: Self; |
278 | |
279 | /// Factory for QNaN values. |
280 | // FIXME(eddyb) should be const fn. |
281 | fn qnan(payload: Option<u128>) -> Self; |
282 | |
283 | /// Factory for SNaN values. |
284 | // FIXME(eddyb) should be const fn. |
285 | fn snan(payload: Option<u128>) -> Self; |
286 | |
287 | /// Largest finite number. |
288 | // FIXME(eddyb) should be const (but FloatPair::largest is nontrivial). |
289 | fn largest() -> Self; |
290 | |
291 | /// Smallest (by magnitude) finite number. |
292 | /// Might be denormalized, which implies a relative loss of precision. |
293 | const SMALLEST: Self; |
294 | |
295 | /// Smallest (by magnitude) normalized finite number. |
296 | // FIXME(eddyb) should be const (but FloatPair::smallest_normalized is nontrivial). |
297 | fn smallest_normalized() -> Self; |
298 | |
299 | // Arithmetic |
300 | |
301 | fn add_r(self, rhs: Self, round: Round) -> StatusAnd<Self>; |
302 | fn sub_r(self, rhs: Self, round: Round) -> StatusAnd<Self> { |
303 | self.add_r(-rhs, round) |
304 | } |
305 | fn mul_r(self, rhs: Self, round: Round) -> StatusAnd<Self>; |
306 | fn mul_add_r(self, multiplicand: Self, addend: Self, round: Round) -> StatusAnd<Self>; |
307 | fn mul_add(self, multiplicand: Self, addend: Self) -> StatusAnd<Self> { |
308 | self.mul_add_r(multiplicand, addend, Round::NearestTiesToEven) |
309 | } |
310 | fn div_r(self, rhs: Self, round: Round) -> StatusAnd<Self>; |
311 | /// IEEE remainder. |
312 | fn ieee_rem(self, rhs: Self) -> StatusAnd<Self>; |
313 | /// C fmod, or llvm frem. |
314 | fn c_fmod(self, rhs: Self) -> StatusAnd<Self>; |
315 | fn round_to_integral(self, round: Round) -> StatusAnd<Self>; |
316 | |
317 | /// IEEE-754R 2008 5.3.1: nextUp. |
318 | fn next_up(self) -> StatusAnd<Self>; |
319 | |
320 | /// IEEE-754R 2008 5.3.1: nextDown. |
321 | /// |
322 | /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with |
323 | /// appropriate sign switching before/after the computation. |
324 | fn next_down(self) -> StatusAnd<Self> { |
325 | (-self).next_up().map(|r| -r) |
326 | } |
327 | |
328 | fn abs(self) -> Self { |
329 | if self.is_negative() { |
330 | -self |
331 | } else { |
332 | self |
333 | } |
334 | } |
335 | fn copy_sign(self, rhs: Self) -> Self { |
336 | if self.is_negative() != rhs.is_negative() { |
337 | -self |
338 | } else { |
339 | self |
340 | } |
341 | } |
342 | |
343 | // Conversions |
344 | fn from_bits(input: u128) -> Self; |
345 | fn from_i128_r(input: i128, round: Round) -> StatusAnd<Self> { |
346 | if input < 0 { |
347 | Self::from_u128_r(input.wrapping_neg() as u128, -round).map(|r| -r) |
348 | } else { |
349 | Self::from_u128_r(input as u128, round) |
350 | } |
351 | } |
352 | fn from_i128(input: i128) -> StatusAnd<Self> { |
353 | Self::from_i128_r(input, Round::NearestTiesToEven) |
354 | } |
355 | fn from_u128_r(input: u128, round: Round) -> StatusAnd<Self>; |
356 | fn from_u128(input: u128) -> StatusAnd<Self> { |
357 | Self::from_u128_r(input, Round::NearestTiesToEven) |
358 | } |
359 | fn from_str_r(s: &str, round: Round) -> Result<StatusAnd<Self>, ParseError>; |
360 | fn to_bits(self) -> u128; |
361 | |
362 | /// Convert a floating point number to an integer according to the |
363 | /// rounding mode. In case of an invalid operation exception, |
364 | /// deterministic values are returned, namely zero for NaNs and the |
365 | /// minimal or maximal value respectively for underflow or overflow. |
366 | /// If the rounded value is in range but the floating point number is |
367 | /// not the exact integer, the C standard doesn't require an inexact |
368 | /// exception to be raised. IEEE-854 does require it so we do that. |
369 | /// |
370 | /// Note that for conversions to integer type the C standard requires |
371 | /// round-to-zero to always be used. |
372 | /// |
373 | /// The *is_exact output tells whether the result is exact, in the sense |
374 | /// that converting it back to the original floating point type produces |
375 | /// the original value. This is almost equivalent to result==Status::OK, |
376 | /// except for negative zeroes. |
377 | fn to_i128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<i128> { |
378 | let status; |
379 | if self.is_negative() { |
380 | if self.is_zero() { |
381 | // Negative zero can't be represented as an int. |
382 | *is_exact = false; |
383 | } |
384 | let r = unpack!(status=, (-self).to_u128_r(width, -round, is_exact)); |
385 | |
386 | // Check for values that don't fit in the signed integer. |
387 | if r > (1 << (width - 1)) { |
388 | // Return the most negative integer for the given width. |
389 | *is_exact = false; |
390 | Status::INVALID_OP.and(-1 << (width - 1)) |
391 | } else { |
392 | status.and(r.wrapping_neg() as i128) |
393 | } |
394 | } else { |
395 | // Positive case is simpler, can pretend it's a smaller unsigned |
396 | // integer, and `to_u128` will take care of all the edge cases. |
397 | self.to_u128_r(width - 1, round, is_exact).map(|r| r as i128) |
398 | } |
399 | } |
400 | fn to_i128(self, width: usize) -> StatusAnd<i128> { |
401 | self.to_i128_r(width, Round::TowardZero, &mut true) |
402 | } |
403 | fn to_u128_r(self, width: usize, round: Round, is_exact: &mut bool) -> StatusAnd<u128>; |
404 | fn to_u128(self, width: usize) -> StatusAnd<u128> { |
405 | self.to_u128_r(width, Round::TowardZero, &mut true) |
406 | } |
407 | |
408 | fn cmp_abs_normal(self, rhs: Self) -> Ordering; |
409 | |
410 | /// Bitwise comparison for equality (QNaNs compare equal, 0!=-0). |
411 | fn bitwise_eq(self, rhs: Self) -> bool; |
412 | |
413 | // IEEE-754R 5.7.2 General operations. |
414 | |
415 | /// Implements IEEE minNum semantics. Returns the smaller of the 2 arguments if |
416 | /// both are not NaN. If either argument is a NaN, returns the other argument. |
417 | fn min(self, other: Self) -> Self { |
418 | if self.is_nan() { |
419 | other |
420 | } else if other.is_nan() { |
421 | self |
422 | } else if other < self { |
423 | other |
424 | } else { |
425 | self |
426 | } |
427 | } |
428 | |
429 | /// Implements IEEE maxNum semantics. Returns the larger of the 2 arguments if |
430 | /// both are not NaN. If either argument is a NaN, returns the other argument. |
431 | fn max(self, other: Self) -> Self { |
432 | if self.is_nan() { |
433 | other |
434 | } else if other.is_nan() { |
435 | self |
436 | } else if self < other { |
437 | other |
438 | } else { |
439 | self |
440 | } |
441 | } |
442 | |
443 | /// Implements IEEE 754-2018 minimum semantics. Returns the smaller of 2 |
444 | /// arguments, propagating NaNs and treating -0 as less than +0. |
445 | fn minimum(self, other: Self) -> Self { |
446 | if self.is_nan() { |
447 | self |
448 | } else if other.is_nan() { |
449 | other |
450 | } else if self.is_zero() && other.is_zero() && self.is_negative() != other.is_negative() { |
451 | if self.is_negative() { |
452 | self |
453 | } else { |
454 | other |
455 | } |
456 | } else if other < self { |
457 | other |
458 | } else { |
459 | self |
460 | } |
461 | } |
462 | |
463 | /// Implements IEEE 754-2018 maximum semantics. Returns the larger of 2 |
464 | /// arguments, propagating NaNs and treating -0 as less than +0. |
465 | fn maximum(self, other: Self) -> Self { |
466 | if self.is_nan() { |
467 | self |
468 | } else if other.is_nan() { |
469 | other |
470 | } else if self.is_zero() && other.is_zero() && self.is_negative() != other.is_negative() { |
471 | if self.is_negative() { |
472 | other |
473 | } else { |
474 | self |
475 | } |
476 | } else if self < other { |
477 | other |
478 | } else { |
479 | self |
480 | } |
481 | } |
482 | |
483 | /// IEEE-754R isSignMinus: Returns true if and only if the current value is |
484 | /// negative. |
485 | /// |
486 | /// This applies to zeros and NaNs as well. |
487 | fn is_negative(self) -> bool; |
488 | |
489 | /// IEEE-754R isNormal: Returns true if and only if the current value is normal. |
490 | /// |
491 | /// This implies that the current value of the float is not zero, subnormal, |
492 | /// infinite, or NaN following the definition of normality from IEEE-754R. |
493 | fn is_normal(self) -> bool { |
494 | !self.is_denormal() && self.is_finite_non_zero() |
495 | } |
496 | |
497 | /// Returns true if and only if the current value is zero, subnormal, or |
498 | /// normal. |
499 | /// |
500 | /// This means that the value is not infinite or NaN. |
501 | fn is_finite(self) -> bool { |
502 | !self.is_nan() && !self.is_infinite() |
503 | } |
504 | |
505 | /// Returns true if and only if the float is plus or minus zero. |
506 | fn is_zero(self) -> bool { |
507 | self.category() == Category::Zero |
508 | } |
509 | |
510 | /// IEEE-754R isSubnormal(): Returns true if and only if the float is a |
511 | /// denormal. |
512 | fn is_denormal(self) -> bool; |
513 | |
514 | /// IEEE-754R isInfinite(): Returns true if and only if the float is infinity. |
515 | fn is_infinite(self) -> bool { |
516 | self.category() == Category::Infinity |
517 | } |
518 | |
519 | /// Returns true if and only if the float is a quiet or signaling NaN. |
520 | fn is_nan(self) -> bool { |
521 | self.category() == Category::NaN |
522 | } |
523 | |
524 | /// Returns true if and only if the float is a signaling NaN. |
525 | fn is_signaling(self) -> bool; |
526 | |
527 | // Simple Queries |
528 | |
529 | fn category(self) -> Category; |
530 | fn is_non_zero(self) -> bool { |
531 | !self.is_zero() |
532 | } |
533 | fn is_finite_non_zero(self) -> bool { |
534 | self.is_finite() && !self.is_zero() |
535 | } |
536 | fn is_pos_zero(self) -> bool { |
537 | self.is_zero() && !self.is_negative() |
538 | } |
539 | fn is_neg_zero(self) -> bool { |
540 | self.is_zero() && self.is_negative() |
541 | } |
542 | fn is_pos_infinity(self) -> bool { |
543 | self.is_infinite() && !self.is_negative() |
544 | } |
545 | fn is_neg_infinity(self) -> bool { |
546 | self.is_infinite() && self.is_negative() |
547 | } |
548 | |
549 | /// Returns true if and only if the number has the smallest possible non-zero |
550 | /// magnitude in the current semantics. |
551 | fn is_smallest(self) -> bool { |
552 | Self::SMALLEST.copy_sign(self).bitwise_eq(self) |
553 | } |
554 | |
555 | /// Returns true if this is the smallest (by magnitude) normalized finite |
556 | /// number in the given semantics. |
557 | fn is_smallest_normalized(self) -> bool { |
558 | Self::smallest_normalized().copy_sign(self).bitwise_eq(self) |
559 | } |
560 | |
561 | /// Returns true if and only if the number has the largest possible finite |
562 | /// magnitude in the current semantics. |
563 | fn is_largest(self) -> bool { |
564 | Self::largest().copy_sign(self).bitwise_eq(self) |
565 | } |
566 | |
567 | /// Returns true if and only if the number is an exact integer. |
568 | fn is_integer(self) -> bool { |
569 | // This could be made more efficient; I'm going for obviously correct. |
570 | if !self.is_finite() { |
571 | return false; |
572 | } |
573 | self.round_to_integral(Round::TowardZero).value.bitwise_eq(self) |
574 | } |
575 | |
576 | /// If this value has an exact multiplicative inverse, return it. |
577 | fn get_exact_inverse(self) -> Option<Self>; |
578 | |
579 | /// Returns the exponent of the internal representation of the Float. |
580 | /// |
581 | /// Because the radix of Float is 2, this is equivalent to floor(log2(x)). |
582 | /// For special Float values, this returns special error codes: |
583 | /// |
584 | /// NaN -> \c IEK_NAN |
585 | /// 0 -> \c IEK_ZERO |
586 | /// Inf -> \c IEK_INF |
587 | /// |
588 | fn ilogb(self) -> ExpInt; |
589 | |
590 | /// Returns: self * 2^exp for integral exponents. |
591 | fn scalbn_r(self, exp: ExpInt, round: Round) -> Self; |
592 | fn scalbn(self, exp: ExpInt) -> Self { |
593 | self.scalbn_r(exp, Round::NearestTiesToEven) |
594 | } |
595 | |
596 | /// Equivalent of C standard library function. |
597 | /// |
598 | /// While the C standard says exp is an unspecified value for infinity and nan, |
599 | /// this returns INT_MAX for infinities, and INT_MIN for NaNs (see `ilogb`). |
600 | fn frexp_r(self, exp: &mut ExpInt, round: Round) -> Self; |
601 | fn frexp(self, exp: &mut ExpInt) -> Self { |
602 | self.frexp_r(exp, Round::NearestTiesToEven) |
603 | } |
604 | } |
605 | |
606 | /// Convert between floating point types. |
607 | pub trait FloatConvert<T: Float>: Float { |
608 | /// Convert a value of one floating point type to another. |
609 | /// The return value corresponds to the IEEE754 exceptions. *loses_info |
610 | /// records whether the transformation lost information, i.e. whether |
611 | /// converting the result back to the original type will produce the |
612 | /// original value (this is almost the same as return value==Status::OK, |
613 | /// but there are edge cases where this is not so). |
614 | fn convert_r(self, round: Round, loses_info: &mut bool) -> StatusAnd<T>; |
615 | |
616 | /// Convert with default [`NearestTiesToEven`](Round::NearestTiesToEven) rounding. |
617 | fn convert(self, loses_info: &mut bool) -> StatusAnd<T> { |
618 | self.convert_r(Round::NearestTiesToEven, loses_info) |
619 | } |
620 | } |
621 | |
// Generates the trait impls shared by every generic float wrapper
// `$ty<$t>` that implements `Float`: `Default` (positive zero), `FromStr`,
// the binary arithmetic operators (returning `StatusAnd<Self>`) and their
// assigning counterparts (which discard the status and keep only the value).
macro_rules! float_common_impls {
    ($ty:ident<$t:tt>) => {
        impl<$t> Default for $ty<$t>
        where
            Self: Float,
        {
            #[inline]
            fn default() -> Self {
                Self::ZERO
            }
        }

        impl<$t> ::core::str::FromStr for $ty<$t>
        where
            Self: Float,
        {
            type Err = ParseError;
            #[inline]
            fn from_str(s: &str) -> Result<Self, ParseError> {
                // The status of the conversion is dropped; only the value is kept.
                Self::from_str_r(s, Round::NearestTiesToEven).map(|x| x.value)
            }
        }

        // Rounding ties to the nearest even, by default.

        impl<$t> ::core::ops::Add for $ty<$t>
        where
            Self: Float,
        {
            type Output = StatusAnd<Self>;
            #[inline]
            fn add(self, rhs: Self) -> StatusAnd<Self> {
                self.add_r(rhs, Round::NearestTiesToEven)
            }
        }

        impl<$t> ::core::ops::Sub for $ty<$t>
        where
            Self: Float,
        {
            type Output = StatusAnd<Self>;
            #[inline]
            fn sub(self, rhs: Self) -> StatusAnd<Self> {
                self.sub_r(rhs, Round::NearestTiesToEven)
            }
        }

        impl<$t> ::core::ops::Mul for $ty<$t>
        where
            Self: Float,
        {
            type Output = StatusAnd<Self>;
            #[inline]
            fn mul(self, rhs: Self) -> StatusAnd<Self> {
                self.mul_r(rhs, Round::NearestTiesToEven)
            }
        }

        impl<$t> ::core::ops::Div for $ty<$t>
        where
            Self: Float,
        {
            type Output = StatusAnd<Self>;
            #[inline]
            fn div(self, rhs: Self) -> StatusAnd<Self> {
                self.div_r(rhs, Round::NearestTiesToEven)
            }
        }

        // `%` maps to C `fmod` semantics, not the IEEE remainder.
        impl<$t> ::core::ops::Rem for $ty<$t>
        where
            Self: Float,
        {
            type Output = StatusAnd<Self>;
            #[inline]
            fn rem(self, rhs: Self) -> StatusAnd<Self> {
                self.c_fmod(rhs)
            }
        }

        impl<$t> ::core::ops::AddAssign for $ty<$t>
        where
            Self: Float,
        {
            #[inline]
            fn add_assign(&mut self, rhs: Self) {
                *self = (*self + rhs).value;
            }
        }

        impl<$t> ::core::ops::SubAssign for $ty<$t>
        where
            Self: Float,
        {
            #[inline]
            fn sub_assign(&mut self, rhs: Self) {
                *self = (*self - rhs).value;
            }
        }

        impl<$t> ::core::ops::MulAssign for $ty<$t>
        where
            Self: Float,
        {
            #[inline]
            fn mul_assign(&mut self, rhs: Self) {
                *self = (*self * rhs).value;
            }
        }

        impl<$t> ::core::ops::DivAssign for $ty<$t>
        where
            Self: Float,
        {
            #[inline]
            fn div_assign(&mut self, rhs: Self) {
                *self = (*self / rhs).value;
            }
        }

        impl<$t> ::core::ops::RemAssign for $ty<$t>
        where
            Self: Float,
        {
            #[inline]
            fn rem_assign(&mut self, rhs: Self) {
                *self = (*self % rhs).value;
            }
        }
    };
}
753 | |
754 | pub mod ieee; |
755 | pub mod ppc; |
756 |
Definitions
- Status
- StatusAnd
- status
- value
- and
- map
- unwrap
- unpack
- Category
- Infinity
- NaN
- Normal
- Zero
- Round
- NearestTiesToEven
- TowardPositive
- TowardNegative
- TowardZero
- NearestTiesToAway
- Output
- neg
- ExpInt
- ParseError
- Float
- qnan
- snan
- largest
- smallest_normalized
- add_r
- sub_r
- mul_r
- mul_add_r
- mul_add
- div_r
- ieee_rem
- c_fmod
- round_to_integral
- next_up
- next_down
- abs
- copy_sign
- from_bits
- from_i128_r
- from_i128
- from_u128_r
- from_u128
- from_str_r
- to_bits
- to_i128_r
- to_i128
- to_u128_r
- to_u128
- cmp_abs_normal
- bitwise_eq
- min
- max
- minimum
- maximum
- is_negative
- is_normal
- is_finite
- is_zero
- is_denormal
- is_infinite
- is_nan
- is_signaling
- category
- is_non_zero
- is_finite_non_zero
- is_pos_zero
- is_neg_zero
- is_pos_infinity
- is_neg_infinity
- is_smallest
- is_smallest_normalized
- is_largest
- is_integer
- get_exact_inverse
- ilogb
- scalbn_r
- scalbn
- frexp_r
- frexp
- FloatConvert
- convert_r
- convert
Learn Rust with the experts
Find out more