errors.rs - Codebrowser

1	// Adapted from https://github.com/Alexhuszagh/rust-lexical.
2
3	//! Estimate the error in an 80-bit approximation of a float.
4	//!
5	//! This estimates the error in a floating-point representation.
6	//!
7	//! This implementation is loosely based off the Golang implementation,
8	//! found here: <https://golang.org/src/strconv/atof.go>
9
10	use super::float::*;
11	use super::num::*;
12	use super::rounding::*;
13
14	pub(crate) trait FloatErrors {
15	/// Get the full error scale.
16	fn error_scale() -> u32;
17	/// Get the half error scale.
18	fn error_halfscale() -> u32;
19	/// Determine if the number of errors is tolerable for float precision.
20	fn error_is_accurate<F: Float>(count: u32, fp: &ExtendedFloat) -> bool;
21	}
22
23	/// Check if the error is accurate with a round-nearest rounding scheme.
24	#[inline]
25	fn nearest_error_is_accurate(errors: u64, fp: &ExtendedFloat, extrabits: u64) -> bool {
26	// Round-to-nearest, need to use the halfway point.
27	if extrabits == `65` {
28	// Underflow, we have a shift larger than the mantissa.
29	// Representation is valid only* if the value is close enough*
30	// overflow to the next bit within errors. If it overflows,
31	// the representation is not* valid.*
32	!fp.mant.overflowing_add(errors).1
33	} else {
34	let mask: u64 = lower_n_mask(extrabits);
35	let extra: u64 = fp.mant & mask;
36
37	// Round-to-nearest, need to check if we're close to halfway.
38	// IE, b10100 \| 100000, where `\|` signifies the truncation point.
39	let halfway: u64 = lower_n_halfway(extrabits);
40	let cmp1 = halfway.wrapping_sub(errors) < extra;
41	let cmp2 = extra < halfway.wrapping_add(errors);
42
43	// If both comparisons are true, we have significant rounding error,
44	// and the value cannot be exactly represented. Otherwise, the
45	// representation is valid.
46	!(cmp1 && cmp2)
47	}
48	}
49
50	impl FloatErrors for u64 {
51	#[inline]
52	fn error_scale() -> u32 {
53	`8`
54	}
55
56	#[inline]
57	fn error_halfscale() -> u32 {
58	u64::error_scale() / `2`
59	}
60
61	#[inline]
62	fn error_is_accurate<F: Float>(count: u32, fp: &ExtendedFloat) -> bool {
63	// Determine if extended-precision float is a good approximation.
64	// If the error has affected too many units, the float will be
65	// inaccurate, or if the representation is too close to halfway
66	// that any operations could affect this halfway representation.
67	// See the documentation for dtoa for more information.
68	let bias = -(F::EXPONENT_BIAS - F::MANTISSA_SIZE);
69	let denormal_exp = bias - `63`;
70	// This is always a valid u32, since (denormal_exp - fp.exp)
71	// will always be positive and the significand size is {23, 52}.
72	let extrabits = if fp.exp <= denormal_exp {
73	`64` - F::MANTISSA_SIZE + denormal_exp - fp.exp
74	} else {
75	`63` - F::MANTISSA_SIZE
76	};
77
78	// Our logic is as follows: we want to determine if the actual
79	// mantissa and the errors during calculation differ significantly
80	// from the rounding point. The rounding point for round-nearest
81	// is the halfway point, IE, this when the truncated bits start
82	// with b1000..., while the rounding point for the round-toward
83	// is when the truncated bits are equal to 0.
84	// To do so, we can check whether the rounding point +/- the error
85	// are >/< the actual lower n bits.
86	//
87	// For whether we need to use signed or unsigned types for this
88	// analysis, see this example, using u8 rather than u64 to simplify
89	// things.
90	//
91	// # Comparisons
92	// cmp1 = (halfway - errors) < extra
93	// cmp1 = extra < (halfway + errors)
94	//
95	// # Large Extrabits, Low Errors
96	//
97	// extrabits = 8
98	// halfway = 0b10000000
99	// extra = 0b10000010
100	// errors = 0b00000100
101	// halfway - errors = 0b01111100
102	// halfway + errors = 0b10000100
103	//
104	// Unsigned:
105	// halfway - errors = 124
106	// halfway + errors = 132
107	// extra = 130
108	// cmp1 = true
109	// cmp2 = true
110	// Signed:
111	// halfway - errors = 124
112	// halfway + errors = -124
113	// extra = -126
114	// cmp1 = false
115	// cmp2 = true
116	//
117	// # Conclusion
118	//
119	// Since errors will always be small, and since we want to detect
120	// if the representation is accurate, we need to use an unsigned
121	// type for comparisons.
122
123	let extrabits = extrabits as u64;
124	let errors = count as u64;
125	if extrabits > `65` {
126	// Underflow, we have a literal 0.
127	return `true`;
128	}
129
130	nearest_error_is_accurate(errors, fp, extrabits)
131	}
132	}
133

Provided by KDAB

Definitions