scalbn.rs source code [crates/compiler_builtins/libm/src/math/generic/scalbn.rs]

1	use super::super::{CastFrom, CastInto, Float, IntTy, MinInt};
2
3	/// Scale the exponent.
4	///
5	/// From N3220:
6	///
7	/// > The scalbn and scalbln functions compute `x b^n`, where `b = FLT_RADIX` if the return type*
8	/// > of the function is a standard floating type, or `b = 10` if the return type of the function
9	/// > is a decimal floating type. A range error occurs for some finite x, depending on n.
10	/// >
11	/// > [...]
12	/// >
13	/// > `scalbn(±0, n)` returns `±0`.*
14	/// > `scalbn(x, 0)` returns `x`.*
15	/// > `scalbn(±∞, n)` returns `±∞`.*
16	/// >
17	/// > If the calculation does not overflow or underflow, the returned value is exact and
18	/// > independent of the current rounding direction mode.
19	pub fn scalbn<F: Float>(mut x: F, mut n: i32) -> F
20	where
21	u32: CastInto<F::Int>,
22	F::Int: CastFrom<i32>,
23	F::Int: CastFrom<u32>,
24	{
25	let zero = IntTy::<F>::ZERO;
26
27	// Bits including the implicit bit
28	let sig_total_bits = F::SIG_BITS + `1`;
29
30	// Maximum and minimum values when biased
31	let exp_max = F::EXP_MAX;
32	let exp_min = F::EXP_MIN;
33
34	// 2 ^ Emax, maximum positive with null significand (0x1p1023 for f64)
35	let f_exp_max = F::from_parts(`false`, F::EXP_BIAS << `1`, zero);
36
37	// 2 ^ Emin, minimum positive normal with null significand (0x1p-1022 for f64)
38	let f_exp_min = F::from_parts(`false`, `1`, zero);
39
40	// 2 ^ sig_total_bits, moltiplier to normalize subnormals (0x1p53 for f64)
41	let f_pow_subnorm = F::from_parts(`false`, sig_total_bits + F::EXP_BIAS, zero);
42
43	/*
44	* The goal is to multiply `x` by a scale factor that applies `n`. However, there are cases
45	* where `2^n` is not representable by `F` but the result should be, e.g. `x = 2^Emin` with
46	* `n = -EMin + 2` (one out of range of 2^Emax). To get around this, reduce the magnitude of
47	* the final scale operation by prescaling by the max/min power representable by `F`.
48	*/
49
50	if n > exp_max {
51	// Worse case positive `n`: `x` is the minimum subnormal value, the result is `F::MAX`.
52	// This can be reached by three scaling multiplications (two here and one final).
53	debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= exp_max * `3`);
54
55	x *= f_exp_max;
56	n -= exp_max;
57	if n > exp_max {
58	x *= f_exp_max;
59	n -= exp_max;
60	if n > exp_max {
61	n = exp_max;
62	}
63	}
64	} else if n < exp_min {
65	// When scaling toward 0, the prescaling is limited to a value that does not allow `x` to
66	// go subnormal. This avoids double rounding.
67	if F::BITS > `16` {
68	// `mul` s.t. `!(x mul).is_subnormal() ∀ x`*
69	let mul = f_exp_min * f_pow_subnorm;
70	let add = -exp_min - sig_total_bits as i32;
71
72	// Worse case negative `n`: `x` is the maximum positive value, the result is `F::MIN`.
73	// This must be reachable by three scaling multiplications (two here and one final).
74	debug_assert!(-exp_min + F::SIG_BITS as i32 + exp_max <= add * `2` + -exp_min);
75
76	x *= mul;
77	n += add;
78
79	if n < exp_min {
80	x *= mul;
81	n += add;
82
83	if n < exp_min {
84	n = exp_min;
85	}
86	}
87	} else {
88	// `f16` is unique compared to other float types in that the difference between the
89	// minimum exponent and the significand bits (`add = -exp_min - sig_total_bits`) is
90	// small, only three. The above method depend on decrementing `n` by `add` two times;
91	// for other float types this works out because `add` is a substantial fraction of
92	// the exponent range. For `f16`, however, 3 is relatively small compared to the
93	// exponent range (which is 39), so that requires ~10 prescale rounds rather than two.
94	//
95	// Work aroudn this by using a different algorithm that calculates the prescale
96	// dynamically based on the maximum possible value. This adds more operations per round
97	// since it needs to construct the scale, but works better in the general case.
98	let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
99	let mul = F::from_parts(`false`, (F::EXP_BIAS as i32 - add) as u32, zero);
100
101	x *= mul;
102	n += add;
103
104	if n < exp_min {
105	let add = -(n + sig_total_bits as i32).clamp(exp_min, sig_total_bits as i32);
106	let mul = F::from_parts(`false`, (F::EXP_BIAS as i32 - add) as u32, zero);
107
108	x *= mul;
109	n += add;
110
111	if n < exp_min {
112	n = exp_min;
113	}
114	}
115	}
116	}
117
118	let scale = F::from_parts(`false`, (F::EXP_BIAS as i32 + n) as u32, zero);
119	x * scale
120	}
121
122	#[cfg(test)]
123	mod tests {
124	use super::super::super::Int;
125	use super::*;
126
127	// Tests against N3220
128	fn spec_test<F: Float>()
129	where
130	u32: CastInto<F::Int>,
131	F::Int: CastFrom<i32>,
132	F::Int: CastFrom<u32>,
133	{
134	// `scalbn(±0, n)` returns `±0`.
135	assert_biteq!(scalbn(F::NEG_ZERO, `10`), F::NEG_ZERO);
136	assert_biteq!(scalbn(F::NEG_ZERO, `0`), F::NEG_ZERO);
137	assert_biteq!(scalbn(F::NEG_ZERO, -`10`), F::NEG_ZERO);
138	assert_biteq!(scalbn(F::ZERO, `10`), F::ZERO);
139	assert_biteq!(scalbn(F::ZERO, `0`), F::ZERO);
140	assert_biteq!(scalbn(F::ZERO, -`10`), F::ZERO);
141
142	// `scalbn(x, 0)` returns `x`.
143	assert_biteq!(scalbn(F::MIN, `0`), F::MIN);
144	assert_biteq!(scalbn(F::MAX, `0`), F::MAX);
145	assert_biteq!(scalbn(F::INFINITY, `0`), F::INFINITY);
146	assert_biteq!(scalbn(F::NEG_INFINITY, `0`), F::NEG_INFINITY);
147	assert_biteq!(scalbn(F::ZERO, `0`), F::ZERO);
148	assert_biteq!(scalbn(F::NEG_ZERO, `0`), F::NEG_ZERO);
149
150	// `scalbn(±∞, n)` returns `±∞`.
151	assert_biteq!(scalbn(F::INFINITY, `10`), F::INFINITY);
152	assert_biteq!(scalbn(F::INFINITY, -`10`), F::INFINITY);
153	assert_biteq!(scalbn(F::NEG_INFINITY, `10`), F::NEG_INFINITY);
154	assert_biteq!(scalbn(F::NEG_INFINITY, -`10`), F::NEG_INFINITY);
155
156	// NaN should remain NaNs.
157	assert!(scalbn(F::NAN, `10`).is_nan());
158	assert!(scalbn(F::NAN, `0`).is_nan());
159	assert!(scalbn(F::NAN, -`10`).is_nan());
160	assert!(scalbn(-F::NAN, `10`).is_nan());
161	assert!(scalbn(-F::NAN, `0`).is_nan());
162	assert!(scalbn(-F::NAN, -`10`).is_nan());
163	}
164
165	#[test]
166	#[cfg(f16_enabled)]
167	fn spec_test_f16() {
168	spec_test::<f16>();
169	}
170
171	#[test]
172	fn spec_test_f32() {
173	spec_test::<f32>();
174	}
175
176	#[test]
177	fn spec_test_f64() {
178	spec_test::<f64>();
179	}
180
181	#[test]
182	#[cfg(f128_enabled)]
183	fn spec_test_f128() {
184	spec_test::<f128>();
185	}
186	}
187