1 | //===-- Double-precision x^y function -------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "src/math/pow.h" |
10 | #include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2. |
11 | #include "hdr/errno_macros.h" |
12 | #include "hdr/fenv_macros.h" |
13 | #include "src/__support/CPP/bit.h" |
14 | #include "src/__support/FPUtil/FEnvImpl.h" |
15 | #include "src/__support/FPUtil/FPBits.h" |
16 | #include "src/__support/FPUtil/PolyEval.h" |
17 | #include "src/__support/FPUtil/double_double.h" |
18 | #include "src/__support/FPUtil/multiply_add.h" |
19 | #include "src/__support/FPUtil/nearest_integer.h" |
20 | #include "src/__support/FPUtil/sqrt.h" // Speedup for pow(x, 1/2) = sqrt(x) |
21 | #include "src/__support/common.h" |
22 | #include "src/__support/macros/config.h" |
23 | #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY |
24 | |
25 | namespace LIBC_NAMESPACE_DECL { |
26 | |
27 | using fputil::DoubleDouble; |
28 | |
29 | namespace { |
30 | |
31 | // Constants for log2(x) range reduction, generated by Sollya with: |
32 | // > for i from 0 to 127 do { |
33 | // r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^-7) ); |
34 | // b = nearestint(log2(r) * 2^41) * 2^-41; |
35 | // c = round(log2(r) - b, D, RN); |
36 | // print("{", -c, ",", -b, "},"); |
37 | // }; |
38 | // This is the same as -log2(RD[i]), with the least significant bits of the |
39 | // high part set to be 2^-41, so that the sum of high parts + e_x is exact in |
40 | // double precision. |
41 | // We also replace the first and the last ones to be 0. |
42 | constexpr DoubleDouble LOG2_R_DD[128] = { |
43 | {0.0, 0.0}, |
44 | {-0x1.19b14945cf6bap-44, 0x1.72c7ba21p-7}, |
45 | {-0x1.95539356f93dcp-43, 0x1.743ee862p-6}, |
46 | {0x1.abe0a48f83604p-43, 0x1.184b8e4c5p-5}, |
47 | {0x1.635577970e04p-43, 0x1.77394c9d9p-5}, |
48 | {-0x1.401fbaaa67e3cp-45, 0x1.d6ebd1f2p-5}, |
49 | {-0x1.5b1799ceaeb51p-43, 0x1.1bb32a6008p-4}, |
50 | {0x1.7c407050799bfp-43, 0x1.4c560fe688p-4}, |
51 | {0x1.da6339da288fcp-43, 0x1.7d60496cf8p-4}, |
52 | {0x1.be4f6f22dbbadp-43, 0x1.960caf9ab8p-4}, |
53 | {-0x1.c760bc9b188c4p-45, 0x1.c7b528b71p-4}, |
54 | {0x1.164e932b2d51cp-44, 0x1.f9c95dc1dp-4}, |
55 | {0x1.924ae921f7ecap-45, 0x1.097e38ce6p-3}, |
56 | {-0x1.6d25a5b8a19b2p-44, 0x1.22dadc2ab4p-3}, |
57 | {0x1.e50a1644ac794p-43, 0x1.3c6fb650ccp-3}, |
58 | {0x1.f34baa74a7942p-43, 0x1.494f863b8cp-3}, |
59 | {-0x1.8f7aac147fdc1p-46, 0x1.633a8bf438p-3}, |
60 | {0x1.f84be19cb9578p-43, 0x1.7046031c78p-3}, |
61 | {-0x1.66cccab240e9p-46, 0x1.8a8980abfcp-3}, |
62 | {-0x1.3f7a55cd2af4cp-47, 0x1.97c1cb13c8p-3}, |
63 | {0x1.3458cde69308cp-43, 0x1.b2602497d4p-3}, |
64 | {-0x1.667f21fa8423fp-44, 0x1.bfc67a8p-3}, |
65 | {0x1.d2fe4574e09b9p-47, 0x1.dac22d3e44p-3}, |
66 | {0x1.367bde40c5e6dp-43, 0x1.e857d3d36p-3}, |
67 | {0x1.d45da26510033p-46, 0x1.01d9bbcfa6p-2}, |
68 | {-0x1.7204f55bbf90dp-44, 0x1.08bce0d96p-2}, |
69 | {-0x1.d4f1b95e0ff45p-43, 0x1.169c05364p-2}, |
70 | {0x1.c20d74c0211bfp-44, 0x1.1d982c9d52p-2}, |
71 | {0x1.ad89a083e072ap-43, 0x1.249cd2b13cp-2}, |
72 | {0x1.cd0cb4492f1bcp-43, 0x1.32bfee370ep-2}, |
73 | {-0x1.2101a9685c779p-47, 0x1.39de8e155ap-2}, |
74 | {0x1.9451cd394fe8dp-43, 0x1.4106017c3ep-2}, |
75 | {0x1.661e393a16b95p-44, 0x1.4f6fbb2cecp-2}, |
76 | {-0x1.c6d8d86531d56p-44, 0x1.56b22e6b58p-2}, |
77 | {0x1.c1c885adb21d3p-43, 0x1.5dfdcf1eeap-2}, |
78 | {0x1.3bb5921006679p-45, 0x1.6552b49986p-2}, |
79 | {0x1.1d406db502403p-43, 0x1.6cb0f6865cp-2}, |
80 | {0x1.55a63e278bad5p-43, 0x1.7b89f02cf2p-2}, |
81 | {-0x1.66ae2a7ada553p-49, 0x1.8304d90c12p-2}, |
82 | {-0x1.66cccab240e9p-45, 0x1.8a8980abfcp-2}, |
83 | {-0x1.62404772a151dp-45, 0x1.921800924ep-2}, |
84 | {0x1.ac9bca36fd02ep-44, 0x1.99b072a96cp-2}, |
85 | {0x1.4bc302ffa76fbp-43, 0x1.a8ff97181p-2}, |
86 | {0x1.01fea1ec47c71p-43, 0x1.b0b67f4f46p-2}, |
87 | {-0x1.f20203b3186a6p-43, 0x1.b877c57b1cp-2}, |
88 | {-0x1.2642415d47384p-45, 0x1.c043859e3p-2}, |
89 | {-0x1.bc76a2753b99bp-50, 0x1.c819dc2d46p-2}, |
90 | {-0x1.da93ae3a5f451p-43, 0x1.cffae611aep-2}, |
91 | {-0x1.50e785694a8c6p-43, 0x1.d7e6c0abc4p-2}, |
92 | {0x1.c56138c894641p-43, 0x1.dfdd89d586p-2}, |
93 | {0x1.5669df6a2b592p-43, 0x1.e7df5fe538p-2}, |
94 | {-0x1.ea92d9e0e8ac2p-48, 0x1.efec61b012p-2}, |
95 | {0x1.a0331af2e6feap-43, 0x1.f804ae8d0cp-2}, |
96 | {0x1.9518ce032f41dp-48, 0x1.0014332bep-1}, |
97 | {-0x1.b3b3864c60011p-44, 0x1.042bd4b9a8p-1}, |
98 | {-0x1.103e8f00d41c8p-45, 0x1.08494c66b9p-1}, |
99 | {0x1.65be75cc3da17p-43, 0x1.0c6caaf0c5p-1}, |
100 | {0x1.3676289cd3dd4p-43, 0x1.1096015deep-1}, |
101 | {-0x1.41dfc7d7c3321p-43, 0x1.14c560fe69p-1}, |
102 | {0x1.e0cda8bd74461p-44, 0x1.18fadb6e2dp-1}, |
103 | {0x1.2a606046ad444p-44, 0x1.1d368296b5p-1}, |
104 | {0x1.f9ea977a639cp-43, 0x1.217868b0c3p-1}, |
105 | {-0x1.50520a377c7ecp-45, 0x1.25c0a0463cp-1}, |
106 | {0x1.6e3cb71b554e7p-47, 0x1.2a0f3c3407p-1}, |
107 | {-0x1.4275f1035e5e8p-48, 0x1.2e644fac05p-1}, |
108 | {-0x1.4275f1035e5e8p-48, 0x1.2e644fac05p-1}, |
109 | {-0x1.979a5db68721dp-45, 0x1.32bfee370fp-1}, |
110 | {0x1.1ee969a95f529p-43, 0x1.37222bb707p-1}, |
111 | {0x1.bb4b69336b66ep-43, 0x1.3b8b1c68fap-1}, |
112 | {0x1.d5e6a8a4fb059p-45, 0x1.3ffad4e74fp-1}, |
113 | {0x1.3106e404cabb7p-44, 0x1.44716a2c08p-1}, |
114 | {0x1.3106e404cabb7p-44, 0x1.44716a2c08p-1}, |
115 | {-0x1.9bcaf1aa4168ap-43, 0x1.48eef19318p-1}, |
116 | {0x1.1646b761c48dep-44, 0x1.4d7380dcc4p-1}, |
117 | {0x1.2f0c0bfe9dbecp-43, 0x1.51ff2e3021p-1}, |
118 | {0x1.29904613e33cp-43, 0x1.5692101d9bp-1}, |
119 | {0x1.1d406db502403p-44, 0x1.5b2c3da197p-1}, |
120 | {0x1.1d406db502403p-44, 0x1.5b2c3da197p-1}, |
121 | {-0x1.125d6cbcd1095p-44, 0x1.5fcdce2728p-1}, |
122 | {-0x1.bd9b32266d92cp-43, 0x1.6476d98adap-1}, |
123 | {0x1.54243b21709cep-44, 0x1.6927781d93p-1}, |
124 | {0x1.54243b21709cep-44, 0x1.6927781d93p-1}, |
125 | {-0x1.ce60916e52e91p-44, 0x1.6ddfc2a79p-1}, |
126 | {0x1.f1f5ae718f241p-43, 0x1.729fd26b7p-1}, |
127 | {-0x1.6eb9612e0b4f3p-43, 0x1.7767c12968p-1}, |
128 | {-0x1.6eb9612e0b4f3p-43, 0x1.7767c12968p-1}, |
129 | {0x1.fed21f9cb2cc5p-43, 0x1.7c37a9227ep-1}, |
130 | {0x1.7f5dc57266758p-43, 0x1.810fa51bf6p-1}, |
131 | {0x1.7f5dc57266758p-43, 0x1.810fa51bf6p-1}, |
132 | {0x1.5b338360c2ae2p-43, 0x1.85efd062c6p-1}, |
133 | {-0x1.96fc8f4b56502p-43, 0x1.8ad846cf37p-1}, |
134 | {-0x1.96fc8f4b56502p-43, 0x1.8ad846cf37p-1}, |
135 | {-0x1.bdc81c4db3134p-44, 0x1.8fc924c89bp-1}, |
136 | {0x1.36c101ee1344p-43, 0x1.94c287492cp-1}, |
137 | {0x1.36c101ee1344p-43, 0x1.94c287492cp-1}, |
138 | {0x1.e41fa0a62e6aep-44, 0x1.99c48be206p-1}, |
139 | {-0x1.d97ee9124773bp-46, 0x1.9ecf50bf44p-1}, |
140 | {-0x1.d97ee9124773bp-46, 0x1.9ecf50bf44p-1}, |
141 | {-0x1.3f94e00e7d6bcp-46, 0x1.a3e2f4ac44p-1}, |
142 | {-0x1.6879fa00b120ap-43, 0x1.a8ff971811p-1}, |
143 | {-0x1.6879fa00b120ap-43, 0x1.a8ff971811p-1}, |
144 | {0x1.1659d8e2d7d38p-44, 0x1.ae255819fp-1}, |
145 | {0x1.1e5e0ae0d3f8ap-43, 0x1.b35458761dp-1}, |
146 | {0x1.1e5e0ae0d3f8ap-43, 0x1.b35458761dp-1}, |
147 | {0x1.484a15babcf88p-43, 0x1.b88cb9a2abp-1}, |
148 | {0x1.484a15babcf88p-43, 0x1.b88cb9a2abp-1}, |
149 | {0x1.871a7610e40bdp-45, 0x1.bdce9dcc96p-1}, |
150 | {-0x1.2d90e5edaeceep-43, 0x1.c31a27dd01p-1}, |
151 | {-0x1.2d90e5edaeceep-43, 0x1.c31a27dd01p-1}, |
152 | {-0x1.5dd31d962d373p-43, 0x1.c86f7b7ea5p-1}, |
153 | {-0x1.5dd31d962d373p-43, 0x1.c86f7b7ea5p-1}, |
154 | {-0x1.9ad57391924a7p-43, 0x1.cdcebd2374p-1}, |
155 | {-0x1.3167ccc538261p-44, 0x1.d338120a6ep-1}, |
156 | {-0x1.3167ccc538261p-44, 0x1.d338120a6ep-1}, |
157 | {0x1.c7a4ff65ddbc9p-45, 0x1.d8aba045bp-1}, |
158 | {0x1.c7a4ff65ddbc9p-45, 0x1.d8aba045bp-1}, |
159 | {-0x1.f9ab3cf74babap-44, 0x1.de298ec0bbp-1}, |
160 | {-0x1.f9ab3cf74babap-44, 0x1.de298ec0bbp-1}, |
161 | {0x1.52842c1c1e586p-43, 0x1.e3b20546f5p-1}, |
162 | {0x1.52842c1c1e586p-43, 0x1.e3b20546f5p-1}, |
163 | {0x1.3c6764fc87b4ap-48, 0x1.e9452c8a71p-1}, |
164 | {0x1.3c6764fc87b4ap-48, 0x1.e9452c8a71p-1}, |
165 | {-0x1.a0976c0a2827dp-44, 0x1.eee32e2aedp-1}, |
166 | {-0x1.a0976c0a2827dp-44, 0x1.eee32e2aedp-1}, |
167 | {-0x1.a45314dc4fc42p-43, 0x1.f48c34bd1fp-1}, |
168 | {-0x1.a45314dc4fc42p-43, 0x1.f48c34bd1fp-1}, |
169 | {0x1.ef5d00e390ap-44, 0x1.fa406bd244p-1}, |
170 | {0.0, 1.0}, |
171 | }; |
172 | |
173 | bool is_odd_integer(double x) { |
174 | using FPBits = fputil::FPBits<double>; |
175 | FPBits xbits(x); |
176 | uint64_t x_u = xbits.uintval(); |
177 | unsigned x_e = static_cast<unsigned>(xbits.get_biased_exponent()); |
178 | unsigned lsb = |
179 | static_cast<unsigned>(cpp::countr_zero(x_u | FPBits::EXP_MASK)); |
180 | constexpr unsigned UNIT_EXPONENT = |
181 | static_cast<unsigned>(FPBits::EXP_BIAS + FPBits::FRACTION_LEN); |
182 | return (x_e + lsb == UNIT_EXPONENT); |
183 | } |
184 | |
185 | bool is_integer(double x) { |
186 | using FPBits = fputil::FPBits<double>; |
187 | FPBits xbits(x); |
188 | uint64_t x_u = xbits.uintval(); |
189 | unsigned x_e = static_cast<unsigned>(xbits.get_biased_exponent()); |
190 | unsigned lsb = |
191 | static_cast<unsigned>(cpp::countr_zero(x_u | FPBits::EXP_MASK)); |
192 | constexpr unsigned UNIT_EXPONENT = |
193 | static_cast<unsigned>(FPBits::EXP_BIAS + FPBits::FRACTION_LEN); |
194 | return (x_e + lsb >= UNIT_EXPONENT); |
195 | } |
196 | |
197 | } // namespace |
198 | |
199 | LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) { |
200 | using FPBits = fputil::FPBits<double>; |
201 | |
202 | FPBits xbits(x), ybits(y); |
203 | |
204 | bool x_sign = xbits.sign() == Sign::NEG; |
205 | bool y_sign = ybits.sign() == Sign::NEG; |
206 | |
207 | FPBits x_abs = xbits.abs(); |
208 | FPBits y_abs = ybits.abs(); |
209 | |
210 | uint64_t x_mant = xbits.get_mantissa(); |
211 | uint64_t y_mant = ybits.get_mantissa(); |
212 | uint64_t x_u = xbits.uintval(); |
213 | uint64_t x_a = x_abs.uintval(); |
214 | uint64_t y_a = y_abs.uintval(); |
215 | |
216 | double e_x = static_cast<double>(xbits.get_exponent()); |
217 | uint64_t sign = 0; |
218 | |
219 | ///////// BEGIN - Check exceptional cases //////////////////////////////////// |
220 | // If x or y is signaling NaN |
221 | if (x_abs.is_signaling_nan() || y_abs.is_signaling_nan()) { |
222 | fputil::raise_except_if_required(FE_INVALID); |
223 | return FPBits::quiet_nan().get_val(); |
224 | } |
225 | |
226 | // The double precision number that is closest to 1 is (1 - 2^-53), which has |
227 | // log2(1 - 2^-53) ~ -1.715...p-53. |
228 | // So if |y| > |1075 / log2(1 - 2^-53)|, and x is finite: |
229 | // |y * log2(x)| = 0 or > 1075. |
230 | // Hence x^y will either overflow or underflow if x is not zero. |
231 | if (LIBC_UNLIKELY(y_mant == 0 || y_a > 0x43d7'4910'd52d'3052 || |
232 | x_u == FPBits::one().uintval() || |
233 | x_u >= FPBits::inf().uintval() || |
234 | x_u < FPBits::min_normal().uintval())) { |
235 | // Exceptional exponents. |
236 | if (y == 0.0) |
237 | return 1.0; |
238 | |
239 | switch (y_a) { |
240 | case 0x3fe0'0000'0000'0000: { // y = +-0.5 |
241 | // TODO: speed up x^(-1/2) with rsqrt(x) when available. |
242 | if (LIBC_UNLIKELY( |
243 | (x == 0.0 || x_u == FPBits::inf(Sign::NEG).uintval()))) { |
244 | // pow(-0, 1/2) = +0 |
245 | // pow(-inf, 1/2) = +inf |
246 | // Make sure it works correctly for FTZ/DAZ. |
247 | return y_sign ? 1.0 / (x * x) : (x * x); |
248 | } |
249 | return y_sign ? (1.0 / fputil::sqrt<double>(x)) : fputil::sqrt<double>(x); |
250 | } |
251 | case 0x3ff0'0000'0000'0000: // y = +-1.0 |
252 | return y_sign ? (1.0 / x) : x; |
253 | case 0x4000'0000'0000'0000: // y = +-2.0; |
254 | return y_sign ? (1.0 / (x * x)) : (x * x); |
255 | } |
256 | |
257 | // |y| > |1075 / log2(1 - 2^-53)|. |
258 | if (y_a > 0x43d7'4910'd52d'3052) { |
259 | if (y_a >= 0x7ff0'0000'0000'0000) { |
260 | // y is inf or nan |
261 | if (y_mant != 0) { |
262 | // y is NaN |
263 | // pow(1, NaN) = 1 |
264 | // pow(x, NaN) = NaN |
265 | return (x_u == FPBits::one().uintval()) ? 1.0 : y; |
266 | } |
267 | |
268 | // Now y is +-Inf |
269 | if (x_abs.is_nan()) { |
270 | // pow(NaN, +-Inf) = NaN |
271 | return x; |
272 | } |
273 | |
274 | if (x_a == 0x3ff0'0000'0000'0000) { |
275 | // pow(+-1, +-Inf) = 1.0 |
276 | return 1.0; |
277 | } |
278 | |
279 | if (x == 0.0 && y_sign) { |
280 | // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO |
281 | fputil::set_errno_if_required(EDOM); |
282 | fputil::raise_except_if_required(FE_DIVBYZERO); |
283 | return FPBits::inf().get_val(); |
284 | } |
285 | // pow (|x| < 1, -inf) = +inf |
286 | // pow (|x| < 1, +inf) = 0.0 |
287 | // pow (|x| > 1, -inf) = 0.0 |
288 | // pow (|x| > 1, +inf) = +inf |
289 | return ((x_a < FPBits::one().uintval()) == y_sign) |
290 | ? FPBits::inf().get_val() |
291 | : 0.0; |
292 | } |
293 | // x^y will overflow / underflow in double precision. Set y to a |
294 | // large enough exponent but not too large, so that the computations |
295 | // won't overflow in double precision. |
296 | y = y_sign ? -0x1.0p100 : 0x1.0p100; |
297 | } |
298 | |
299 | // y is finite and non-zero. |
300 | |
301 | if (x_u == FPBits::one().uintval()) { |
302 | // pow(1, y) = 1 |
303 | return 1.0; |
304 | } |
305 | |
306 | // TODO: Speed things up with pow(2, y) = exp2(y) and pow(10, y) = exp10(y). |
307 | |
308 | if (x == 0.0) { |
309 | bool out_is_neg = x_sign && is_odd_integer(y); |
310 | if (y_sign) { |
311 | // pow(0, negative number) = inf |
312 | fputil::set_errno_if_required(EDOM); |
313 | fputil::raise_except_if_required(FE_DIVBYZERO); |
314 | return FPBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val(); |
315 | } |
316 | // pow(0, positive number) = 0 |
317 | return out_is_neg ? -0.0 : 0.0; |
318 | } |
319 | |
320 | if (x_a == FPBits::inf().uintval()) { |
321 | bool out_is_neg = x_sign && is_odd_integer(y); |
322 | if (y_sign) |
323 | return out_is_neg ? -0.0 : 0.0; |
324 | return FPBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val(); |
325 | } |
326 | |
327 | if (x_a > FPBits::inf().uintval()) { |
328 | // x is NaN. |
329 | // pow (aNaN, 0) is already taken care above. |
330 | return x; |
331 | } |
332 | |
333 | // Normalize denormal inputs. |
334 | if (x_a < FPBits::min_normal().uintval()) { |
335 | e_x -= 64.0; |
336 | x_mant = FPBits(x * 0x1.0p64).get_mantissa(); |
337 | } |
338 | |
339 | // x is finite and negative, and y is a finite integer. |
340 | if (x_sign) { |
341 | if (is_integer(y)) { |
342 | x = -x; |
343 | if (is_odd_integer(y)) |
344 | // sign = -1.0; |
345 | sign = 0x8000'0000'0000'0000; |
346 | } else { |
347 | // pow( negative, non-integer ) = NaN |
348 | fputil::set_errno_if_required(EDOM); |
349 | fputil::raise_except_if_required(FE_INVALID); |
350 | return FPBits::quiet_nan().get_val(); |
351 | } |
352 | } |
353 | } |
354 | |
355 | ///////// END - Check exceptional cases ////////////////////////////////////// |
356 | |
357 | // x^y = 2^( y * log2(x) ) |
358 | // = 2^( y * ( e_x + log2(m_x) ) ) |
359 | // First we compute log2(x) = e_x + log2(m_x) |
360 | |
361 | // Extract exponent field of x. |
362 | |
363 | // Use the highest 7 fractional bits of m_x as the index for look up tables. |
364 | unsigned idx_x = static_cast<unsigned>(x_mant >> (FPBits::FRACTION_LEN - 7)); |
365 | // Add the hidden bit to the mantissa. |
366 | // 1 <= m_x < 2 |
367 | FPBits m_x = FPBits(x_mant | 0x3ff0'0000'0000'0000); |
368 | |
369 | // Reduced argument for log2(m_x): |
370 | // dx = r * m_x - 1. |
371 | // The computation is exact, and -2^-8 <= dx < 2^-7. |
372 | // Then m_x = (1 + dx) / r, and |
373 | // log2(m_x) = log2( (1 + dx) / r ) |
374 | // = log2(1 + dx) - log2(r). |
375 | |
376 | // In order for the overall computations x^y = 2^(y * log2(x)) to have the |
377 | // relative errors < 2^-52 (1ULP), we will need to evaluate the exponent part |
378 | // y * log2(x) with absolute errors < 2^-52 (or better, 2^-53). Since the |
379 | // whole exponent range for double precision is bounded by |
380 | // |y * log2(x)| < 1076 ~ 2^10, we need to evaluate log2(x) with absolute |
381 | // errors < 2^-53 * 2^-10 = 2^-63. |
382 | |
383 | // With that requirement, we use the following degree-6 polynomial |
384 | // approximation: |
385 | // P(dx) ~ log2(1 + dx) / dx |
386 | // Generated by Sollya with: |
387 | // > P = fpminimax(log2(1 + x)/x, 6, [|D...|], [-2^-8, 2^-7]); P; |
388 | // > dirtyinfnorm(log2(1 + x) - x*P, [-2^-8, 2^-7]); |
389 | // 0x1.d03cc...p-66 |
390 | constexpr double COEFFS[] = {0x1.71547652b82fep0, -0x1.71547652b82e7p-1, |
391 | 0x1.ec709dc3b1fd5p-2, -0x1.7154766124215p-2, |
392 | 0x1.2776bd90259d8p-2, -0x1.ec586c6f3d311p-3, |
393 | 0x1.9c4775eccf524p-3}; |
394 | // Error: ulp(dx^2) <= (2^-7)^2 * 2^-52 = 2^-66 |
395 | // Extra errors from various computations and rounding directions, the overall |
396 | // errors we can be bounded by 2^-65. |
397 | |
398 | double dx; |
399 | DoubleDouble dx_c0; |
400 | |
401 | // Perform exact range reduction and exact product dx * c0. |
402 | #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
403 | dx = fputil::multiply_add(RD[idx_x], m_x.get_val(), -1.0); // Exact |
404 | dx_c0 = fputil::exact_mult(COEFFS[0], dx); |
405 | #else |
406 | double c = FPBits(m_x.uintval() & 0x3fff'e000'0000'0000).get_val(); |
407 | dx = fputil::multiply_add(RD[idx_x], m_x.get_val() - c, CD[idx_x]); // Exact |
408 | dx_c0 = fputil::exact_mult<double, 28>(dx, COEFFS[0]); // Exact |
409 | #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
410 | |
411 | double dx2 = dx * dx; |
412 | double c0 = fputil::multiply_add(dx, COEFFS[2], COEFFS[1]); |
413 | double c1 = fputil::multiply_add(dx, COEFFS[4], COEFFS[3]); |
414 | double c2 = fputil::multiply_add(dx, COEFFS[6], COEFFS[5]); |
415 | |
416 | double p = fputil::polyeval(dx2, c0, c1, c2); |
417 | |
418 | // s = e_x - log2(r) + dx * P(dx) |
419 | // Absolute error bound: |
420 | // |log2(x) - log2_x.hi - log2_x.lo| < 2^-65. |
421 | |
422 | // Notice that e_x - log2(r).hi is exact, so we perform an exact sum of |
423 | // e_x - log2(r).hi and the high part of the product dx * c0: |
424 | // log2_x_hi.hi + log2_x_hi.lo = e_x - log2(r).hi + (dx * c0).hi |
425 | DoubleDouble log2_x_hi = |
426 | fputil::exact_add(e_x + LOG2_R_DD[idx_x].hi, dx_c0.hi); |
427 | // The low part is dx^2 * p + low part of (dx * c0) + low part of -log2(r). |
428 | double log2_x_lo = |
429 | fputil::multiply_add(dx2, p, dx_c0.lo + LOG2_R_DD[idx_x].lo); |
430 | // Perform accurate sums. |
431 | DoubleDouble log2_x = fputil::exact_add(log2_x_hi.hi, log2_x_lo); |
432 | log2_x.lo += log2_x_hi.lo; |
433 | |
434 | // To compute 2^(y * log2(x)), we break the exponent into 3 parts: |
435 | // y * log(2) = hi + mid + lo, where |
436 | // hi is an integer |
437 | // mid * 2^6 is an integer |
438 | // |lo| <= 2^-7 |
439 | // Then: |
440 | // x^y = 2^(y * log2(x)) = 2^hi * 2^mid * 2^lo, |
441 | // In which 2^mid is obtained from a look-up table of size 2^6 = 64 elements, |
442 | // and 2^lo ~ 1 + lo * P(lo). |
443 | // Thus, we have: |
444 | // hi + mid = 2^-6 * round( 2^6 * y * log2(x) ) |
445 | // If we restrict the output such that |hi| < 150, (hi + mid) uses (8 + 6) |
446 | // bits, hence, if we use double precision to perform |
447 | // round( 2^6 * y * log2(x)) |
448 | // the lo part is bounded by 2^-7 + 2^(-(52 - 14)) = 2^-7 + 2^-38 |
449 | |
450 | // In the following computations: |
451 | // y6 = 2^6 * y |
452 | // hm = 2^6 * (hi + mid) = round(2^6 * y * log2(x)) ~ round(y6 * s) |
453 | // lo6 = 2^6 * lo = 2^6 * (y - (hi + mid)) = y6 * log2(x) - hm. |
454 | double y6 = y * 0x1.0p6; // Exact. |
455 | |
456 | DoubleDouble y6_log2_x = fputil::exact_mult(y6, log2_x.hi); |
457 | y6_log2_x.lo = fputil::multiply_add(y6, log2_x.lo, y6_log2_x.lo); |
458 | |
459 | // Check overflow/underflow. |
460 | double scale = 1.0; |
461 | |
462 | // |2^(hi + mid) - exp2_hi_mid| <= ulp(exp2_hi_mid) / 2 |
463 | // Clamp the exponent part into smaller range that fits double precision. |
464 | // For those exponents that are out of range, the final conversion will round |
465 | // them correctly to inf/max float or 0/min float accordingly. |
466 | constexpr double UPPER_EXP_BOUND = 512.0 * 0x1.0p6; |
467 | if (LIBC_UNLIKELY(FPBits(y6_log2_x.hi).abs().get_val() >= UPPER_EXP_BOUND)) { |
468 | if (FPBits(y6_log2_x.hi).sign() == Sign::POS) { |
469 | scale = 0x1.0p512; |
470 | y6_log2_x.hi -= 512.0 * 64.0; |
471 | if (y6_log2_x.hi > 513.0 * 64.0) |
472 | y6_log2_x.hi = 513.0 * 64.0; |
473 | } else { |
474 | scale = 0x1.0p-512; |
475 | y6_log2_x.hi += 512.0 * 64.0; |
476 | if (y6_log2_x.hi < (-1076.0 + 512.0) * 64.0) |
477 | y6_log2_x.hi = -564.0 * 64.0; |
478 | } |
479 | } |
480 | |
481 | double hm = fputil::nearest_integer(y6_log2_x.hi); |
482 | |
483 | // lo6 = 2^6 * lo. |
484 | double lo6_hi = y6_log2_x.hi - hm; |
485 | double lo6 = lo6_hi + y6_log2_x.lo; |
486 | |
487 | int hm_i = static_cast<int>(hm); |
488 | unsigned idx_y = static_cast<unsigned>(hm_i) & 0x3f; |
489 | |
490 | // 2^hi |
491 | int64_t exp2_hi_i = static_cast<int64_t>( |
492 | static_cast<uint64_t>(static_cast<int64_t>(hm_i >> 6)) |
493 | << FPBits::FRACTION_LEN); |
494 | // 2^mid |
495 | int64_t exp2_mid_hi_i = |
496 | static_cast<int64_t>(FPBits(EXP2_MID1[idx_y].hi).uintval()); |
497 | int64_t exp2_mid_lo_i = |
498 | static_cast<int64_t>(FPBits(EXP2_MID1[idx_y].mid).uintval()); |
499 | // (-1)^sign * 2^hi * 2^mid |
500 | // Error <= 2^hi * 2^-53 |
501 | uint64_t exp2_hm_hi_i = |
502 | static_cast<uint64_t>(exp2_hi_i + exp2_mid_hi_i) + sign; |
503 | // The low part could be 0. |
504 | uint64_t exp2_hm_lo_i = |
505 | idx_y != 0 ? static_cast<uint64_t>(exp2_hi_i + exp2_mid_lo_i) + sign |
506 | : sign; |
507 | double exp2_hm_hi = FPBits(exp2_hm_hi_i).get_val(); |
508 | double exp2_hm_lo = FPBits(exp2_hm_lo_i).get_val(); |
509 | |
510 | // Degree-5 polynomial approximation P(lo6) ~ 2^(lo6 / 2^6) = 2^(lo). |
511 | // Generated by Sollya with: |
512 | // > P = fpminimax(2^(x/64), 5, [|1, D...|], [-2^-1, 2^-1]); |
513 | // > dirtyinfnorm(2^(x/64) - P, [-0.5, 0.5]); |
514 | // 0x1.a2b77e618f5c4c176fd11b7659016cde5de83cb72p-60 |
515 | constexpr double EXP2_COEFFS[] = {0x1p0, |
516 | 0x1.62e42fefa39efp-7, |
517 | 0x1.ebfbdff82a23ap-15, |
518 | 0x1.c6b08d7076268p-23, |
519 | 0x1.3b2ad33f8b48bp-31, |
520 | 0x1.5d870c4d84445p-40}; |
521 | |
522 | double lo6_sqr = lo6 * lo6; |
523 | |
524 | double d0 = fputil::multiply_add(lo6, EXP2_COEFFS[2], EXP2_COEFFS[1]); |
525 | double d1 = fputil::multiply_add(lo6, EXP2_COEFFS[4], EXP2_COEFFS[3]); |
526 | double pp = fputil::polyeval(lo6_sqr, d0, d1, EXP2_COEFFS[5]); |
527 | |
528 | double r = fputil::multiply_add(exp2_hm_hi * lo6, pp, exp2_hm_lo); |
529 | r += exp2_hm_hi; |
530 | |
531 | return r * scale; |
532 | } |
533 | |
534 | } // namespace LIBC_NAMESPACE_DECL |
535 | |