1 | //===-- Single-precision general exp/log functions ------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H |
10 | #define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H |
11 | |
12 | #include "common_constants.h" |
13 | #include "src/__support/CPP/bit.h" |
14 | #include "src/__support/CPP/optional.h" |
15 | #include "src/__support/FPUtil/FEnvImpl.h" |
16 | #include "src/__support/FPUtil/FPBits.h" |
17 | #include "src/__support/FPUtil/PolyEval.h" |
18 | #include "src/__support/FPUtil/nearest_integer.h" |
19 | #include "src/__support/common.h" |
20 | #include "src/__support/macros/config.h" |
21 | #include "src/__support/macros/properties/cpu_features.h" |
22 | |
23 | namespace LIBC_NAMESPACE_DECL { |
24 | |
25 | struct ExpBase { |
26 | // Base = e |
27 | static constexpr int MID_BITS = 5; |
28 | static constexpr int MID_MASK = (1 << MID_BITS) - 1; |
29 | // log2(e) * 2^5 |
30 | static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS); |
31 | // High and low parts of -log(2) * 2^(-5) |
32 | static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS); |
33 | static constexpr double M_LOGB_2_LO = |
34 | -0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS); |
35 | // Look up table for bit fields of 2^(i/32) for i = 0..31, generated by Sollya |
36 | // with: |
37 | // > for i from 0 to 31 do printdouble(round(2^(i/32), D, RN)); |
38 | static constexpr int64_t EXP_2_MID[1 << MID_BITS] = { |
39 | 0x3ff0000000000000, 0x3ff059b0d3158574, 0x3ff0b5586cf9890f, |
40 | 0x3ff11301d0125b51, 0x3ff172b83c7d517b, 0x3ff1d4873168b9aa, |
41 | 0x3ff2387a6e756238, 0x3ff29e9df51fdee1, 0x3ff306fe0a31b715, |
42 | 0x3ff371a7373aa9cb, 0x3ff3dea64c123422, 0x3ff44e086061892d, |
43 | 0x3ff4bfdad5362a27, 0x3ff5342b569d4f82, 0x3ff5ab07dd485429, |
44 | 0x3ff6247eb03a5585, 0x3ff6a09e667f3bcd, 0x3ff71f75e8ec5f74, |
45 | 0x3ff7a11473eb0187, 0x3ff82589994cce13, 0x3ff8ace5422aa0db, |
46 | 0x3ff93737b0cdc5e5, 0x3ff9c49182a3f090, 0x3ffa5503b23e255d, |
47 | 0x3ffae89f995ad3ad, 0x3ffb7f76f2fb5e47, 0x3ffc199bdd85529c, |
48 | 0x3ffcb720dcef9069, 0x3ffd5818dcfba487, 0x3ffdfc97337b9b5f, |
49 | 0x3ffea4afa2a490da, 0x3fff50765b6e4540, |
50 | }; |
51 | |
52 | // Approximating e^dx with degree-5 minimax polynomial generated by Sollya: |
53 | // > Q = fpminimax(expm1(x)/x, 4, [|1, D...|], [-log(2)/64, log(2)/64]); |
54 | // Then: |
55 | // e^dx ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5. |
56 | static constexpr double COEFFS[4] = { |
57 | 0x1.ffffffffe5bc8p-2, 0x1.555555555cd67p-3, 0x1.5555c2a9b48b4p-5, |
58 | 0x1.11112a0e34bdbp-7}; |
59 | |
60 | LIBC_INLINE static double powb_lo(double dx) { |
61 | using fputil::multiply_add; |
62 | double dx2 = dx * dx; |
63 | double c0 = 1.0 + dx; |
64 | // c1 = COEFFS[0] + COEFFS[1] * dx |
65 | double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]); |
66 | // c2 = COEFFS[2] + COEFFS[3] * dx |
67 | double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]); |
68 | // r = c4 + c5 * dx^4 |
69 | // = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7 |
70 | return fputil::polyeval(dx2, c0, c1, c2); |
71 | } |
72 | }; |
73 | |
74 | struct Exp10Base : public ExpBase { |
75 | // log2(10) * 2^5 |
76 | static constexpr double LOG2_B = 0x1.a934f0979a371p1 * (1 << MID_BITS); |
77 | // High and low parts of -log10(2) * 2^(-5). |
78 | // Notice that since |x * log2(10)| < 150: |
79 | // |k| = |round(x * log2(10) * 2^5)| < 2^8 * 2^5 = 2^13 |
80 | // So when the FMA instructions are not available, in order for the product |
81 | // k * M_LOGB_2_HI |
82 | // to be exact, we only store the high part of log10(2) up to 38 bits |
83 | // (= 53 - 15) of precision. |
84 | // It is generated by Sollya with: |
85 | // > round(log10(2), 44, RN); |
86 | static constexpr double M_LOGB_2_HI = -0x1.34413509f8p-2 / (1 << MID_BITS); |
87 | // > round(log10(2) - 0x1.34413509f8p-2, D, RN); |
88 | static constexpr double M_LOGB_2_LO = 0x1.80433b83b532ap-44 / (1 << MID_BITS); |
89 | |
90 | // Approximating 10^dx with degree-5 minimax polynomial generated by Sollya: |
91 | // > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]); |
92 | // Then: |
93 | // 10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5. |
94 | static constexpr double COEFFS[5] = {0x1.26bb1bbb55515p1, 0x1.53524c73bd3eap1, |
95 | 0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0, |
96 | 0x1.1429e74a98f43p-1}; |
97 | |
98 | static double powb_lo(double dx) { |
99 | using fputil::multiply_add; |
100 | double dx2 = dx * dx; |
101 | // c0 = 1 + COEFFS[0] * dx |
102 | double c0 = multiply_add(dx, Exp10Base::COEFFS[0], 1.0); |
103 | // c1 = COEFFS[1] + COEFFS[2] * dx |
104 | double c1 = multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]); |
105 | // c2 = COEFFS[3] + COEFFS[4] * dx |
106 | double c2 = multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]); |
107 | // r = c0 + dx^2 * (c1 + c2 * dx^2) |
108 | // = c0 + c1 * dx^2 + c2 * dx^4 |
109 | // = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5. |
110 | return fputil::polyeval(dx2, c0, c1, c2); |
111 | } |
112 | }; |
113 | |
// Number of leading mantissa bits used by log2_eval as the index into the
// LOG_P1_* tables.
constexpr int LOG_P1_BITS = 6;
// Number of entries in each LOG_P1_* table (2^LOG_P1_BITS).
constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS;

// log2(1 + x) for x = 0/64, 1/64, ..., 63/64:
//   N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40]
extern const double LOG_P1_LOG2[LOG_P1_SIZE];

// 1 / (1 + x) for x = 0/64, 1/64, ..., 63/64:
//   N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40]
extern const double LOG_P1_1_OVER[LOG_P1_SIZE];

// Taylor series expansion for Log[2, 1 + x], split into coefficients of the
// even powers and of the odd powers.
// K_LOG2_ODD starts from x^3.
extern const double K_LOG2_EVEN[4];
extern const double K_LOG2_ODD[4];
127 | |
// Result of the range reduction for exp_b: the pair (2^(mid + hi), lo)
// satisfying
//   b^x = 2^(mid + hi) * b^lo
struct exp_b_reduc_t {
  // 2^(mid + hi)
  double mh;
  // Remaining part lo of the reduced argument.
  double lo;
};
135 | |
136 | // The function correctly calculates b^x value with at least float precision |
137 | // in a limited range. |
138 | // Range reduction: |
139 | // b^x = 2^(hi + mid) * b^lo |
140 | // where: |
141 | // x = (hi + mid) * log_b(2) + lo |
142 | // hi is an integer, |
143 | // 0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer |
144 | // -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1) |
145 | // Base class needs to provide the following constants: |
146 | // - MID_BITS : number of bits after decimal points used for mid |
147 | // - MID_MASK : 2^MID_BITS - 1, mask to extract mid bits |
148 | // - LOG2_B : log2(b) * 2^MID_BITS for scaling |
149 | // - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS) |
150 | // - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS) |
151 | // - EXP_2_MID : look up table for bit fields of 2^mid |
152 | // Return: |
153 | // { 2^(hi + mid), lo } |
154 | template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) { |
155 | double xd = static_cast<double>(x); |
156 | // kd = round((hi + mid) * log2(b) * 2^MID_BITS) |
157 | double kd = fputil::nearest_integer(Base::LOG2_B * xd); |
158 | // k = round((hi + mid) * log2(b) * 2^MID_BITS) |
159 | int k = static_cast<int>(kd); |
160 | // hi = floor(kd * 2^(-MID_BITS)) |
161 | // exp_hi = shift hi to the exponent field of double precision. |
162 | uint64_t exp_hi = static_cast<uint64_t>(k >> Base::MID_BITS) |
163 | << fputil::FPBits<double>::FRACTION_LEN; |
164 | // mh = 2^hi * 2^mid |
165 | // mh_bits = bit field of mh |
166 | uint64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi; |
167 | double mh = fputil::FPBits<double>(mh_bits).get_val(); |
168 | // dx = lo = x - (hi + mid) * log(2) |
169 | double dx = fputil::multiply_add( |
170 | kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd)); |
171 | return {mh, dx}; |
172 | } |
173 | |
174 | // The function correctly calculates sinh(x) and cosh(x) by calculating exp(x) |
175 | // and exp(-x) simultaneously. |
176 | // To compute e^x, we perform the following range |
177 | // reduction: find hi, mid, lo such that: |
178 | // x = (hi + mid) * log(2) + lo, in which |
179 | // hi is an integer, |
180 | // 0 <= mid * 2^5 < 32 is an integer |
181 | // -2^(-6) <= lo * log2(e) <= 2^-6. |
182 | // In particular, |
183 | // hi + mid = round(x * log2(e) * 2^5) * 2^(-5). |
184 | // Then, |
185 | // e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo. |
186 | // 2^mid is stored in the lookup table of 32 elements. |
187 | // e^lo is computed using a degree-5 minimax polynomial |
188 | // generated by Sollya: |
189 | // e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5 |
190 | // = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4) |
191 | // = P_even + lo * P_odd |
192 | // We perform 2^hi * 2^mid by simply add hi to the exponent field |
193 | // of 2^mid. |
194 | // To compute e^(-x), notice that: |
195 | // e^(-x) = 2^(-(hi + mid)) * e^(-lo) |
196 | // ~ 2^(-(hi + mid)) * P(-lo) |
197 | // = 2^(-(hi + mid)) * (P_even - lo * P_odd) |
198 | // So: |
199 | // sinh(x) = (e^x - e^(-x)) / 2 |
200 | // ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) - |
201 | // 2^(-(hi + mid)) * (P_even - lo * P_odd)) |
202 | // = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) + |
203 | // lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) |
204 | // And similarly: |
205 | // cosh(x) = (e^x + e^(-x)) / 2 |
206 | // ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) + |
207 | // lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) |
208 | // The main point of these formulas is that the expensive part of calculating |
209 | // the polynomials approximating lower parts of e^(x) and e^(-x) are shared |
210 | // and only done once. |
211 | template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) { |
212 | double xd = static_cast<double>(x); |
213 | |
214 | // kd = round(x * log2(e) * 2^5) |
215 | // k_p = round(x * log2(e) * 2^5) |
216 | // k_m = round(-x * log2(e) * 2^5) |
217 | double kd; |
218 | int k_p, k_m; |
219 | |
220 | #ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT |
221 | kd = fputil::nearest_integer(ExpBase::LOG2_B * xd); |
222 | k_p = static_cast<int>(kd); |
223 | k_m = -k_p; |
224 | #else |
225 | constexpr double HALF_WAY[2] = {0.5, -0.5}; |
226 | |
227 | k_p = static_cast<int>( |
228 | fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f])); |
229 | k_m = -k_p; |
230 | kd = static_cast<double>(k_p); |
231 | #endif // LIBC_TARGET_CPU_HAS_NEAREST_INT |
232 | |
233 | // hi = floor(kf * 2^(-5)) |
234 | // exp_hi = shift hi to the exponent field of double precision. |
235 | int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS)) |
236 | << fputil::FPBits<double>::FRACTION_LEN; |
237 | int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS)) |
238 | << fputil::FPBits<double>::FRACTION_LEN; |
239 | // mh_p = 2^(hi + mid) |
240 | // mh_m = 2^(-(hi + mid)) |
241 | // mh_bits_* = bit field of mh_* |
242 | int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p; |
243 | int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m; |
244 | double mh_p = fputil::FPBits<double>(uint64_t(mh_bits_p)).get_val(); |
245 | double mh_m = fputil::FPBits<double>(uint64_t(mh_bits_m)).get_val(); |
246 | // mh_sum = 2^(hi + mid) + 2^(-(hi + mid)) |
247 | double mh_sum = mh_p + mh_m; |
248 | // mh_diff = 2^(hi + mid) - 2^(-(hi + mid)) |
249 | double mh_diff = mh_p - mh_m; |
250 | |
251 | // dx = lo = x - (hi + mid) * log(2) |
252 | double dx = |
253 | fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO, |
254 | fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd)); |
255 | double dx2 = dx * dx; |
256 | |
257 | // c0 = 1 + COEFFS[0] * lo^2 |
258 | // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2 |
259 | double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5, |
260 | ExpBase::COEFFS[2] * 0.5); |
261 | // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2 |
262 | double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5, |
263 | ExpBase::COEFFS[3] * 0.5); |
264 | |
265 | double r; |
266 | if constexpr (is_sinh) |
267 | r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff); |
268 | else |
269 | r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum); |
270 | return r; |
271 | } |
272 | |
273 | // x should be positive, normal finite value |
274 | LIBC_INLINE static double log2_eval(double x) { |
275 | using FPB = fputil::FPBits<double>; |
276 | FPB bs(x); |
277 | |
278 | double result = 0; |
279 | result += bs.get_exponent(); |
280 | |
281 | int p1 = (bs.get_mantissa() >> (FPB::FRACTION_LEN - LOG_P1_BITS)) & |
282 | (LOG_P1_SIZE - 1); |
283 | |
284 | bs.set_uintval(bs.uintval() & (FPB::FRACTION_MASK >> LOG_P1_BITS)); |
285 | bs.set_biased_exponent(FPB::EXP_BIAS); |
286 | double dx = (bs.get_val() - 1.0) * LOG_P1_1_OVER[p1]; |
287 | |
288 | // Taylor series for log(2,1+x) |
289 | double c1 = fputil::multiply_add(dx, K_LOG2_ODD[0], K_LOG2_EVEN[0]); |
290 | double c2 = fputil::multiply_add(dx, K_LOG2_ODD[1], K_LOG2_EVEN[1]); |
291 | double c3 = fputil::multiply_add(dx, K_LOG2_ODD[2], K_LOG2_EVEN[2]); |
292 | double c4 = fputil::multiply_add(dx, K_LOG2_ODD[3], K_LOG2_EVEN[3]); |
293 | |
294 | // c0 = dx * (1.0 / ln(2)) + LOG_P1_LOG2[p1] |
295 | double c0 = fputil::multiply_add(dx, 0x1.71547652b82fep+0, LOG_P1_LOG2[p1]); |
296 | result += LIBC_NAMESPACE::fputil::polyeval(dx * dx, c0, c1, c2, c3, c4); |
297 | return result; |
298 | } |
299 | |
300 | // x should be positive, normal finite value |
301 | // TODO: Simplify range reduction and polynomial degree for float16. |
302 | // See issue #137190. |
303 | LIBC_INLINE static float log_eval_f(float x) { |
304 | // For x = 2^ex * (1 + mx), logf(x) = ex * logf(2) + logf(1 + mx). |
305 | using FPBits = fputil::FPBits<float>; |
306 | FPBits xbits(x); |
307 | |
308 | float ex = static_cast<float>(xbits.get_exponent()); |
309 | // p1 is the leading 7 bits of mx, i.e. |
310 | // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7). |
311 | int p1 = static_cast<int>(xbits.get_mantissa() >> (FPBits::FRACTION_LEN - 7)); |
312 | |
313 | // Set bits to (1 + (mx - p1*2^(-7))) |
314 | xbits.set_uintval(xbits.uintval() & (FPBits::FRACTION_MASK >> 7)); |
315 | xbits.set_biased_exponent(FPBits::EXP_BIAS); |
316 | // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)). |
317 | float dx = (xbits.get_val() - 1.0f) * ONE_OVER_F_FLOAT[p1]; |
318 | |
319 | // Minimax polynomial for log(1 + dx), generated using Sollya: |
320 | // > P = fpminimax(log(1 + x)/x, 6, [|SG...|], [0, 2^-7]); |
321 | // > Q = (P - 1) / x; |
322 | // > for i from 0 to degree(Q) do print(coeff(Q, i)); |
323 | constexpr float COEFFS[6] = {-0x1p-1f, 0x1.555556p-2f, -0x1.00022ep-2f, |
324 | 0x1.9ea056p-3f, -0x1.e50324p-2f, 0x1.c018fp3f}; |
325 | |
326 | float dx2 = dx * dx; |
327 | |
328 | float c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); |
329 | float c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); |
330 | float c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); |
331 | |
332 | float p = fputil::polyeval(dx2, dx, c1, c2, c3); |
333 | |
334 | // Generated by Sollya with the following commands: |
335 | // > display = hexadecimal; |
336 | // > round(log(2), SG, RN); |
337 | constexpr float LOGF_2 = 0x1.62e43p-1f; |
338 | |
339 | float result = fputil::multiply_add(ex, LOGF_2, LOG_F_FLOAT[p1] + p); |
340 | return result; |
341 | } |
342 | |
343 | // x should be positive, normal finite value |
344 | LIBC_INLINE static double log_eval(double x) { |
345 | // For x = 2^ex * (1 + mx) |
346 | // log(x) = ex * log(2) + log(1 + mx) |
347 | using FPB = fputil::FPBits<double>; |
348 | FPB bs(x); |
349 | |
350 | double ex = static_cast<double>(bs.get_exponent()); |
351 | |
352 | // p1 is the leading 7 bits of mx, i.e. |
353 | // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7). |
354 | int p1 = static_cast<int>(bs.get_mantissa() >> (FPB::FRACTION_LEN - 7)); |
355 | |
356 | // Set bs to (1 + (mx - p1*2^(-7)) |
357 | bs.set_uintval(bs.uintval() & (FPB::FRACTION_MASK >> 7)); |
358 | bs.set_biased_exponent(FPB::EXP_BIAS); |
359 | // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)). |
360 | double dx = (bs.get_val() - 1.0) * ONE_OVER_F[p1]; |
361 | |
362 | // Minimax polynomial of log(1 + dx) generated by Sollya with: |
363 | // > P = fpminimax(log(1 + x)/x, 6, [|D...|], [0, 2^-7]); |
364 | const double COEFFS[6] = {-0x1.ffffffffffffcp-2, 0x1.5555555552ddep-2, |
365 | -0x1.ffffffefe562dp-3, 0x1.9999817d3a50fp-3, |
366 | -0x1.554317b3f67a5p-3, 0x1.1dc5c45e09c18p-3}; |
367 | double dx2 = dx * dx; |
368 | double c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]); |
369 | double c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]); |
370 | double c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]); |
371 | |
372 | double p = fputil::polyeval(dx2, dx, c1, c2, c3); |
373 | double result = |
374 | fputil::multiply_add(ex, /*log(2)*/ 0x1.62e42fefa39efp-1, LOG_F[p1] + p); |
375 | return result; |
376 | } |
377 | |
378 | // Rounding tests for 2^hi * (mid + lo) when the output might be denormal. We |
379 | // assume further that 1 <= mid < 2, mid + lo < 2, and |lo| << mid. |
380 | // Notice that, if 0 < x < 2^-1022, |
381 | // double(2^-1022 + x) - 2^-1022 = double(x). |
382 | // So if we scale x up by 2^1022, we can use |
383 | // double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range. |
384 | template <bool SKIP_ZIV_TEST = false> |
385 | LIBC_INLINE static cpp::optional<double> |
386 | ziv_test_denorm(int hi, double mid, double lo, double err) { |
387 | using FPBits = typename fputil::FPBits<double>; |
388 | |
389 | // Scaling factor = 1/(min normal number) = 2^1022 |
390 | int64_t exp_hi = static_cast<int64_t>(hi + 1022) << FPBits::FRACTION_LEN; |
391 | double mid_hi = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(mid)); |
392 | double lo_scaled = |
393 | (lo != 0.0) ? cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(lo)) |
394 | : 0.0; |
395 | |
396 | double extra_factor = 0.0; |
397 | uint64_t scale_down = 0x3FE0'0000'0000'0000; // 1022 in the exponent field. |
398 | |
399 | // Result is denormal if (mid_hi + lo_scale < 1.0). |
400 | if ((1.0 - mid_hi) > lo_scaled) { |
401 | // Extra rounding step is needed, which adds more rounding errors. |
402 | err += 0x1.0p-52; |
403 | extra_factor = 1.0; |
404 | scale_down = 0x3FF0'0000'0000'0000; // 1023 in the exponent field. |
405 | } |
406 | |
407 | // By adding 1.0, the results will have similar rounding points as denormal |
408 | // outputs. |
409 | if constexpr (SKIP_ZIV_TEST) { |
410 | double r = extra_factor + (mid_hi + lo_scaled); |
411 | return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(r) - scale_down); |
412 | } else { |
413 | double err_scaled = |
414 | cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(err)); |
415 | |
416 | double lo_u = lo_scaled + err_scaled; |
417 | double lo_l = lo_scaled - err_scaled; |
418 | |
419 | double upper = extra_factor + (mid_hi + lo_u); |
420 | double lower = extra_factor + (mid_hi + lo_l); |
421 | |
422 | if (LIBC_LIKELY(upper == lower)) { |
423 | return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(upper) - scale_down); |
424 | } |
425 | |
426 | return cpp::nullopt; |
427 | } |
428 | } |
429 | |
430 | } // namespace LIBC_NAMESPACE_DECL |
431 | |
432 | #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H |
433 | |