| 1 | //===-- Single-precision log10(x) function --------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "src/math/log10f.h" |
| 10 | #include "common_constants.h" // Lookup table for (1/f) |
| 11 | #include "src/__support/FPUtil/FEnvImpl.h" |
| 12 | #include "src/__support/FPUtil/FMA.h" |
| 13 | #include "src/__support/FPUtil/FPBits.h" |
| 14 | #include "src/__support/FPUtil/PolyEval.h" |
| 15 | #include "src/__support/FPUtil/except_value_utils.h" |
| 16 | #include "src/__support/FPUtil/multiply_add.h" |
| 17 | #include "src/__support/common.h" |
| 18 | #include "src/__support/macros/config.h" |
| 19 | #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY |
| 20 | #include "src/__support/macros/properties/cpu_features.h" |
| 21 | |
| 22 | // This is an algorithm for log10(x) in single precision which is |
| 23 | // correctly rounded for all rounding modes, based on the implementation of |
| 24 | // log10(x) from the RLIBM project at: |
| 25 | // https://people.cs.rutgers.edu/~sn349/rlibm |
| 26 | |
| 27 | // Step 1 - Range reduction: |
| 28 | // For x = 2^m * 1.mant, log(x) = m * log10(2) + log10(1.m) |
| 29 | // If x is denormal, we normalize it by multiplying x by 2^23 and subtracting |
| 30 | // m by 23. |
| 31 | |
| 32 | // Step 2 - Another range reduction: |
| 33 | // To compute log(1.mant), let f be the highest 8 bits including the hidden |
| 34 | // bit, and d be the difference (1.mant - f), i.e. the remaining 16 bits of the |
| 35 | // mantissa. Then we have the following approximation formula: |
| 36 | // log10(1.mant) = log10(f) + log10(1.mant / f) |
| 37 | // = log10(f) + log10(1 + d/f) |
| 38 | // ~ log10(f) + P(d/f) |
| 39 | // since d/f is sufficiently small. |
| 40 | // log10(f) and 1/f are then stored in two 2^7 = 128 entries look-up tables. |
| 41 | |
| 42 | // Step 3 - Polynomial approximation: |
| 43 | // To compute P(d/f), we use a single degree-5 polynomial in double precision |
| 44 | // which provides correct rounding for all but few exception values. |
| 45 | // For more detail about how this polynomial is obtained, please refer to the |
| 46 | // papers: |
| 47 | // Lim, J. and Nagarakatte, S., "One Polynomial Approximation to Produce |
| 48 | // Correctly Rounded Results of an Elementary Function for Multiple |
| 49 | // Representations and Rounding Modes", Proceedings of the 49th ACM SIGPLAN |
| 50 | // Symposium on Principles of Programming Languages (POPL-2022), Philadelphia, |
| 51 | // USA, Jan. 16-22, 2022. |
| 52 | // https://people.cs.rutgers.edu/~sn349/papers/rlibmall-popl-2022.pdf |
| 53 | // Aanjaneya, M., Lim, J., and Nagarakatte, S., "RLibm-Prog: Progressive |
| 54 | // Polynomial Approximations for Fast Correctly Rounded Math Libraries", |
| 55 | // Dept. of Comp. Sci., Rutgets U., Technical Report DCS-TR-758, Nov. 2021. |
| 56 | // https://arxiv.org/pdf/2111.12852.pdf. |
| 57 | |
| 58 | namespace LIBC_NAMESPACE_DECL { |
| 59 | |
| 60 | // Lookup table for -log10(r) where r is defined in common_constants.cpp. |
| 61 | static constexpr double LOG10_R[128] = { |
| 62 | 0x0.0000000000000p+0, 0x1.be76bd77b4fc3p-9, 0x1.c03a80ae5e054p-8, |
| 63 | 0x1.51824c7587ebp-7, 0x1.c3d0837784c41p-7, 0x1.1b85d6044e9aep-6, |
| 64 | 0x1.559bd2406c3bap-6, 0x1.902c31d62a843p-6, 0x1.cb38fccd8bfdbp-6, |
| 65 | 0x1.e8eeb09f2f6cbp-6, 0x1.125d0432ea20ep-5, 0x1.30838cdc2fbfdp-5, |
| 66 | 0x1.3faf7c663060ep-5, 0x1.5e3966b7e9295p-5, 0x1.7d070145f4fd7p-5, |
| 67 | 0x1.8c878eeb05074p-5, 0x1.abbcebd84fcap-5, 0x1.bb7209d1e24e5p-5, |
| 68 | 0x1.db11ed766abf4p-5, 0x1.eafd05035bd3bp-5, 0x1.0585283764178p-4, |
| 69 | 0x1.0d966cc6500fap-4, 0x1.1dd5460c8b16fp-4, 0x1.2603072a25f82p-4, |
| 70 | 0x1.367ba3aaa1883p-4, 0x1.3ec6ad5407868p-4, 0x1.4f7aad9bbcbafp-4, |
| 71 | 0x1.57e3d47c3af7bp-4, 0x1.605735ee985f1p-4, 0x1.715d0ce367afcp-4, |
| 72 | 0x1.79efb57b0f803p-4, 0x1.828cfed29a215p-4, 0x1.93e7de0fc3e8p-4, |
| 73 | 0x1.9ca5aa1729f45p-4, 0x1.a56e8325f5c87p-4, 0x1.ae4285509950bp-4, |
| 74 | 0x1.b721cd17157e3p-4, 0x1.c902a19e65111p-4, 0x1.d204698cb42bdp-4, |
| 75 | 0x1.db11ed766abf4p-4, 0x1.e42b4c16caaf3p-4, 0x1.ed50a4a26eafcp-4, |
| 76 | 0x1.ffbfc2bbc7803p-4, 0x1.0484e4942aa43p-3, 0x1.093025a19976cp-3, |
| 77 | 0x1.0de1b56356b04p-3, 0x1.1299a4fb3e306p-3, 0x1.175805d1587c1p-3, |
| 78 | 0x1.1c1ce9955c0c6p-3, 0x1.20e8624038fedp-3, 0x1.25ba8215af7fcp-3, |
| 79 | 0x1.2a935ba5f1479p-3, 0x1.2f7301cf4e87bp-3, 0x1.345987bfeea91p-3, |
| 80 | 0x1.394700f7953fdp-3, 0x1.3e3b8149739d4p-3, 0x1.43371cde076c2p-3, |
| 81 | 0x1.4839e83506c87p-3, 0x1.4d43f8275a483p-3, 0x1.525561e9256eep-3, |
| 82 | 0x1.576e3b0bde0a7p-3, 0x1.5c8e998072fe2p-3, 0x1.61b6939983048p-3, |
| 83 | 0x1.66e6400da3f77p-3, 0x1.6c1db5f9bb336p-3, 0x1.6c1db5f9bb336p-3, |
| 84 | 0x1.715d0ce367afcp-3, 0x1.76a45cbb7e6ffp-3, 0x1.7bf3bde099f3p-3, |
| 85 | 0x1.814b4921bd52bp-3, 0x1.86ab17c10bc7fp-3, 0x1.86ab17c10bc7fp-3, |
| 86 | 0x1.8c13437695532p-3, 0x1.9183e673394fap-3, 0x1.96fd1b639fc09p-3, |
| 87 | 0x1.9c7efd734a2f9p-3, 0x1.a209a84fbcff8p-3, 0x1.a209a84fbcff8p-3, |
| 88 | 0x1.a79d382bc21d9p-3, 0x1.ad39c9c2c608p-3, 0x1.b2df7a5c50299p-3, |
| 89 | 0x1.b2df7a5c50299p-3, 0x1.b88e67cf9798p-3, 0x1.be46b087354bcp-3, |
| 90 | 0x1.c4087384f4f8p-3, 0x1.c4087384f4f8p-3, 0x1.c9d3d065c5b42p-3, |
| 91 | 0x1.cfa8e765cbb72p-3, 0x1.cfa8e765cbb72p-3, 0x1.d587d96494759p-3, |
| 92 | 0x1.db70c7e96e7f3p-3, 0x1.db70c7e96e7f3p-3, 0x1.e163d527e68cfp-3, |
| 93 | 0x1.e76124046b3f3p-3, 0x1.e76124046b3f3p-3, 0x1.ed68d819191fcp-3, |
| 94 | 0x1.f37b15bab08d1p-3, 0x1.f37b15bab08d1p-3, 0x1.f99801fdb749dp-3, |
| 95 | 0x1.ffbfc2bbc7803p-3, 0x1.ffbfc2bbc7803p-3, 0x1.02f93f4c87101p-2, |
| 96 | 0x1.06182e84fd4acp-2, 0x1.06182e84fd4acp-2, 0x1.093cc32c90f84p-2, |
| 97 | 0x1.093cc32c90f84p-2, 0x1.0c6711d6abd7ap-2, 0x1.0f972f87ff3d6p-2, |
| 98 | 0x1.0f972f87ff3d6p-2, 0x1.12cd31b9c99ffp-2, 0x1.12cd31b9c99ffp-2, |
| 99 | 0x1.16092e5d3a9a6p-2, 0x1.194b3bdef6b9ep-2, 0x1.194b3bdef6b9ep-2, |
| 100 | 0x1.1c93712abc7ffp-2, 0x1.1c93712abc7ffp-2, 0x1.1fe1e5af2c141p-2, |
| 101 | 0x1.1fe1e5af2c141p-2, 0x1.2336b161b3337p-2, 0x1.2336b161b3337p-2, |
| 102 | 0x1.2691ecc29f042p-2, 0x1.2691ecc29f042p-2, 0x1.29f3b0e15584bp-2, |
| 103 | 0x1.29f3b0e15584bp-2, 0x1.2d5c1760b86bbp-2, 0x1.2d5c1760b86bbp-2, |
| 104 | 0x1.30cb3a7bb3625p-2, 0x1.34413509f79ffp-2}; |
| 105 | |
| 106 | LLVM_LIBC_FUNCTION(float, log10f, (float x)) { |
| 107 | constexpr double LOG10_2 = 0x1.34413509f79ffp-2; |
| 108 | |
| 109 | using FPBits = typename fputil::FPBits<float>; |
| 110 | |
| 111 | FPBits xbits(x); |
| 112 | uint32_t x_u = xbits.uintval(); |
| 113 | |
| 114 | // Exact powers of 10 and other hard-to-round cases. |
| 115 | if (LIBC_UNLIKELY((x_u & 0x3FF) == 0)) { |
| 116 | switch (x_u) { |
| 117 | case 0x3f80'0000U: // x = 1 |
| 118 | return 0.0f; |
| 119 | case 0x4120'0000U: // x = 10 |
| 120 | return 1.0f; |
| 121 | case 0x42c8'0000U: // x = 100 |
| 122 | return 2.0f; |
| 123 | case 0x447a'0000U: // x = 1,000 |
| 124 | return 3.0f; |
| 125 | case 0x461c'4000U: // x = 10,000 |
| 126 | return 4.0f; |
| 127 | case 0x47c3'5000U: // x = 100,000 |
| 128 | return 5.0f; |
| 129 | case 0x4974'2400U: // x = 1,000,000 |
| 130 | return 6.0f; |
| 131 | } |
| 132 | } else { |
| 133 | switch (x_u) { |
| 134 | case 0x4b18'9680U: // x = 10,000,000 |
| 135 | return 7.0f; |
| 136 | case 0x4cbe'bc20U: // x = 100,000,000 |
| 137 | return 8.0f; |
| 138 | case 0x4e6e'6b28U: // x = 1,000,000,000 |
| 139 | return 9.0f; |
| 140 | case 0x5015'02f9U: // x = 10,000,000,000 |
| 141 | return 10.0f; |
| 142 | #ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 143 | case 0x0efe'ee7aU: // x = 0x1.fddcf4p-98f |
| 144 | return fputil::round_result_slightly_up(-0x1.d33a46p+4f); |
| 145 | case 0x3f5f'de1bU: // x = 0x1.bfbc36p-1f |
| 146 | return fputil::round_result_slightly_up(-0x1.dd2c6ep-5f); |
| 147 | case 0x3f80'70d8U: // x = 0x1.00e1bp0f |
| 148 | return fputil::round_result_slightly_up(0x1.8762c4p-10f); |
| 149 | #ifndef LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
| 150 | case 0x08ae'a356U: // x = 0x1.5d46acp-110f |
| 151 | return fputil::round_result_slightly_up(-0x1.07d3b4p+5f); |
| 152 | case 0x120b'93dcU: // x = 0x1.1727b8p-91f |
| 153 | return fputil::round_result_slightly_down(-0x1.b5b2aep+4f); |
| 154 | case 0x13ae'78d3U: // x = 0x1.5cf1a6p-88f |
| 155 | return fputil::round_result_slightly_down(-0x1.a5b2aep+4f); |
| 156 | case 0x4f13'4f83U: // x = 2471461632.0 |
| 157 | return fputil::round_result_slightly_down(0x1.2c9314p+3f); |
| 158 | case 0x7956'ba5eU: // x = 69683218960000541503257137270226944.0 |
| 159 | return fputil::round_result_slightly_up(0x1.16bebap+5f); |
| 160 | #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
| 161 | #endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | int m = -FPBits::EXP_BIAS; |
| 166 | |
| 167 | if (LIBC_UNLIKELY(x_u < FPBits::min_normal().uintval() || |
| 168 | x_u > FPBits::max_normal().uintval())) { |
| 169 | if (x == 0.0f) { |
| 170 | // Return -inf and raise FE_DIVBYZERO |
| 171 | fputil::set_errno_if_required(ERANGE); |
| 172 | fputil::raise_except_if_required(FE_DIVBYZERO); |
| 173 | return FPBits::inf(Sign::NEG).get_val(); |
| 174 | } |
| 175 | if (xbits.is_neg() && !xbits.is_nan()) { |
| 176 | // Return NaN and raise FE_INVALID |
| 177 | fputil::set_errno_if_required(EDOM); |
| 178 | fputil::raise_except_if_required(FE_INVALID); |
| 179 | return FPBits::quiet_nan().get_val(); |
| 180 | } |
| 181 | if (xbits.is_inf_or_nan()) { |
| 182 | return x; |
| 183 | } |
| 184 | // Normalize denormal inputs. |
| 185 | xbits = FPBits(xbits.get_val() * 0x1.0p23f); |
| 186 | m -= 23; |
| 187 | x_u = xbits.uintval(); |
| 188 | } |
| 189 | |
| 190 | // Add unbiased exponent. |
| 191 | m += static_cast<int>(x_u >> 23); |
| 192 | // Extract 7 leading fractional bits of the mantissa |
| 193 | int index = (x_u >> 16) & 0x7F; |
| 194 | // Set bits to 1.m |
| 195 | xbits.set_biased_exponent(0x7F); |
| 196 | |
| 197 | float u = xbits.get_val(); |
| 198 | double v; |
| 199 | #ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT |
| 200 | v = static_cast<double>(fputil::multiply_add(u, R[index], -1.0f)); // Exact. |
| 201 | #else |
| 202 | v = fputil::multiply_add(static_cast<double>(u), |
| 203 | static_cast<double>(R[index]), -1.0); // Exact |
| 204 | #endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT |
| 205 | |
| 206 | // Degree-5 polynomial approximation of log10 generated by: |
| 207 | // > P = fpminimax(log10(1 + x)/x, 4, [|D...|], [-2^-8, 2^-7]); |
| 208 | constexpr double COEFFS[5] = {0x1.bcb7b1526e2e5p-2, -0x1.bcb7b1528d43dp-3, |
| 209 | 0x1.287a77eb4ca0dp-3, -0x1.bcb8110a181b5p-4, |
| 210 | 0x1.60e7e3e747129p-4}; |
| 211 | double v2 = v * v; // Exact |
| 212 | double p2 = fputil::multiply_add(v, COEFFS[4], COEFFS[3]); |
| 213 | double p1 = fputil::multiply_add(v, COEFFS[2], COEFFS[1]); |
| 214 | double p0 = fputil::multiply_add(v, COEFFS[0], LOG10_R[index]); |
| 215 | double r = fputil::multiply_add(static_cast<double>(m), LOG10_2, |
| 216 | fputil::polyeval(v2, p0, p1, p2)); |
| 217 | |
| 218 | return static_cast<float>(r); |
| 219 | } |
| 220 | |
| 221 | } // namespace LIBC_NAMESPACE_DECL |
| 222 | |