Warning: This file is not a C or C++ file. It does not have highlighting.
| 1 | //===-- Extra range reduction steps for accurate pass of logarithms -------===// |
|---|---|
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_LIBC_SRC_MATH_GENERIC_LOG_RANGE_REDUCTION_H |
| 10 | #define LLVM_LIBC_SRC_MATH_GENERIC_LOG_RANGE_REDUCTION_H |
| 11 | |
| 12 | #include "common_constants.h" |
| 13 | #include "src/__support/FPUtil/dyadic_float.h" |
| 14 | #include "src/__support/macros/config.h" |
| 15 | #include "src/__support/uint128.h" |
| 16 | |
| 17 | namespace LIBC_NAMESPACE_DECL { |
| 18 | |
| 19 | // Struct to store -log*(r) for 4 range reduction steps. |
| 20 | struct LogRR { |
| 21 | fputil::DyadicFloat<128> step_1[128]; |
| 22 | fputil::DyadicFloat<128> step_2[193]; |
| 23 | fputil::DyadicFloat<128> step_3[161]; |
| 24 | fputil::DyadicFloat<128> step_4[130]; |
| 25 | }; |
| 26 | |
| 27 | // Perform logarithm range reduction steps 2-4. |
| 28 | // Inputs from the first step of range reduction: |
| 29 | // m_x : the reduced argument after the first step of range reduction |
| 30 | // satisfying -2^-8 <= m_x < 2^-7 and ulp(m_x) >= 2^-60. |
| 31 | // idx1: index of the -log(r1) table from the first step. |
| 32 | // Outputs of the extra range reduction steps: |
| 33 | // sum: adding -log(r1) - log(r2) - log(r3) - log(r4) to the resulted sum. |
| 34 | // return value: the reduced argument v satisfying: |
| 35 | // -0x1.0002143p-29 <= v < 0x1p-29, and ulp(v) >= 2^(-125). |
| 36 | LIBC_INLINE fputil::DyadicFloat<128> |
| 37 | log_range_reduction(double m_x, const LogRR &log_table, |
| 38 | fputil::DyadicFloat<128> &sum) { |
| 39 | using Float128 = typename fputil::DyadicFloat<128>; |
| 40 | using MType = typename Float128::MantissaType; |
| 41 | |
| 42 | int64_t v = static_cast<int64_t>(m_x * 0x1.0p60); // ulp = 2^-60 |
| 43 | |
| 44 | // Range reduction - Step 2 |
| 45 | // Output range: vv2 in [-0x1.3ffcp-15, 0x1.3e3dp-15]. |
| 46 | // idx2 = trunc(2^14 * (v + 2^-8 + 2^-15)) |
| 47 | size_t idx2 = static_cast<size_t>((v + 0x10'2000'0000'0000) >> 46); |
| 48 | sum = fputil::quick_add(sum, log_table.step_2[idx2]); |
| 49 | |
| 50 | int64_t s2 = static_cast<int64_t>(S2[idx2]); // |s| <= 2^-7, ulp = 2^-16 |
| 51 | int64_t sv2 = s2 * v; // |s*v| < 2^-14, ulp = 2^(-60-16) = 2^-76 |
| 52 | int64_t spv2 = (s2 << 44) + v; // |s + v| < 2^-14, ulp = 2^-60 |
| 53 | int64_t vv2 = (spv2 << 16) + sv2; // |vv2| < 2^-14, ulp = 2^-76 |
| 54 | |
| 55 | // Range reduction - Step 3 |
| 56 | // Output range: vv3 in [-0x1.01928p-22 , 0x1p-22] |
| 57 | // idx3 = trunc(2^21 * (v + 80*2^-21 + 2^-22)) |
| 58 | size_t idx3 = static_cast<size_t>((vv2 + 0x2840'0000'0000'0000) >> 55); |
| 59 | sum = fputil::quick_add(sum, log_table.step_3[idx3]); |
| 60 | |
| 61 | int64_t s3 = static_cast<int64_t>(S3[idx3]); // |s| < 2^-13, ulp = 2^-21 |
| 62 | int64_t spv3 = (s3 << 55) + vv2; // |s + v| < 2^-21, ulp = 2^-76 |
| 63 | // |s*v| < 2^-27, ulp = 2^(-76-21) = 2^-97 |
| 64 | Int128 sv3 = static_cast<Int128>(s3) * static_cast<Int128>(vv2); |
| 65 | // |vv3| < 2^-21, ulp = 2^-97 |
| 66 | Int128 vv3 = (static_cast<Int128>(spv3) << 21) + sv3; |
| 67 | |
| 68 | // Range reduction - Step 4 |
| 69 | // Output range: vv4 in [-0x1.0002143p-29 , 0x1p-29] |
| 70 | // idx4 = trunc(2^21 * (v + 65*2^-28 + 2^-29)) |
| 71 | size_t idx4 = static_cast<size_t>((static_cast<int>(vv3 >> 68) + 131) >> 1); |
| 72 | |
| 73 | sum = fputil::quick_add(sum, log_table.step_4[idx4]); |
| 74 | |
| 75 | Int128 s4 = static_cast<Int128>(S4[idx4]); // |s| < 2^-21, ulp = 2^-28 |
| 76 | // |s + v| < 2^-28, ulp = 2^-97 |
| 77 | Int128 spv4 = (s4 << 69) + vv3; |
| 78 | // |s*v| < 2^-42, ulp = 2^(-97-28) = 2^-125 |
| 79 | Int128 sv4 = s4 * vv3; |
| 80 | // |vv4| < 2^-28, ulp = 2^-125 |
| 81 | Int128 vv4 = (spv4 << 28) + sv4; |
| 82 | |
| 83 | return (vv4 < 0) ? Float128(Sign::NEG, -125, |
| 84 | MType({static_cast<uint64_t>(-vv4), |
| 85 | static_cast<uint64_t>((-vv4) >> 64)})) |
| 86 | : Float128(Sign::POS, -125, |
| 87 | MType({static_cast<uint64_t>(vv4), |
| 88 | static_cast<uint64_t>(vv4 >> 64)})); |
| 89 | } |
| 90 | |
| 91 | } // namespace LIBC_NAMESPACE_DECL |
| 92 | |
| 93 | #endif // LLVM_LIBC_SRC_MATH_GENERIC_LOG_RANGE_REDUCTION_H |
| 94 |
Warning: This file is not a C or C++ file. It does not have highlighting.
