| 1 | //===-- Collection of utils for sinf/cosf/sincosf ---------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF_UTILS_H |
| 10 | #define LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF_UTILS_H |
| 11 | |
| 12 | #include "src/__support/FPUtil/FPBits.h" |
| 13 | #include "src/__support/FPUtil/PolyEval.h" |
| 14 | #include "src/__support/macros/config.h" |
| 15 | #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA |
| 16 | |
| 17 | #if defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE) |
| 18 | #include "range_reduction_fma.h" |
| 19 | // using namespace LIBC_NAMESPACE::fma; |
| 20 | using LIBC_NAMESPACE::fma::FAST_PASS_BOUND; |
| 21 | using LIBC_NAMESPACE::fma::large_range_reduction; |
| 22 | using LIBC_NAMESPACE::fma::small_range_reduction; |
| 23 | |
| 24 | #else |
| 25 | #include "range_reduction.h" |
| 26 | // using namespace LIBC_NAMESPACE::generic; |
| 27 | using LIBC_NAMESPACE::generic::FAST_PASS_BOUND; |
| 28 | using LIBC_NAMESPACE::generic::large_range_reduction; |
| 29 | using LIBC_NAMESPACE::generic::small_range_reduction; |
| 30 | #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
| 31 | |
| 32 | namespace LIBC_NAMESPACE_DECL { |
| 33 | |
| 34 | // Lookup table for sin(k * pi / 32) with k = 0, ..., 63. |
| 35 | // Table is generated with Sollya as follow: |
| 36 | // > display = hexadecimal; |
| 37 | // > for k from 0 to 63 do { D(sin(k * pi/32)); }; |
| 38 | const double SIN_K_PI_OVER_32[64] = { |
| 39 | 0x0.0000000000000p+0, 0x1.917a6bc29b42cp-4, 0x1.8f8b83c69a60bp-3, |
| 40 | 0x1.294062ed59f06p-2, 0x1.87de2a6aea963p-2, 0x1.e2b5d3806f63bp-2, |
| 41 | 0x1.1c73b39ae68c8p-1, 0x1.44cf325091dd6p-1, 0x1.6a09e667f3bcdp-1, |
| 42 | 0x1.8bc806b151741p-1, 0x1.a9b66290ea1a3p-1, 0x1.c38b2f180bdb1p-1, |
| 43 | 0x1.d906bcf328d46p-1, 0x1.e9f4156c62ddap-1, 0x1.f6297cff75cbp-1, |
| 44 | 0x1.fd88da3d12526p-1, 0x1.0000000000000p+0, 0x1.fd88da3d12526p-1, |
| 45 | 0x1.f6297cff75cbp-1, 0x1.e9f4156c62ddap-1, 0x1.d906bcf328d46p-1, |
| 46 | 0x1.c38b2f180bdb1p-1, 0x1.a9b66290ea1a3p-1, 0x1.8bc806b151741p-1, |
| 47 | 0x1.6a09e667f3bcdp-1, 0x1.44cf325091dd6p-1, 0x1.1c73b39ae68c8p-1, |
| 48 | 0x1.e2b5d3806f63bp-2, 0x1.87de2a6aea963p-2, 0x1.294062ed59f06p-2, |
| 49 | 0x1.8f8b83c69a60bp-3, 0x1.917a6bc29b42cp-4, 0x0.0000000000000p+0, |
| 50 | -0x1.917a6bc29b42cp-4, -0x1.8f8b83c69a60bp-3, -0x1.294062ed59f06p-2, |
| 51 | -0x1.87de2a6aea963p-2, -0x1.e2b5d3806f63bp-2, -0x1.1c73b39ae68c8p-1, |
| 52 | -0x1.44cf325091dd6p-1, -0x1.6a09e667f3bcdp-1, -0x1.8bc806b151741p-1, |
| 53 | -0x1.a9b66290ea1a3p-1, -0x1.c38b2f180bdb1p-1, -0x1.d906bcf328d46p-1, |
| 54 | -0x1.e9f4156c62ddap-1, -0x1.f6297cff75cbp-1, -0x1.fd88da3d12526p-1, |
| 55 | -0x1.0000000000000p+0, -0x1.fd88da3d12526p-1, -0x1.f6297cff75cbp-1, |
| 56 | -0x1.e9f4156c62ddap-1, -0x1.d906bcf328d46p-1, -0x1.c38b2f180bdb1p-1, |
| 57 | -0x1.a9b66290ea1a3p-1, -0x1.8bc806b151741p-1, -0x1.6a09e667f3bcdp-1, |
| 58 | -0x1.44cf325091dd6p-1, -0x1.1c73b39ae68c8p-1, -0x1.e2b5d3806f63bp-2, |
| 59 | -0x1.87de2a6aea963p-2, -0x1.294062ed59f06p-2, -0x1.8f8b83c69a60bp-3, |
| 60 | -0x1.917a6bc29b42cp-4, |
| 61 | }; |
| 62 | |
| 63 | static LIBC_INLINE void sincosf_poly_eval(int64_t k, double y, double &sin_k, |
| 64 | double &cos_k, double &sin_y, |
| 65 | double &cosm1_y) { |
| 66 | // After range reduction, k = round(x * 32 / pi) and y = (x * 32 / pi) - k. |
| 67 | // So k is an integer and -0.5 <= y <= 0.5. |
| 68 | // Then sin(x) = sin((k + y)*pi/32) |
| 69 | // = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32) |
| 70 | |
| 71 | sin_k = SIN_K_PI_OVER_32[k & 63]; |
| 72 | // cos(k * pi/32) = sin(k * pi/32 + pi/2) = sin((k + 16) * pi/32). |
| 73 | // cos_k = cos(k * pi/32) |
| 74 | cos_k = SIN_K_PI_OVER_32[(k + 16) & 63]; |
| 75 | |
| 76 | double ysq = y * y; |
| 77 | |
| 78 | // Degree-6 minimax even polynomial for sin(y*pi/32)/y generated by Sollya |
| 79 | // with: |
| 80 | // > Q = fpminimax(sin(y*pi/32)/y, [|0, 2, 4, 6|], [|D...|], [0, 0.5]); |
| 81 | sin_y = |
| 82 | y * fputil::polyeval(ysq, 0x1.921fb54442d18p-4, -0x1.4abbce625abb1p-13, |
| 83 | 0x1.466bc624f2776p-24, -0x1.32c3a619d4a7ep-36); |
| 84 | // Degree-6 minimax even polynomial for cos(y*pi/32) generated by Sollya with: |
| 85 | // > P = fpminimax(cos(x*pi/32), [|0, 2, 4, 6|], [|1, D...|], [0, 0.5]); |
| 86 | // Note that cosm1_y = cos(y*pi/32) - 1. |
| 87 | cosm1_y = ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be430bp-8, |
| 88 | 0x1.03c1f070c2e27p-18, -0x1.55cc84bd942p-30); |
| 89 | } |
| 90 | |
| 91 | LIBC_INLINE void sincosf_eval(double xd, uint32_t x_abs, double &sin_k, |
| 92 | double &cos_k, double &sin_y, double &cosm1_y) { |
| 93 | int64_t k; |
| 94 | double y; |
| 95 | |
| 96 | if (LIBC_LIKELY(x_abs < FAST_PASS_BOUND)) { |
| 97 | k = small_range_reduction(xd, y); |
| 98 | } else { |
| 99 | fputil::FPBits<float> x_bits(x_abs); |
| 100 | k = large_range_reduction(xd, x_bits.get_exponent(), y); |
| 101 | } |
| 102 | |
| 103 | sincosf_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y); |
| 104 | } |
| 105 | |
| 106 | // Return k and y, where |
| 107 | // k = round(x * 32) and y = (x * 32) - k. |
| 108 | // => pi * x = (k + y) * pi / 32 |
| 109 | static LIBC_INLINE int64_t range_reduction_sincospi(double x, double &y) { |
| 110 | double kd = fputil::nearest_integer(x * 32); |
| 111 | y = fputil::multiply_add(x, 32.0, -kd); |
| 112 | |
| 113 | return static_cast<int64_t>(kd); |
| 114 | } |
| 115 | |
| 116 | LIBC_INLINE void sincospif_eval(double xd, double &sin_k, double &cos_k, |
| 117 | double &sin_y, double &cosm1_y) { |
| 118 | double y; |
| 119 | int64_t k = range_reduction_sincospi(xd, y); |
| 120 | sincosf_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y); |
| 121 | } |
| 122 | |
| 123 | } // namespace LIBC_NAMESPACE_DECL |
| 124 | |
| 125 | #endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF_UTILS_H |
| 126 | |