| 1 | // This file is part of Eigen, a lightweight C++ template library |
| 2 | // for linear algebra. |
| 3 | // |
| 4 | // Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) |
| 5 | // Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr> |
| 6 | // |
| 7 | // This Source Code Form is subject to the terms of the Mozilla |
| 8 | // Public License v. 2.0. If a copy of the MPL was not distributed |
| 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 10 | |
| 11 | #ifndef EIGEN_MATHFUNCTIONSIMPL_H |
| 12 | #define EIGEN_MATHFUNCTIONSIMPL_H |
| 13 | |
| 14 | namespace Eigen { |
| 15 | |
| 16 | namespace internal { |
| 17 | |
| 18 | /** \internal \returns the hyperbolic tan of \a a (coeff-wise) |
| 19 | Doesn't do anything fancy, just a 13/6-degree rational interpolant which |
| 20 | is accurate up to a couple of ulps in the (approximate) range [-8, 8], |
| 21 | outside of which tanh(x) = +/-1 in single precision. The input is clamped |
| 22 | to the range [-c, c]. The value c is chosen as the smallest value where |
| 23 | the approximation evaluates to exactly 1. In the reange [-0.0004, 0.0004] |
| 24 | the approxmation tanh(x) ~= x is used for better accuracy as x tends to zero. |
| 25 | |
| 26 | This implementation works on both scalars and packets. |
| 27 | */ |
| 28 | template<typename T> |
| 29 | T generic_fast_tanh_float(const T& a_x) |
| 30 | { |
| 31 | // Clamp the inputs to the range [-c, c] |
| 32 | #ifdef EIGEN_VECTORIZE_FMA |
| 33 | const T plus_clamp = pset1<T>(7.99881172180175781f); |
| 34 | const T minus_clamp = pset1<T>(-7.99881172180175781f); |
| 35 | #else |
| 36 | const T plus_clamp = pset1<T>(7.90531110763549805f); |
| 37 | const T minus_clamp = pset1<T>(-7.90531110763549805f); |
| 38 | #endif |
| 39 | const T tiny = pset1<T>(0.0004f); |
| 40 | const T x = pmax(pmin(a_x, plus_clamp), minus_clamp); |
| 41 | const T tiny_mask = pcmp_lt(pabs(a_x), tiny); |
| 42 | // The monomial coefficients of the numerator polynomial (odd). |
| 43 | const T alpha_1 = pset1<T>(4.89352455891786e-03f); |
| 44 | const T alpha_3 = pset1<T>(6.37261928875436e-04f); |
| 45 | const T alpha_5 = pset1<T>(1.48572235717979e-05f); |
| 46 | const T alpha_7 = pset1<T>(5.12229709037114e-08f); |
| 47 | const T alpha_9 = pset1<T>(-8.60467152213735e-11f); |
| 48 | const T alpha_11 = pset1<T>(2.00018790482477e-13f); |
| 49 | const T alpha_13 = pset1<T>(-2.76076847742355e-16f); |
| 50 | |
| 51 | // The monomial coefficients of the denominator polynomial (even). |
| 52 | const T beta_0 = pset1<T>(4.89352518554385e-03f); |
| 53 | const T beta_2 = pset1<T>(2.26843463243900e-03f); |
| 54 | const T beta_4 = pset1<T>(1.18534705686654e-04f); |
| 55 | const T beta_6 = pset1<T>(1.19825839466702e-06f); |
| 56 | |
| 57 | // Since the polynomials are odd/even, we need x^2. |
| 58 | const T x2 = pmul(x, x); |
| 59 | |
| 60 | // Evaluate the numerator polynomial p. |
| 61 | T p = pmadd(x2, alpha_13, alpha_11); |
| 62 | p = pmadd(x2, p, alpha_9); |
| 63 | p = pmadd(x2, p, alpha_7); |
| 64 | p = pmadd(x2, p, alpha_5); |
| 65 | p = pmadd(x2, p, alpha_3); |
| 66 | p = pmadd(x2, p, alpha_1); |
| 67 | p = pmul(x, p); |
| 68 | |
| 69 | // Evaluate the denominator polynomial q. |
| 70 | T q = pmadd(x2, beta_6, beta_4); |
| 71 | q = pmadd(x2, q, beta_2); |
| 72 | q = pmadd(x2, q, beta_0); |
| 73 | |
| 74 | // Divide the numerator by the denominator. |
| 75 | return pselect(tiny_mask, x, pdiv(p, q)); |
| 76 | } |
| 77 | |
| 78 | template<typename RealScalar> |
| 79 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
| 80 | RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) |
| 81 | { |
| 82 | // IEEE IEC 6059 special cases. |
| 83 | if ((numext::isinf)(x) || (numext::isinf)(y)) |
| 84 | return NumTraits<RealScalar>::infinity(); |
| 85 | if ((numext::isnan)(x) || (numext::isnan)(y)) |
| 86 | return NumTraits<RealScalar>::quiet_NaN(); |
| 87 | |
| 88 | EIGEN_USING_STD(sqrt); |
| 89 | RealScalar p, qp; |
| 90 | p = numext::maxi(x,y); |
| 91 | if(p==RealScalar(0)) return RealScalar(0); |
| 92 | qp = numext::mini(y,x) / p; |
| 93 | return p * sqrt(RealScalar(1) + qp*qp); |
| 94 | } |
| 95 | |
| 96 | template<typename Scalar> |
| 97 | struct hypot_impl |
| 98 | { |
| 99 | typedef typename NumTraits<Scalar>::Real RealScalar; |
| 100 | static EIGEN_DEVICE_FUNC |
| 101 | inline RealScalar run(const Scalar& x, const Scalar& y) |
| 102 | { |
| 103 | EIGEN_USING_STD(abs); |
| 104 | return positive_real_hypot<RealScalar>(abs(x), abs(y)); |
| 105 | } |
| 106 | }; |
| 107 | |
| 108 | // Generic complex sqrt implementation that correctly handles corner cases |
| 109 | // according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt |
| 110 | template<typename T> |
| 111 | EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& z) { |
| 112 | // Computes the principal sqrt of the input. |
| 113 | // |
| 114 | // For a complex square root of the number x + i*y. We want to find real |
| 115 | // numbers u and v such that |
| 116 | // (u + i*v)^2 = x + i*y <=> |
| 117 | // u^2 - v^2 + i*2*u*v = x + i*v. |
| 118 | // By equating the real and imaginary parts we get: |
| 119 | // u^2 - v^2 = x |
| 120 | // 2*u*v = y. |
| 121 | // |
| 122 | // For x >= 0, this has the numerically stable solution |
| 123 | // u = sqrt(0.5 * (x + sqrt(x^2 + y^2))) |
| 124 | // v = y / (2 * u) |
| 125 | // and for x < 0, |
| 126 | // v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2))) |
| 127 | // u = y / (2 * v) |
| 128 | // |
| 129 | // Letting w = sqrt(0.5 * (|x| + |z|)), |
| 130 | // if x == 0: u = w, v = sign(y) * w |
| 131 | // if x > 0: u = w, v = y / (2 * w) |
| 132 | // if x < 0: u = |y| / (2 * w), v = sign(y) * w |
| 133 | |
| 134 | const T x = numext::real(z); |
| 135 | const T y = numext::imag(z); |
| 136 | const T zero = T(0); |
| 137 | const T w = numext::sqrt(T(0.5) * (numext::abs(x) + numext::hypot(x, y))); |
| 138 | |
| 139 | return |
| 140 | (numext::isinf)(y) ? std::complex<T>(NumTraits<T>::infinity(), y) |
| 141 | : x == zero ? std::complex<T>(w, y < zero ? -w : w) |
| 142 | : x > zero ? std::complex<T>(w, y / (2 * w)) |
| 143 | : std::complex<T>(numext::abs(y) / (2 * w), y < zero ? -w : w ); |
| 144 | } |
| 145 | |
| 146 | // Generic complex rsqrt implementation. |
| 147 | template<typename T> |
| 148 | EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& z) { |
| 149 | // Computes the principal reciprocal sqrt of the input. |
| 150 | // |
| 151 | // For a complex reciprocal square root of the number z = x + i*y. We want to |
| 152 | // find real numbers u and v such that |
| 153 | // (u + i*v)^2 = 1 / (x + i*y) <=> |
| 154 | // u^2 - v^2 + i*2*u*v = x/|z|^2 - i*v/|z|^2. |
| 155 | // By equating the real and imaginary parts we get: |
| 156 | // u^2 - v^2 = x/|z|^2 |
| 157 | // 2*u*v = y/|z|^2. |
| 158 | // |
| 159 | // For x >= 0, this has the numerically stable solution |
| 160 | // u = sqrt(0.5 * (x + |z|)) / |z| |
| 161 | // v = -y / (2 * u * |z|) |
| 162 | // and for x < 0, |
| 163 | // v = -sign(y) * sqrt(0.5 * (-x + |z|)) / |z| |
| 164 | // u = -y / (2 * v * |z|) |
| 165 | // |
| 166 | // Letting w = sqrt(0.5 * (|x| + |z|)), |
| 167 | // if x == 0: u = w / |z|, v = -sign(y) * w / |z| |
| 168 | // if x > 0: u = w / |z|, v = -y / (2 * w * |z|) |
| 169 | // if x < 0: u = |y| / (2 * w * |z|), v = -sign(y) * w / |z| |
| 170 | |
| 171 | const T x = numext::real(z); |
| 172 | const T y = numext::imag(z); |
| 173 | const T zero = T(0); |
| 174 | |
| 175 | const T abs_z = numext::hypot(x, y); |
| 176 | const T w = numext::sqrt(T(0.5) * (numext::abs(x) + abs_z)); |
| 177 | const T woz = w / abs_z; |
| 178 | // Corner cases consistent with 1/sqrt(z) on gcc/clang. |
| 179 | return |
| 180 | abs_z == zero ? std::complex<T>(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN()) |
| 181 | : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex<T>(zero, zero) |
| 182 | : x == zero ? std::complex<T>(woz, y < zero ? woz : -woz) |
| 183 | : x > zero ? std::complex<T>(woz, -y / (2 * w * abs_z)) |
| 184 | : std::complex<T>(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz ); |
| 185 | } |
| 186 | |
| 187 | template<typename T> |
| 188 | EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z) { |
| 189 | // Computes complex log. |
| 190 | T a = numext::abs(z); |
| 191 | EIGEN_USING_STD(atan2); |
| 192 | T b = atan2(z.imag(), z.real()); |
| 193 | return std::complex<T>(numext::log(a), b); |
| 194 | } |
| 195 | |
| 196 | } // end namespace internal |
| 197 | |
| 198 | } // end namespace Eigen |
| 199 | |
| 200 | #endif // EIGEN_MATHFUNCTIONSIMPL_H |
| 201 | |