1 | // This file is part of Eigen, a lightweight C++ template library |
2 | // for linear algebra. |
3 | // |
4 | // Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) |
5 | // Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr> |
6 | // |
7 | // This Source Code Form is subject to the terms of the Mozilla |
8 | // Public License v. 2.0. If a copy of the MPL was not distributed |
9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10 | |
11 | #ifndef EIGEN_MATHFUNCTIONSIMPL_H |
12 | #define EIGEN_MATHFUNCTIONSIMPL_H |
13 | |
14 | namespace Eigen { |
15 | |
16 | namespace internal { |
17 | |
18 | /** \internal \returns the hyperbolic tan of \a a (coeff-wise) |
19 | Doesn't do anything fancy, just a 13/6-degree rational interpolant which |
20 | is accurate up to a couple of ulps in the (approximate) range [-8, 8], |
21 | outside of which tanh(x) = +/-1 in single precision. The input is clamped |
22 | to the range [-c, c]. The value c is chosen as the smallest value where |
23 | the approximation evaluates to exactly 1. In the reange [-0.0004, 0.0004] |
24 | the approxmation tanh(x) ~= x is used for better accuracy as x tends to zero. |
25 | |
26 | This implementation works on both scalars and packets. |
27 | */ |
28 | template<typename T> |
29 | T generic_fast_tanh_float(const T& a_x) |
30 | { |
31 | // Clamp the inputs to the range [-c, c] |
32 | #ifdef EIGEN_VECTORIZE_FMA |
33 | const T plus_clamp = pset1<T>(7.99881172180175781f); |
34 | const T minus_clamp = pset1<T>(-7.99881172180175781f); |
35 | #else |
36 | const T plus_clamp = pset1<T>(7.90531110763549805f); |
37 | const T minus_clamp = pset1<T>(-7.90531110763549805f); |
38 | #endif |
39 | const T tiny = pset1<T>(0.0004f); |
40 | const T x = pmax(pmin(a_x, plus_clamp), minus_clamp); |
41 | const T tiny_mask = pcmp_lt(pabs(a_x), tiny); |
42 | // The monomial coefficients of the numerator polynomial (odd). |
43 | const T alpha_1 = pset1<T>(4.89352455891786e-03f); |
44 | const T alpha_3 = pset1<T>(6.37261928875436e-04f); |
45 | const T alpha_5 = pset1<T>(1.48572235717979e-05f); |
46 | const T alpha_7 = pset1<T>(5.12229709037114e-08f); |
47 | const T alpha_9 = pset1<T>(-8.60467152213735e-11f); |
48 | const T alpha_11 = pset1<T>(2.00018790482477e-13f); |
49 | const T alpha_13 = pset1<T>(-2.76076847742355e-16f); |
50 | |
51 | // The monomial coefficients of the denominator polynomial (even). |
52 | const T beta_0 = pset1<T>(4.89352518554385e-03f); |
53 | const T beta_2 = pset1<T>(2.26843463243900e-03f); |
54 | const T beta_4 = pset1<T>(1.18534705686654e-04f); |
55 | const T beta_6 = pset1<T>(1.19825839466702e-06f); |
56 | |
57 | // Since the polynomials are odd/even, we need x^2. |
58 | const T x2 = pmul(x, x); |
59 | |
60 | // Evaluate the numerator polynomial p. |
61 | T p = pmadd(x2, alpha_13, alpha_11); |
62 | p = pmadd(x2, p, alpha_9); |
63 | p = pmadd(x2, p, alpha_7); |
64 | p = pmadd(x2, p, alpha_5); |
65 | p = pmadd(x2, p, alpha_3); |
66 | p = pmadd(x2, p, alpha_1); |
67 | p = pmul(x, p); |
68 | |
69 | // Evaluate the denominator polynomial q. |
70 | T q = pmadd(x2, beta_6, beta_4); |
71 | q = pmadd(x2, q, beta_2); |
72 | q = pmadd(x2, q, beta_0); |
73 | |
74 | // Divide the numerator by the denominator. |
75 | return pselect(tiny_mask, x, pdiv(p, q)); |
76 | } |
77 | |
78 | template<typename RealScalar> |
79 | EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE |
80 | RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) |
81 | { |
82 | // IEEE IEC 6059 special cases. |
83 | if ((numext::isinf)(x) || (numext::isinf)(y)) |
84 | return NumTraits<RealScalar>::infinity(); |
85 | if ((numext::isnan)(x) || (numext::isnan)(y)) |
86 | return NumTraits<RealScalar>::quiet_NaN(); |
87 | |
88 | EIGEN_USING_STD(sqrt); |
89 | RealScalar p, qp; |
90 | p = numext::maxi(x,y); |
91 | if(p==RealScalar(0)) return RealScalar(0); |
92 | qp = numext::mini(y,x) / p; |
93 | return p * sqrt(RealScalar(1) + qp*qp); |
94 | } |
95 | |
96 | template<typename Scalar> |
97 | struct hypot_impl |
98 | { |
99 | typedef typename NumTraits<Scalar>::Real RealScalar; |
100 | static EIGEN_DEVICE_FUNC |
101 | inline RealScalar run(const Scalar& x, const Scalar& y) |
102 | { |
103 | EIGEN_USING_STD(abs); |
104 | return positive_real_hypot<RealScalar>(abs(x), abs(y)); |
105 | } |
106 | }; |
107 | |
108 | // Generic complex sqrt implementation that correctly handles corner cases |
109 | // according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt |
110 | template<typename T> |
111 | EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& z) { |
112 | // Computes the principal sqrt of the input. |
113 | // |
114 | // For a complex square root of the number x + i*y. We want to find real |
115 | // numbers u and v such that |
116 | // (u + i*v)^2 = x + i*y <=> |
117 | // u^2 - v^2 + i*2*u*v = x + i*v. |
118 | // By equating the real and imaginary parts we get: |
119 | // u^2 - v^2 = x |
120 | // 2*u*v = y. |
121 | // |
122 | // For x >= 0, this has the numerically stable solution |
123 | // u = sqrt(0.5 * (x + sqrt(x^2 + y^2))) |
124 | // v = y / (2 * u) |
125 | // and for x < 0, |
126 | // v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2))) |
127 | // u = y / (2 * v) |
128 | // |
129 | // Letting w = sqrt(0.5 * (|x| + |z|)), |
130 | // if x == 0: u = w, v = sign(y) * w |
131 | // if x > 0: u = w, v = y / (2 * w) |
132 | // if x < 0: u = |y| / (2 * w), v = sign(y) * w |
133 | |
134 | const T x = numext::real(z); |
135 | const T y = numext::imag(z); |
136 | const T zero = T(0); |
137 | const T w = numext::sqrt(T(0.5) * (numext::abs(x) + numext::hypot(x, y))); |
138 | |
139 | return |
140 | (numext::isinf)(y) ? std::complex<T>(NumTraits<T>::infinity(), y) |
141 | : x == zero ? std::complex<T>(w, y < zero ? -w : w) |
142 | : x > zero ? std::complex<T>(w, y / (2 * w)) |
143 | : std::complex<T>(numext::abs(y) / (2 * w), y < zero ? -w : w ); |
144 | } |
145 | |
146 | // Generic complex rsqrt implementation. |
147 | template<typename T> |
148 | EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& z) { |
149 | // Computes the principal reciprocal sqrt of the input. |
150 | // |
151 | // For a complex reciprocal square root of the number z = x + i*y. We want to |
152 | // find real numbers u and v such that |
153 | // (u + i*v)^2 = 1 / (x + i*y) <=> |
154 | // u^2 - v^2 + i*2*u*v = x/|z|^2 - i*v/|z|^2. |
155 | // By equating the real and imaginary parts we get: |
156 | // u^2 - v^2 = x/|z|^2 |
157 | // 2*u*v = y/|z|^2. |
158 | // |
159 | // For x >= 0, this has the numerically stable solution |
160 | // u = sqrt(0.5 * (x + |z|)) / |z| |
161 | // v = -y / (2 * u * |z|) |
162 | // and for x < 0, |
163 | // v = -sign(y) * sqrt(0.5 * (-x + |z|)) / |z| |
164 | // u = -y / (2 * v * |z|) |
165 | // |
166 | // Letting w = sqrt(0.5 * (|x| + |z|)), |
167 | // if x == 0: u = w / |z|, v = -sign(y) * w / |z| |
168 | // if x > 0: u = w / |z|, v = -y / (2 * w * |z|) |
169 | // if x < 0: u = |y| / (2 * w * |z|), v = -sign(y) * w / |z| |
170 | |
171 | const T x = numext::real(z); |
172 | const T y = numext::imag(z); |
173 | const T zero = T(0); |
174 | |
175 | const T abs_z = numext::hypot(x, y); |
176 | const T w = numext::sqrt(T(0.5) * (numext::abs(x) + abs_z)); |
177 | const T woz = w / abs_z; |
178 | // Corner cases consistent with 1/sqrt(z) on gcc/clang. |
179 | return |
180 | abs_z == zero ? std::complex<T>(NumTraits<T>::infinity(), NumTraits<T>::quiet_NaN()) |
181 | : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex<T>(zero, zero) |
182 | : x == zero ? std::complex<T>(woz, y < zero ? woz : -woz) |
183 | : x > zero ? std::complex<T>(woz, -y / (2 * w * abs_z)) |
184 | : std::complex<T>(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz ); |
185 | } |
186 | |
187 | template<typename T> |
188 | EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z) { |
189 | // Computes complex log. |
190 | T a = numext::abs(z); |
191 | EIGEN_USING_STD(atan2); |
192 | T b = atan2(z.imag(), z.real()); |
193 | return std::complex<T>(numext::log(a), b); |
194 | } |
195 | |
196 | } // end namespace internal |
197 | |
198 | } // end namespace Eigen |
199 | |
200 | #endif // EIGEN_MATHFUNCTIONSIMPL_H |
201 | |