atan.cpp source code [libc/src/math/generic/atan.cpp]

1	//===-- Double-precision atan function ------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "src/math/atan.h"
10	#include "atan_utils.h"
11	#include "src/__support/FPUtil/FEnvImpl.h"
12	#include "src/__support/FPUtil/FPBits.h"
13	#include "src/__support/FPUtil/double_double.h"
14	#include "src/__support/FPUtil/multiply_add.h"
15	#include "src/__support/FPUtil/nearest_integer.h"
16	#include "src/__support/macros/config.h"
17	#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
18
19	namespace LIBC_NAMESPACE_DECL {
20
21	// To compute atan(x), we divided it into the following cases:
22	// \|x\| < 2^-26:*
23	// Since \|x\| > atan(\|x\|) > \|x\| - \|x\|^3/3, and \|x\|^3/3 < ulp(x)/2, we simply
24	// return atan(x) = x - sign(x) epsilon.*
25	// 2^-26 <= \|x\| < 1:*
26	// We perform range reduction mod 2^-6 = 1/64 as follow:
27	// Let k = 2^(-6) round(\|x\| * 2^6), then*
28	// atan(x) = sign(x) atan(\|x\|)*
29	// = sign(x) (atan(k) + atan((\|x\| - k) / (1 + \|x\|k)).
30	// We store atan(k) in a look up table, and perform intermediate steps in
31	// double-double.
32	// 1 < \|x\| < 2^53:*
33	// First we perform the transformation y = 1/\|x\|:
34	// atan(x) = sign(x) (pi/2 - atan(1/\|x\|))*
35	// = sign(x) (pi/2 - atan(y)).*
36	// Then we compute atan(y) using range reduction mod 2^-6 = 1/64 as the
37	// previous case:
38	// Let k = 2^(-6) round(y * 2^6), then*
39	// atan(y) = atan(k) + atan((y - k) / (1 + yk))*
40	// = atan(k) + atan((1/\|x\| - k) / (1 + k/\|x\|)
41	// = atan(k) + atan((1 - k\|x\|) / (\|x\| + k)).*
42	// \|x\| >= 2^53:*
43	// Using the reciprocal transformation:
44	// atan(x) = sign(x) (pi/2 - atan(1/\|x\|)).*
45	// We have that:
46	// atan(1/\|x\|) <= 1/\|x\| <= 2^-53,
47	// which is smaller than ulp(pi/2) / 2.
48	// So we can return:
49	// atan(x) = sign(x) (pi/2 - epsilon)*
50
51	LLVM_LIBC_FUNCTION(double, atan, (double x)) {
52	using FPBits = fputil::FPBits<double>;
53
54	constexpr double IS_NEG[`2`] = {`1.0`, -`1.0`};
55	constexpr DoubleDouble PI_OVER_2 = {`0x1.1a62633145c07p-54`,
56	`0x1.921fb54442d18p0`};
57	constexpr DoubleDouble MPI_OVER_2 = {-`0x1.1a62633145c07p-54`,
58	-`0x1.921fb54442d18p0`};
59
60	FPBits xbits(x);
61	bool x_sign = xbits.is_neg();
62	xbits = xbits.abs();
63	uint64_t x_abs = xbits.uintval();
64	int x_exp =
65	static_cast<int>(x_abs >> FPBits::FRACTION_LEN) - FPBits::EXP_BIAS;
66
67	// \|x\| < 1.
68	if (x_exp < `0`) {
69	if (LIBC_UNLIKELY(x_exp < -`26`)) {
70	#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
71	return x;
72	#else
73	if (x == `0.0`)
74	return x;
75	// \|x\| < 2^-26
76	return fputil::multiply_add(-`0x1.0p-54`, x, x);
77	#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
78	}
79
80	double x_d = xbits.get_val();
81	// k = 2^-6 round(2^6 * \|x\|)*
82	double k = fputil::nearest_integer(`0x1.0p6` * x_d);
83	unsigned idx = static_cast<unsigned>(k);
84	k *= `0x1.0p-6`;
85
86	// numerator = \|x\| - k
87	DoubleDouble num, den;
88	num.lo = `0.0`;
89	num.hi = x_d - k;
90
91	// denominator = 1 - k \|x\|*
92	den.hi = fputil::multiply_add(x_d, k, `1.0`);
93	DoubleDouble prod = fputil::exact_mult(x_d, k);
94	// Using Dekker's 2SUM algorithm to compute the lower part.
95	den.lo = ((`1.0` - den.hi) + prod.hi) + prod.lo;
96
97	// x_r = (\|x\| - k) / (1 + k \|x\|)*
98	DoubleDouble x_r = fputil::div(num, den);
99
100	// Approximating atan(x_r) using Taylor polynomial.
101	DoubleDouble p = atan_eval(x_r);
102
103	// atan(x) = sign(x) (atan(k) + atan(x_r))*
104	// = sign(x) (atan(k) + atan( (\|x\| - k) / (1 + k * \|x\|) ))*
105	#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
106	return IS_NEG[x_sign] * (ATAN_I[idx].hi + (p.hi + (p.lo + ATAN_I[idx].lo)));
107	#else
108
109	DoubleDouble c0 = fputil::exact_add(ATAN_I[idx].hi, p.hi);
110	double c1 = c0.lo + (ATAN_I[idx].lo + p.lo);
111	double r = IS_NEG[x_sign] * (c0.hi + c1);
112
113	return r;
114	#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
115	}
116
117	// \|x\| >= 2^53 or x is NaN.
118	if (LIBC_UNLIKELY(x_exp >= `53`)) {
119	// x is nan
120	if (xbits.is_nan()) {
121	if (xbits.is_signaling_nan()) {
122	fputil::raise_except_if_required(FE_INVALID);
123	return FPBits::quiet_nan().get_val();
124	}
125	return x;
126	}
127	// \|x\| >= 2^53
128	// atan(x) ~ sign(x) pi/2.*
129	if (x_exp >= `53`)
130	#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
131	return IS_NEG[x_sign] * PI_OVER_2.hi;
132	#else
133	return fputil::multiply_add(IS_NEG[x_sign], PI_OVER_2.hi,
134	IS_NEG[x_sign] * PI_OVER_2.lo);
135	#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
136	}
137
138	double x_d = xbits.get_val();
139	double y = `1.0` / x_d;
140
141	// k = 2^-6 round(2^6 / \|x\|)*
142	double k = fputil::nearest_integer(`0x1.0p6` * y);
143	unsigned idx = static_cast<unsigned>(k);
144	k *= `0x1.0p-6`;
145
146	// denominator = \|x\| + k
147	DoubleDouble den = fputil::exact_add(x_d, k);
148	// numerator = 1 - k \|x\|*
149	DoubleDouble num;
150	num.hi = fputil::multiply_add(-x_d, k, `1.0`);
151	DoubleDouble prod = fputil::exact_mult(x_d, k);
152	// Using Dekker's 2SUM algorithm to compute the lower part.
153	num.lo = ((`1.0` - num.hi) - prod.hi) - prod.lo;
154
155	// x_r = (1/\|x\| - k) / (1 - k/\|x\|)
156	// = (1 - k \|x\|) / (\|x\| - k)*
157	DoubleDouble x_r = fputil::div(num, den);
158
159	// Approximating atan(x_r) using Taylor polynomial.
160	DoubleDouble p = atan_eval(x_r);
161
162	// atan(x) = sign(x) (pi/2 - atan(1/\|x\|))*
163	// = sign(x) (pi/2 - atan(k) - atan(x_r))*
164	// = (-sign(x)) (-pi/2 + atan(k) + atan((1 - k\|x\|)/(\|x\| - k)))
165	#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
166	double lo_part = p.lo + ATAN_I[idx].lo + MPI_OVER_2.lo;
167	return IS_NEG[!x_sign] * (MPI_OVER_2.hi + ATAN_I[idx].hi + (p.hi + lo_part));
168	#else
169	DoubleDouble c0 = fputil::exact_add(MPI_OVER_2.hi, ATAN_I[idx].hi);
170	DoubleDouble c1 = fputil::exact_add(c0.hi, p.hi);
171	double c2 = c1.lo + (c0.lo + p.lo) + (ATAN_I[idx].lo + MPI_OVER_2.lo);
172
173	double r = IS_NEG[!x_sign] * (c1.hi + c2);
174
175	return r;
176	#endif
177	}
178
179	} // namespace LIBC_NAMESPACE_DECL
180

source code of libc/src/math/generic/atan.cpp