atan2.cpp source code [libc/src/math/generic/atan2.cpp]

1	//===-- Double-precision atan2 function -----------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "src/math/atan2.h"
10	#include "atan_utils.h"
11	#include "src/__support/FPUtil/FEnvImpl.h"
12	#include "src/__support/FPUtil/FPBits.h"
13	#include "src/__support/FPUtil/double_double.h"
14	#include "src/__support/FPUtil/multiply_add.h"
15	#include "src/__support/FPUtil/nearest_integer.h"
16	#include "src/__support/macros/config.h"
17	#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
18
19	namespace LIBC_NAMESPACE_DECL {
20
21	// There are several range reduction steps we can take for atan2(y, x) as
22	// follow:
23
24	// Range reduction 1: signness*
25	// atan2(y, x) will return a number between -PI and PI representing the angle
26	// forming by the 0x axis and the vector (x, y) on the 0xy-plane.
27	// In particular, we have that:
28	// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant)
29	// = pi + atan( y/x ) if x < 0 and y >= 0 (II-quadrant)
30	// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant)
31	// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant)
32	// Since atan function is odd, we can use the formula:
33	// atan(-u) = -atan(u)
34	// to adjust the above conditions a bit further:
35	// atan2(y, x) = atan( \|y\|/\|x\| ) if x >= 0 and y >= 0 (I-quadrant)
36	// = pi - atan( \|y\|/\|x\| ) if x < 0 and y >= 0 (II-quadrant)
37	// = -pi + atan( \|y\|/\|x\| ) if x < 0 and y < 0 (III-quadrant)
38	// = -atan( \|y\|/\|x\| ) if x >= 0 and y < 0 (IV-quadrant)
39	// Which can be simplified to:
40	// atan2(y, x) = sign(y) atan( \|y\|/\|x\| ) if x >= 0*
41	// = sign(y) (pi - atan( \|y\|/\|x\| )) if x < 0*
42
43	// Range reduction 2: reciprocal*
44	// Now that the argument inside atan is positive, we can use the formula:
45	// atan(1/x) = pi/2 - atan(x)
46	// to make the argument inside atan <= 1 as follow:
47	// atan2(y, x) = sign(y) atan( \|y\|/\|x\|) if 0 <= \|y\| <= x*
48	// = sign(y) (pi/2 - atan( \|x\|/\|y\| ) if 0 <= x < \|y\|*
49	// = sign(y) (pi - atan( \|y\|/\|x\| )) if 0 <= \|y\| <= -x*
50	// = sign(y) (pi/2 + atan( \|x\|/\|y\| )) if 0 <= -x < \|y\|*
51
52	// Range reduction 3: look up table.*
53	// After the previous two range reduction steps, we reduce the problem to
54	// compute atan(u) with 0 <= u <= 1, or to be precise:
55	// atan( n / d ) where n = min(\|x\|, \|y\|) and d = max(\|x\|, \|y\|).
56	// An accurate polynomial approximation for the whole [0, 1] input range will
57	// require a very large degree. To make it more efficient, we reduce the input
58	// range further by finding an integer idx such that:
59	// \| n/d - idx/64 \| <= 1/128.
60	// In particular,
61	// idx := round(2^6 n/d)*
62	// Then for the fast pass, we find a polynomial approximation for:
63	// atan( n/d ) ~ atan( idx/64 ) + (n/d - idx/64) Q(n/d - idx/64)*
64	// For the accurate pass, we use the addition formula:
65	// atan( n/d ) - atan( idx/64 ) = atan( (n/d - idx/64)/(1 + (nidx)/(64d)) )
66	// = atan( (n - d(idx/64))/(d + n(idx/64)) )
67	// And for the fast pass, we use degree-9 Taylor polynomial to compute the RHS:
68	// atan(u) ~ P(u) = u - u^3/3 + u^5/5 - u^7/7 + u^9/9
69	// with absolute errors bounded by:
70	// \|atan(u) - P(u)\| < \|u\|^11 / 11 < 2^-80
71	// and relative errors bounded by:
72	// \|(atan(u) - P(u)) / P(u)\| < u^10 / 11 < 2^-73.
73
74	LLVM_LIBC_FUNCTION(double, atan2, (double y, double x)) {
75	using FPBits = fputil::FPBits<double>;
76
77	constexpr double IS_NEG[`2`] = {`1.0`, -`1.0`};
78	constexpr DoubleDouble ZERO = {`0.0`, `0.0`};
79	constexpr DoubleDouble MZERO = {-`0.0`, -`0.0`};
80	constexpr DoubleDouble PI = {`0x1.1a62633145c07p-53`, `0x1.921fb54442d18p+1`};
81	constexpr DoubleDouble MPI = {-`0x1.1a62633145c07p-53`, -`0x1.921fb54442d18p+1`};
82	constexpr DoubleDouble PI_OVER_2 = {`0x1.1a62633145c07p-54`,
83	`0x1.921fb54442d18p0`};
84	constexpr DoubleDouble MPI_OVER_2 = {-`0x1.1a62633145c07p-54`,
85	-`0x1.921fb54442d18p0`};
86	constexpr DoubleDouble PI_OVER_4 = {`0x1.1a62633145c07p-55`,
87	`0x1.921fb54442d18p-1`};
88	constexpr DoubleDouble THREE_PI_OVER_4 = {`0x1.a79394c9e8a0ap-54`,
89	`0x1.2d97c7f3321d2p+1`};
90	// Adjustment for constant term:
91	// CONST_ADJ[x_sign][y_sign][recip]
92	constexpr DoubleDouble CONST_ADJ[`2`][`2`][`2`] = {
93	{{ZERO, MPI_OVER_2}, {MZERO, MPI_OVER_2}},
94	{{MPI, PI_OVER_2}, {MPI, PI_OVER_2}}};
95
96	FPBits x_bits(x), y_bits(y);
97	bool x_sign = x_bits.sign().is_neg();
98	bool y_sign = y_bits.sign().is_neg();
99	x_bits = x_bits.abs();
100	y_bits = y_bits.abs();
101	uint64_t x_abs = x_bits.uintval();
102	uint64_t y_abs = y_bits.uintval();
103	bool recip = x_abs < y_abs;
104	uint64_t min_abs = recip ? x_abs : y_abs;
105	uint64_t max_abs = !recip ? x_abs : y_abs;
106	unsigned min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN);
107	unsigned max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN);
108
109	double num = FPBits(min_abs).get_val();
110	double den = FPBits(max_abs).get_val();
111
112	// Check for exceptional cases, whether inputs are 0, inf, nan, or close to
113	// overflow, or close to underflow.
114	if (LIBC_UNLIKELY(max_exp > `0x7ffU` - `128U` \|\| min_exp < `128U`)) {
115	if (x_bits.is_nan() \|\| y_bits.is_nan()) {
116	if (x_bits.is_signaling_nan() \|\| y_bits.is_signaling_nan())
117	fputil::raise_except_if_required(FE_INVALID);
118	return FPBits::quiet_nan().get_val();
119	}
120	unsigned x_except = x == `0.0` ? `0` : (FPBits(x_abs).is_inf() ? `2` : `1`);
121	unsigned y_except = y == `0.0` ? `0` : (FPBits(y_abs).is_inf() ? `2` : `1`);
122
123	// Exceptional cases:
124	// EXCEPT[y_except][x_except][x_is_neg]
125	// with x_except & y_except:
126	// 0: zero
127	// 1: finite, non-zero
128	// 2: infinity
129	constexpr DoubleDouble EXCEPTS[`3`][`3`][`2`] = {
130	{{ZERO, PI}, {ZERO, PI}, {ZERO, PI}},
131	{{PI_OVER_2, PI_OVER_2}, {ZERO, ZERO}, {ZERO, PI}},
132	{{PI_OVER_2, PI_OVER_2},
133	{PI_OVER_2, PI_OVER_2},
134	{PI_OVER_4, THREE_PI_OVER_4}},
135	};
136
137	if ((x_except != `1`) \|\| (y_except != `1`)) {
138	DoubleDouble r = EXCEPTS[y_except][x_except][x_sign];
139	return fputil::multiply_add(IS_NEG[y_sign], r.hi, IS_NEG[y_sign] * r.lo);
140	}
141	bool scale_up = min_exp < `128U`;
142	bool scale_down = max_exp > `0x7ffU` - `128U`;
143	// At least one input is denormal, multiply both numerator and denominator
144	// by some large enough power of 2 to normalize denormal inputs.
145	if (scale_up) {
146	num *= `0x1.0p64`;
147	if (!scale_down)
148	den *= `0x1.0p64`;
149	} else if (scale_down) {
150	den *= `0x1.0p-64`;
151	if (!scale_up)
152	num *= `0x1.0p-64`;
153	}
154
155	min_abs = FPBits(num).uintval();
156	max_abs = FPBits(den).uintval();
157	min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN);
158	max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN);
159	}
160
161	double final_sign = IS_NEG[(x_sign != y_sign) != recip];
162	DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip];
163	unsigned exp_diff = max_exp - min_exp;
164	// We have the following bound for normalized n and d:
165	// 2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1).
166	if (LIBC_UNLIKELY(exp_diff > `54`)) {
167	return fputil::multiply_add(final_sign, const_term.hi,
168	final_sign * (const_term.lo + num / den));
169	}
170
171	double k = fputil::nearest_integer(`64.0` * num / den);
172	unsigned idx = static_cast<unsigned>(k);
173	// k = idx / 64
174	k *= `0x1.0p-6`;
175
176	// Range reduction:
177	// atan(n/d) - atan(k/64) = atan((n/d - k/64) / (1 + (n/d) (k/64)))*
178	// = atan((n - d k/64)) / (d + n * k/64))*
179	DoubleDouble num_k = fputil::exact_mult(num, k);
180	DoubleDouble den_k = fputil::exact_mult(den, k);
181
182	// num_dd = n - d k*
183	DoubleDouble num_dd = fputil::exact_add(num - den_k.hi, -den_k.lo);
184	// den_dd = d + n k*
185	DoubleDouble den_dd = fputil::exact_add(den, num_k.hi);
186	den_dd.lo += num_k.lo;
187
188	// q = (n - d k) / (d + n * k)*
189	DoubleDouble q = fputil::div(num_dd, den_dd);
190	// p ~ atan(q)
191	DoubleDouble p = atan_eval(q);
192
193	DoubleDouble r = fputil::add(const_term, fputil::add(ATAN_I[idx], p));
194	r.hi *= final_sign;
195	r.lo *= final_sign;
196
197	return r.hi + r.lo;
198	}
199
200	} // namespace LIBC_NAMESPACE_DECL
201

source code of libc/src/math/generic/atan2.cpp