atan2f128.cpp source code [libc/src/math/generic/atan2f128.cpp]

1	//===-- Quad-precision atan2 function -------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "src/math/atan2f128.h"
10	#include "atan_utils.h"
11	#include "src/__support/FPUtil/FPBits.h"
12	#include "src/__support/FPUtil/dyadic_float.h"
13	#include "src/__support/FPUtil/multiply_add.h"
14	#include "src/__support/FPUtil/nearest_integer.h"
15	#include "src/__support/integer_literals.h"
16	#include "src/__support/macros/config.h"
17	#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
18	#include "src/__support/macros/properties/types.h"
19	#include "src/__support/uint128.h"
20
21	namespace LIBC_NAMESPACE_DECL {
22
23	namespace {
24
25	using Float128 = fputil::DyadicFloat<`128`>;
26
27	static constexpr Float128 ZERO = {Sign::POS, `0`, `0_u128`};
28	static constexpr Float128 MZERO = {Sign::NEG, `0`, `0_u128`};
29	static constexpr Float128 PI = {Sign::POS, -`126`,
30	`0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128`};
31	static constexpr Float128 MPI = {Sign::NEG, -`126`,
32	`0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128`};
33	static constexpr Float128 PI_OVER_2 = {
34	Sign::POS, -`127`, `0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128`};
35	static constexpr Float128 MPI_OVER_2 = {
36	Sign::NEG, -`127`, `0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128`};
37	static constexpr Float128 PI_OVER_4 = {
38	Sign::POS, -`128`, `0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128`};
39	static constexpr Float128 THREE_PI_OVER_4 = {
40	Sign::POS, -`128`, `0x96cbe3f9'990e91a7'9394c9e8'a0a5159d_u128`};
41
42	// Adjustment for constant term:
43	// CONST_ADJ[x_sign][y_sign][recip]
44	static constexpr Float128 CONST_ADJ[`2`][`2`][`2`] = {
45	{{ZERO, MPI_OVER_2}, {MZERO, MPI_OVER_2}},
46	{{MPI, PI_OVER_2}, {MPI, PI_OVER_2}}};
47
48	} // anonymous namespace
49
50	// There are several range reduction steps we can take for atan2(y, x) as
51	// follow:
52
53	// Range reduction 1: signness*
54	// atan2(y, x) will return a number between -PI and PI representing the angle
55	// forming by the 0x axis and the vector (x, y) on the 0xy-plane.
56	// In particular, we have that:
57	// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant)
58	// = pi + atan( y/x ) if x < 0 and y >= 0 (II-quadrant)
59	// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant)
60	// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant)
61	// Since atan function is odd, we can use the formula:
62	// atan(-u) = -atan(u)
63	// to adjust the above conditions a bit further:
64	// atan2(y, x) = atan( \|y\|/\|x\| ) if x >= 0 and y >= 0 (I-quadrant)
65	// = pi - atan( \|y\|/\|x\| ) if x < 0 and y >= 0 (II-quadrant)
66	// = -pi + atan( \|y\|/\|x\| ) if x < 0 and y < 0 (III-quadrant)
67	// = -atan( \|y\|/\|x\| ) if x >= 0 and y < 0 (IV-quadrant)
68	// Which can be simplified to:
69	// atan2(y, x) = sign(y) atan( \|y\|/\|x\| ) if x >= 0*
70	// = sign(y) (pi - atan( \|y\|/\|x\| )) if x < 0*
71
72	// Range reduction 2: reciprocal*
73	// Now that the argument inside atan is positive, we can use the formula:
74	// atan(1/x) = pi/2 - atan(x)
75	// to make the argument inside atan <= 1 as follow:
76	// atan2(y, x) = sign(y) atan( \|y\|/\|x\|) if 0 <= \|y\| <= x*
77	// = sign(y) (pi/2 - atan( \|x\|/\|y\| ) if 0 <= x < \|y\|*
78	// = sign(y) (pi - atan( \|y\|/\|x\| )) if 0 <= \|y\| <= -x*
79	// = sign(y) (pi/2 + atan( \|x\|/\|y\| )) if 0 <= -x < \|y\|*
80
81	// Range reduction 3: look up table.*
82	// After the previous two range reduction steps, we reduce the problem to
83	// compute atan(u) with 0 <= u <= 1, or to be precise:
84	// atan( n / d ) where n = min(\|x\|, \|y\|) and d = max(\|x\|, \|y\|).
85	// An accurate polynomial approximation for the whole [0, 1] input range will
86	// require a very large degree. To make it more efficient, we reduce the input
87	// range further by finding an integer idx such that:
88	// \| n/d - idx/64 \| <= 1/128.
89	// In particular,
90	// idx := round(2^6 n/d)*
91	// Then for the fast pass, we find a polynomial approximation for:
92	// atan( n/d ) ~ atan( idx/64 ) + (n/d - idx/64) Q(n/d - idx/64)*
93	// For the accurate pass, we use the addition formula:
94	// atan( n/d ) - atan( idx/64 ) = atan( (n/d - idx/64)/(1 + (nidx)/(64d)) )
95	// = atan( (n - d(idx/64))/(d + n(idx/64)) )
96	// And for the fast pass, we use degree-13 minimax polynomial to compute the
97	// RHS:
98	// atan(u) ~ P(u) = u - c_3 u^3 + c_5 * u^5 - c_7 * u^7 + c_9 u^9 -
99	// - c_11 u^11 + c_13 * u^13*
100	// with absolute errors bounded by:
101	// \|atan(u) - P(u)\| < 2^-121
102	// and relative errors bounded by:
103	// \|(atan(u) - P(u)) / P(u)\| < 2^-114.
104
105	LLVM_LIBC_FUNCTION(float128, atan2f128, (float128 y, float128 x)) {
106	using FPBits = fputil::FPBits<float128>;
107	using Float128 = fputil::DyadicFloat<`128`>;
108
109	FPBits x_bits(x), y_bits(y);
110	bool x_sign = x_bits.sign().is_neg();
111	bool y_sign = y_bits.sign().is_neg();
112	x_bits = x_bits.abs();
113	y_bits = y_bits.abs();
114	UInt128 x_abs = x_bits.uintval();
115	UInt128 y_abs = y_bits.uintval();
116	bool recip = x_abs < y_abs;
117	UInt128 min_abs = recip ? x_abs : y_abs;
118	UInt128 max_abs = !recip ? x_abs : y_abs;
119	unsigned min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN);
120	unsigned max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN);
121
122	Float128 num(FPBits(min_abs).get_val());
123	Float128 den(FPBits(max_abs).get_val());
124
125	// Check for exceptional cases, whether inputs are 0, inf, nan, or close to
126	// overflow, or close to underflow.
127	if (LIBC_UNLIKELY(max_exp >= `0x7fffU` \|\| min_exp == `0U`)) {
128	if (x_bits.is_nan() \|\| y_bits.is_nan())
129	return FPBits::quiet_nan().get_val();
130	unsigned x_except = x == `0` ? `0` : (FPBits(x_abs).is_inf() ? `2` : `1`);
131	unsigned y_except = y == `0` ? `0` : (FPBits(y_abs).is_inf() ? `2` : `1`);
132
133	// Exceptional cases:
134	// EXCEPT[y_except][x_except][x_is_neg]
135	// with x_except & y_except:
136	// 0: zero
137	// 1: finite, non-zero
138	// 2: infinity
139	constexpr Float128 EXCEPTS[`3`][`3`][`2`] = {
140	{{ZERO, PI}, {ZERO, PI}, {ZERO, PI}},
141	{{PI_OVER_2, PI_OVER_2}, {ZERO, ZERO}, {ZERO, PI}},
142	{{PI_OVER_2, PI_OVER_2},
143	{PI_OVER_2, PI_OVER_2},
144	{PI_OVER_4, THREE_PI_OVER_4}},
145	};
146
147	if ((x_except != `1`) \|\| (y_except != `1`)) {
148	Float128 r = EXCEPTS[y_except][x_except][x_sign];
149	if (y_sign)
150	r.sign = r.sign.negate();
151	return static_cast<float128>(r);
152	}
153	}
154
155	bool final_sign = ((x_sign != y_sign) != recip);
156	Float128 const_term = CONST_ADJ[x_sign][y_sign][recip];
157	int exp_diff = den.exponent - num.exponent;
158	// We have the following bound for normalized n and d:
159	// 2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1).
160	if (LIBC_UNLIKELY(exp_diff > FPBits::FRACTION_LEN + `2`)) {
161	if (final_sign)
162	const_term.sign = const_term.sign.negate();
163	return static_cast<float128>(const_term);
164	}
165
166	// Take 24 leading bits of num and den to convert to float for fast division.
167	// We also multiply the numerator by 64 using integer addition directly to the
168	// exponent field.
169	float num_f =
170	cpp::bit_cast<float>(static_cast<uint32_t>(num.mantissa >> `104`) +
171	(`6U` << fputil::FPBits<float>::FRACTION_LEN));
172	float den_f = cpp::bit_cast<float>(
173	static_cast<uint32_t>(den.mantissa >> `104`) +
174	(static_cast<uint32_t>(exp_diff) << fputil::FPBits<float>::FRACTION_LEN));
175
176	float k = fputil::nearest_integer(num_f / den_f);
177	unsigned idx = static_cast<unsigned>(k);
178
179	// k_f128 = idx / 64
180	Float128 k_f128(Sign::POS, -`6`, Float128::MantissaType(idx));
181
182	// Range reduction:
183	// atan(n/d) - atan(k) = atan((n/d - k/64) / (1 + (n/d) (k/64)))*
184	// = atan((n - d k/64)) / (d + n * k/64))*
185	// num_f128 = n - d k/64*
186	Float128 num_f128 = fputil::multiply_add(den, -k_f128, num);
187	// den_f128 = d + n k/64*
188	Float128 den_f128 = fputil::multiply_add(num, k_f128, den);
189
190	// q = (n - d k) / (d + n * k)*
191	Float128 q = fputil::quick_mul(num_f128, fputil::approx_reciprocal(den_f128));
192	// p ~ atan(q)
193	Float128 p = atan_eval(q);
194
195	Float128 r =
196	fputil::quick_add(const_term, fputil::quick_add(ATAN_I_F128[idx], p));
197	if (final_sign)
198	r.sign = r.sign.negate();
199
200	return static_cast<float128>(r);
201	}
202
203	} // namespace LIBC_NAMESPACE_DECL
204

source code of libc/src/math/generic/atan2f128.cpp