1 | //===-- Collection of utils for atan/atan2 ----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_SRC_MATH_GENERIC_ATAN_UTILS_H |
10 | #define LLVM_LIBC_SRC_MATH_GENERIC_ATAN_UTILS_H |
11 | |
12 | #include "src/__support/FPUtil/PolyEval.h" |
13 | #include "src/__support/FPUtil/double_double.h" |
14 | #include "src/__support/FPUtil/dyadic_float.h" |
15 | #include "src/__support/FPUtil/multiply_add.h" |
16 | #include "src/__support/integer_literals.h" |
17 | #include "src/__support/macros/config.h" |
18 | |
19 | namespace LIBC_NAMESPACE_DECL { |
20 | |
21 | namespace { |
22 | |
23 | using DoubleDouble = fputil::DoubleDouble; |
24 | using Float128 = fputil::DyadicFloat<128>; |
25 | |
26 | // atan(i/64) with i = 0..64, generated by Sollya with: |
27 | // > for i from 0 to 64 do { |
28 | // a = round(atan(i/64), D, RN); |
29 | // b = round(atan(i/64) - a, D, RN); |
30 | // print("{", b, ",", a, "},"); |
31 | // }; |
32 | constexpr DoubleDouble ATAN_I[65] = { |
33 | {0.0, 0.0}, |
34 | {-0x1.220c39d4dff5p-61, 0x1.fff555bbb729bp-7}, |
35 | {-0x1.5ec431444912cp-60, 0x1.ffd55bba97625p-6}, |
36 | {-0x1.86ef8f794f105p-63, 0x1.7fb818430da2ap-5}, |
37 | {-0x1.c934d86d23f1dp-60, 0x1.ff55bb72cfdeap-5}, |
38 | {0x1.ac4ce285df847p-58, 0x1.3f59f0e7c559dp-4}, |
39 | {-0x1.cfb654c0c3d98p-58, 0x1.7ee182602f10fp-4}, |
40 | {0x1.f7b8f29a05987p-58, 0x1.be39ebe6f07c3p-4}, |
41 | {-0x1.cd37686760c17p-59, 0x1.fd5ba9aac2f6ep-4}, |
42 | {-0x1.b485914dacf8cp-59, 0x1.1e1fafb043727p-3}, |
43 | {0x1.61a3b0ce9281bp-57, 0x1.3d6eee8c6626cp-3}, |
44 | {-0x1.054ab2c010f3dp-58, 0x1.5c9811e3ec26ap-3}, |
45 | {0x1.347b0b4f881cap-58, 0x1.7b97b4bce5b02p-3}, |
46 | {0x1.cf601e7b4348ep-59, 0x1.9a6a8e96c8626p-3}, |
47 | {0x1.17b10d2e0e5abp-61, 0x1.b90d7529260a2p-3}, |
48 | {0x1.c648d1534597ep-57, 0x1.d77d5df205736p-3}, |
49 | {0x1.8ab6e3cf7afbdp-57, 0x1.f5b75f92c80ddp-3}, |
50 | {0x1.62e47390cb865p-56, 0x1.09dc597d86362p-2}, |
51 | {0x1.30ca4748b1bf9p-57, 0x1.18bf5a30bf178p-2}, |
52 | {-0x1.077cdd36dfc81p-56, 0x1.278372057ef46p-2}, |
53 | {-0x1.963a544b672d8p-57, 0x1.362773707ebccp-2}, |
54 | {-0x1.5d5e43c55b3bap-56, 0x1.44aa436c2af0ap-2}, |
55 | {-0x1.2566480884082p-57, 0x1.530ad9951cd4ap-2}, |
56 | {-0x1.a725715711fp-56, 0x1.614840309cfe2p-2}, |
57 | {-0x1.c63aae6f6e918p-56, 0x1.6f61941e4def1p-2}, |
58 | {0x1.69c885c2b249ap-56, 0x1.7d5604b63b3f7p-2}, |
59 | {0x1.b6d0ba3748fa8p-56, 0x1.8b24d394a1b25p-2}, |
60 | {0x1.9e6c988fd0a77p-56, 0x1.98cd5454d6b18p-2}, |
61 | {-0x1.24dec1b50b7ffp-56, 0x1.a64eec3cc23fdp-2}, |
62 | {0x1.ae187b1ca504p-56, 0x1.b3a911da65c6cp-2}, |
63 | {-0x1.cc1ce70934c34p-56, 0x1.c0db4c94ec9fp-2}, |
64 | {-0x1.a2cfa4418f1adp-56, 0x1.cde53432c1351p-2}, |
65 | {0x1.a2b7f222f65e2p-56, 0x1.dac670561bb4fp-2}, |
66 | {0x1.0e53dc1bf3435p-56, 0x1.e77eb7f175a34p-2}, |
67 | {-0x1.a3992dc382a23p-57, 0x1.f40dd0b541418p-2}, |
68 | {-0x1.b32c949c9d593p-55, 0x1.0039c73c1a40cp-1}, |
69 | {-0x1.d5b495f6349e6p-56, 0x1.0657e94db30dp-1}, |
70 | {0x1.974fa13b5404fp-58, 0x1.0c6145b5b43dap-1}, |
71 | {-0x1.2bdaee1c0ee35p-58, 0x1.1255d9bfbd2a9p-1}, |
72 | {0x1.c621cec00c301p-55, 0x1.1835a88be7c13p-1}, |
73 | {-0x1.928df287a668fp-58, 0x1.1e00babdefeb4p-1}, |
74 | {0x1.c421c9f38224ep-57, 0x1.23b71e2cc9e6ap-1}, |
75 | {-0x1.09e73b0c6c087p-56, 0x1.2958e59308e31p-1}, |
76 | {0x1.c5d5e9ff0cf8dp-55, 0x1.2ee628406cbcap-1}, |
77 | {0x1.1021137c71102p-55, 0x1.345f01cce37bbp-1}, |
78 | {-0x1.2304331d8bf46p-55, 0x1.39c391cd4171ap-1}, |
79 | {0x1.ecf8b492644fp-56, 0x1.3f13fb89e96f4p-1}, |
80 | {-0x1.f76d0163f79c8p-56, 0x1.445065b795b56p-1}, |
81 | {0x1.2419a87f2a458p-56, 0x1.4978fa3269ee1p-1}, |
82 | {0x1.4a33dbeb3796cp-55, 0x1.4e8de5bb6ec04p-1}, |
83 | {-0x1.1bb74abda520cp-55, 0x1.538f57b89061fp-1}, |
84 | {-0x1.5e5c9d8c5a95p-56, 0x1.587d81f732fbbp-1}, |
85 | {0x1.0028e4bc5e7cap-57, 0x1.5d58987169b18p-1}, |
86 | {-0x1.2b785350ee8c1p-57, 0x1.6220d115d7b8ep-1}, |
87 | {-0x1.6ea6febe8bbbap-56, 0x1.66d663923e087p-1}, |
88 | {-0x1.a80386188c50ep-55, 0x1.6b798920b3d99p-1}, |
89 | {-0x1.8c34d25aadef6p-56, 0x1.700a7c5784634p-1}, |
90 | {0x1.7b2a6165884a1p-59, 0x1.748978fba8e0fp-1}, |
91 | {0x1.406a08980374p-55, 0x1.78f6bbd5d315ep-1}, |
92 | {0x1.560821e2f3aa9p-55, 0x1.7d528289fa093p-1}, |
93 | {-0x1.bf76229d3b917p-56, 0x1.819d0b7158a4dp-1}, |
94 | {0x1.6b66e7fc8b8c3p-57, 0x1.85d69576cc2c5p-1}, |
95 | {-0x1.55b9a5e177a1bp-55, 0x1.89ff5ff57f1f8p-1}, |
96 | {-0x1.ec182ab042f61p-56, 0x1.8e17aa99cc05ep-1}, |
97 | {0x1.1a62633145c07p-55, 0x1.921fb54442d18p-1}, |
98 | }; |
99 | |
100 | // Approximate atan(x) for |x| <= 2^-7. |
101 | // Using degree-9 Taylor polynomial: |
102 | // P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9; |
103 | // Then the absolute error is bounded by: |
104 | // |atan(x) - P(x)| < |x|^11/11 < 2^(-7*11) / 11 < 2^-80. |
105 | // And the relative error is bounded by: |
106 | // |(atan(x) - P(x))/atan(x)| < |x|^10 / 10 < 2^-73. |
107 | // For x = x_hi + x_lo, fully expand the polynomial and drop any terms less than |
108 | // ulp(x_hi^3 / 3) gives us: |
109 | // P(x) ~ x_hi - x_hi^3/3 + x_hi^5/5 - x_hi^7/7 + x_hi^9/9 + |
110 | // + x_lo * (1 - x_hi^2 + x_hi^4) |
111 | // Since p.lo is ~ x^3/3, the relative error from rounding is bounded by: |
112 | // |(atan(x) - P(x))/atan(x)| < ulp(x^2) <= 2^(-14-52) = 2^-66. |
113 | [[maybe_unused]] DoubleDouble atan_eval(const DoubleDouble &x) { |
114 | DoubleDouble p; |
115 | p.hi = x.hi; |
116 | double x_hi_sq = x.hi * x.hi; |
117 | // c0 ~ x_hi^2 * 1/5 - 1/3 |
118 | double c0 = fputil::multiply_add(x_hi_sq, 0x1.999999999999ap-3, |
119 | -0x1.5555555555555p-2); |
120 | // c1 ~ x_hi^2 * 1/9 - 1/7 |
121 | double c1 = fputil::multiply_add(x_hi_sq, 0x1.c71c71c71c71cp-4, |
122 | -0x1.2492492492492p-3); |
123 | // x_hi^3 |
124 | double x_hi_3 = x_hi_sq * x.hi; |
125 | // x_hi^4 |
126 | double x_hi_4 = x_hi_sq * x_hi_sq; |
127 | // d0 ~ 1/3 - x_hi^2 / 5 + x_hi^4 / 7 - x_hi^6 / 9 |
128 | double d0 = fputil::multiply_add(x_hi_4, c1, c0); |
129 | // x_lo - x_lo * x_hi^2 + x_lo * x_hi^4 |
130 | double d1 = fputil::multiply_add(x_hi_4 - x_hi_sq, x.lo, x.lo); |
131 | // p.lo ~ -x_hi^3/3 + x_hi^5/5 - x_hi^7/7 + x_hi^9/9 + |
132 | // + x_lo * (1 - x_hi^2 + x_hi^4) |
133 | p.lo = fputil::multiply_add(x_hi_3, d0, d1); |
134 | return p; |
135 | } |
136 | |
137 | // Float128 versions. |
138 | // atan(i/64) with i = 0..64, generated by Sollya with: |
139 | // > for i from 1 to 64 do { |
140 | // a = round(atan(i/64), 128, RN); |
141 | // ll = ceil(log2(a)); |
142 | // b = 2^ll + a; |
143 | // print("{Sign::POS, ", 2^(ll - 128), ",", b, "},"); |
144 | // }; |
145 | constexpr Float128 ATAN_I_F128[65] = { |
146 | {Sign::POS, 0, 0_u128}, |
147 | {Sign::POS, -134, 0xfffaaadd'db94d5bb'e78c5640'15f76048_u128}, |
148 | {Sign::POS, -133, 0xffeaaddd'4bb12542'779d776d'da8c6214_u128}, |
149 | {Sign::POS, -132, 0xbfdc0c21'86d14fcf'220e10d6'1df56ec7_u128}, |
150 | {Sign::POS, -132, 0xffaaddb9'67ef4e36'cb2792dc'0e2e0d51_u128}, |
151 | {Sign::POS, -131, 0x9facf873'e2aceb58'99c50bbf'08e6cdf6_u128}, |
152 | {Sign::POS, -131, 0xbf70c130'17887460'93567e78'4cf83676_u128}, |
153 | {Sign::POS, -131, 0xdf1cf5f3'783e1bef'71e5340b'30e5d9ef_u128}, |
154 | {Sign::POS, -131, 0xfeadd4d5'617b6e32'c897989f'3e888ef8_u128}, |
155 | {Sign::POS, -130, 0x8f0fd7d8'21b93725'bd375929'83a0af9a_u128}, |
156 | {Sign::POS, -130, 0x9eb77746'331362c3'47619d25'0360fe85_u128}, |
157 | {Sign::POS, -130, 0xae4c08f1'f6134efa'b54d3fef'0c2de994_u128}, |
158 | {Sign::POS, -130, 0xbdcbda5e'72d81134'7b0b4f88'1c9c7488_u128}, |
159 | {Sign::POS, -130, 0xcd35474b'643130e7'b00f3da1'a46eeb3b_u128}, |
160 | {Sign::POS, -130, 0xdc86ba94'93051022'f621a5c1'cb552f03_u128}, |
161 | {Sign::POS, -130, 0xebbeaef9'02b9b38c'91a2a68b'2fbd78e8_u128}, |
162 | {Sign::POS, -130, 0xfadbafc9'6406eb15'6dc79ef5'f7a217e6_u128}, |
163 | {Sign::POS, -129, 0x84ee2cbe'c31b12c5'c8e72197'0cabd3a3_u128}, |
164 | {Sign::POS, -129, 0x8c5fad18'5f8bc130'ca4748b1'bf88298d_u128}, |
165 | {Sign::POS, -129, 0x93c1b902'bf7a2df1'06459240'6fe1447a_u128}, |
166 | {Sign::POS, -129, 0x9b13b9b8'3f5e5e69'c5abb498'd27af328_u128}, |
167 | {Sign::POS, -129, 0xa25521b6'15784d45'43787549'88b8d9e3_u128}, |
168 | {Sign::POS, -129, 0xa9856cca'8e6a4eda'99b7f77b'f7d9e8c1_u128}, |
169 | {Sign::POS, -129, 0xb0a42018'4e7f0cb1'b51d51dc'200a0fc3_u128}, |
170 | {Sign::POS, -129, 0xb7b0ca0f'26f78473'8aa32122'dcfe4483_u128}, |
171 | {Sign::POS, -129, 0xbeab025b'1d9fbad3'910b8564'93411026_u128}, |
172 | {Sign::POS, -129, 0xc59269ca'50d92b6d'a1746e91'f50a28de_u128}, |
173 | {Sign::POS, -129, 0xcc66aa2a'6b58c33c'd9311fa1'4ed9b7c4_u128}, |
174 | {Sign::POS, -129, 0xd327761e'611fe5b6'427c95e9'001e7136_u128}, |
175 | {Sign::POS, -129, 0xd9d488ed'32e3635c'30f6394a'0806345d_u128}, |
176 | {Sign::POS, -129, 0xe06da64a'764f7c67'c631ed96'798cb804_u128}, |
177 | {Sign::POS, -129, 0xe6f29a19'609a84ba'60b77ce1'ca6dc2c8_u128}, |
178 | {Sign::POS, -129, 0xed63382b'0dda7b45'6fe445ec'bc3a8d03_u128}, |
179 | {Sign::POS, -129, 0xf3bf5bf8'bad1a21c'a7b837e6'86adf3fa_u128}, |
180 | {Sign::POS, -129, 0xfa06e85a'a0a0be5c'66d23c7d'5dc8ecc2_u128}, |
181 | {Sign::POS, -128, 0x801ce39e'0d205c99'a6d6c6c5'4d938596_u128}, |
182 | {Sign::POS, -128, 0x832bf4a6'd9867e2a'4b6a09cb'61a515c1_u128}, |
183 | {Sign::POS, -128, 0x8630a2da'da1ed065'd3e84ed5'013ca37e_u128}, |
184 | {Sign::POS, -128, 0x892aecdf'de9547b5'094478fc'472b4afc_u128}, |
185 | {Sign::POS, -128, 0x8c1ad445'f3e09b8c'439d8018'60205921_u128}, |
186 | {Sign::POS, -128, 0x8f005d5e'f7f59f9b'5c835e16'65c43748_u128}, |
187 | {Sign::POS, -128, 0x91db8f16'64f350e2'10e4f9c1'126e0220_u128}, |
188 | {Sign::POS, -128, 0x94ac72c9'847186f6'18c4f393'f78a32f9_u128}, |
189 | {Sign::POS, -128, 0x97731420'365e538b'abd3fe19'f1aeb6b3_u128}, |
190 | {Sign::POS, -128, 0x9a2f80e6'71bdda20'4226f8e2'204ff3bd_u128}, |
191 | {Sign::POS, -128, 0x9ce1c8e6'a0b8cdb9'f799c4e8'174cf11c_u128}, |
192 | {Sign::POS, -128, 0x9f89fdc4'f4b7a1ec'f8b49264'4f0701e0_u128}, |
193 | {Sign::POS, -128, 0xa22832db'cadaae08'92fe9c08'637af0e6_u128}, |
194 | {Sign::POS, -128, 0xa4bc7d19'34f70924'19a87f2a'457dac9f_u128}, |
195 | {Sign::POS, -128, 0xa746f2dd'b7602294'67b7d66f'2d74e019_u128}, |
196 | {Sign::POS, -128, 0xa9c7abdc'4830f5c8'916a84b5'be7933f6_u128}, |
197 | {Sign::POS, -128, 0xac3ec0fb'997dd6a1'a36273a5'6afa8ef4_u128}, |
198 | {Sign::POS, -128, 0xaeac4c38'b4d8c080'14725e2f'3e52070a_u128}, |
199 | {Sign::POS, -128, 0xb110688a'ebdc6f6a'43d65788'b9f6a7b5_u128}, |
200 | {Sign::POS, -128, 0xb36b31c9'1f043691'59014174'4462f93a_u128}, |
201 | {Sign::POS, -128, 0xb5bcc490'59ecc4af'f8f3cee7'5e3907d5_u128}, |
202 | {Sign::POS, -128, 0xb8053e2b'c2319e73'cb2da552'10a4443d_u128}, |
203 | {Sign::POS, -128, 0xba44bc7d'd470782f'654c2cb1'0942e386_u128}, |
204 | {Sign::POS, -128, 0xbc7b5dea'e98af280'd4113006'e80fb290_u128}, |
205 | {Sign::POS, -128, 0xbea94144'fd049aac'1043c5e7'55282e7d_u128}, |
206 | {Sign::POS, -128, 0xc0ce85b8'ac526640'89dd62c4'6e92fa25_u128}, |
207 | {Sign::POS, -128, 0xc2eb4abb'661628b5'b373fe45'c61bb9fb_u128}, |
208 | {Sign::POS, -128, 0xc4ffaffa'bf8fbd54'8cb43d10'bc9e0221_u128}, |
209 | {Sign::POS, -128, 0xc70bd54c'e602ee13'e7d54fbd'09f2be38_u128}, |
210 | {Sign::POS, -128, 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}, |
211 | }; |
212 | |
213 | // Degree-13 minimax polynomial generated by Sollya with: |
214 | // > P = fpminimax(atan(x), [|1, 3, 5, 7, 9, 11, 13|], [|1, 128...|], |
215 | // [0, 2^-7]); |
216 | // > dirtyinfnorm(atan(x) - P, [0, 2^-7]); |
217 | // 0x1.26016ad97f323875760f869684c0898d7b7bb8bep-122 |
218 | constexpr Float128 ATAN_POLY_F128[] = { |
219 | {Sign::NEG, -129, 0xaaaaaaaa'aaaaaaaa'aaaaaaa6'003c5d1d_u128}, |
220 | {Sign::POS, -130, 0xcccccccc'cccccccc'cca00232'8776b063_u128}, |
221 | {Sign::NEG, -130, 0x92492492'49249201'27f5268a'cb24aec0_u128}, |
222 | {Sign::POS, -131, 0xe38e38e3'8dce3d96'626a1643'f8eb68f3_u128}, |
223 | {Sign::NEG, -131, 0xba2e8b7a'ea4ad00f'005a35c7'6ef609b1_u128}, |
224 | {Sign::POS, -131, 0x9d82765e'd22a7d92'ac09c405'c0a69214_u128}, |
225 | }; |
226 | |
227 | // Approximate atan for |x| <= 2^-7. |
228 | [[maybe_unused]] Float128 atan_eval(const Float128 &x) { |
229 | Float128 x_sq = fputil::quick_mul(x, x); |
230 | Float128 x3 = fputil::quick_mul(x, x_sq); |
231 | Float128 p = fputil::polyeval(x_sq, ATAN_POLY_F128[0], ATAN_POLY_F128[1], |
232 | ATAN_POLY_F128[2], ATAN_POLY_F128[3], |
233 | ATAN_POLY_F128[4], ATAN_POLY_F128[5]); |
234 | return fputil::multiply_add(x3, p, x); |
235 | } |
236 | |
237 | } // anonymous namespace |
238 | |
239 | } // namespace LIBC_NAMESPACE_DECL |
240 | |
241 | #endif // LLVM_LIBC_SRC_MATH_GENERIC_ATAN_UTILS_H |
242 | |