sincosf.cpp source code [libc/src/math/generic/sincosf.cpp]

1	//===-- Single-precision sincos function ----------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "src/math/sincosf.h"
10	#include "sincosf_utils.h"
11	#include "src/__support/FPUtil/FEnvImpl.h"
12	#include "src/__support/FPUtil/FPBits.h"
13	#include "src/__support/FPUtil/multiply_add.h"
14	#include "src/__support/FPUtil/rounding_mode.h"
15	#include "src/__support/common.h"
16	#include "src/__support/macros/config.h"
17	#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
18	#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
19
20	namespace LIBC_NAMESPACE_DECL {
21
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
// Exceptional values.
//
// Inputs (given as the bit pattern of |x|) that sincosf looks up in these
// tables instead of evaluating through the general path.
static constexpr int N_EXCEPTS = 6;

// Bit patterns of |x| for each exceptional input.
static constexpr uint32_t EXCEPT_INPUTS[N_EXCEPTS] = {
    0x46199998, // x = 0x1.33333p13
    0x55325019, // x = 0x1.64a032p43
    0x5922aa80, // x = 0x1.4555p51
    0x5f18b878, // x = 0x1.3170fp63
    0x6115cb11, // x = 0x1.2b9622p67
    0x7beef5ef, // x = 0x1.ddebdep120
};

// Row layout for both output tables:
//   [0] bit pattern of the result rounded toward zero (RZ),
//   [1] increment added to [0] for FE_UPWARD,
//   [2] increment added to [0] for FE_DOWNWARD,
//   [3] increment added to [0] for FE_TONEAREST.
// The sine rows are stated for positive x; sincosf swaps columns [1] and [2]
// and negates the result when x is negative.
static constexpr uint32_t EXCEPT_OUTPUTS_SIN[N_EXCEPTS][4] = {
    {0xbeb1fa5d, 0, 1, 0}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ)
    {0xbf171adf, 0, 1, 1}, // x = 0x1.64a032p43, sin(x) = -0x1.2e35bep-1 (RZ)
    {0xbf587521, 0, 1, 1}, // x = 0x1.4555p51, sin(x) = -0x1.b0ea42p-1 (RZ)
    {0x3dad60f6, 1, 0, 1}, // x = 0x1.3170fp63, sin(x) = 0x1.5ac1ecp-4 (RZ)
    {0xbe7cc1e0, 0, 1, 1}, // x = 0x1.2b9622p67, sin(x) = -0x1.f983cp-3 (RZ)
    {0xbf587d1b, 0, 1, 1}, // x = 0x1.ddebdep120, sin(x) = -0x1.b0fa36p-1 (RZ)
};

static constexpr uint32_t EXCEPT_OUTPUTS_COS[N_EXCEPTS][4] = {
    {0xbf70090b, 0, 1, 0}, // x = 0x1.33333p13, cos(x) = -0x1.e01216p-1 (RZ)
    {0x3f4ea5d2, 1, 0, 0}, // x = 0x1.64a032p43, cos(x) = 0x1.9d4ba4p-1 (RZ)
    {0x3f08aebe, 1, 0, 1}, // x = 0x1.4555p51, cos(x) = 0x1.115d7cp-1 (RZ)
    {0x3f7f14bb, 1, 0, 0}, // x = 0x1.3170fp63, cos(x) = 0x1.fe2976p-1 (RZ)
    {0x3f78142e, 1, 0, 1}, // x = 0x1.2b9622p67, cos(x) = 0x1.f0285cp-1 (RZ)
    {0x3f08a21c, 1, 0, 0}, // x = 0x1.ddebdep120, cos(x) = 0x1.114438p-1 (RZ)
};
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
53
54	LLVM_LIBC_FUNCTION(void, sincosf, (float x, float sinp, float* *cosp)) {
55	using FPBits = typename fputil::FPBits<float>;
56	FPBits xbits(x);
57
58	uint32_t x_abs = xbits.uintval() & `0x7fff'ffffU`;
59	double xd = static_cast<double>(x);
60
61	// Range reduction:
62	// For \|x\| >= 2^-12, we perform range reduction as follows:
63	// Find k and y such that:
64	// x = (k + y) pi/32*
65	// k is an integer
66	// \|y\| < 0.5
67	// For small range (\|x\| < 2^45 when FMA instructions are available, 2^22
68	// otherwise), this is done by performing:
69	// k = round(x 32/pi)*
70	// y = x 32/pi - k*
71	// For large range, we will omit all the higher parts of 32/pi such that the
72	// least significant bits of their full products with x are larger than 63,
73	// since:
74	// sin((k + y + 64i) * pi/32) = sin(x + i * 2pi) = sin(x), and*
75	// cos((k + y + 64i) * pi/32) = cos(x + i * 2pi) = cos(x).*
76	//
77	// When FMA instructions are not available, we store the digits of 32/pi in
78	// chunks of 28-bit precision. This will make sure that the products:
79	// x THIRTYTWO_OVER_PI_28[i] are all exact.*
80	// When FMA instructions are available, we simply store the digits of326/pi in
81	// chunks of doubles (53-bit of precision).
82	// So when multiplying by the largest values of single precision, the
83	// resulting output should be correct up to 2^(-208 + 128) ~ 2^-80. By the
84	// worst-case analysis of range reduction, \|y\| >= 2^-38, so this should give
85	// us more than 40 bits of accuracy. For the worst-case estimation of range
86	// reduction, see for instances:
87	// Elementary Functions by J-M. Muller, Chapter 11,
88	// Handbook of Floating-Point Arithmetic by J-M. Muller et. al.,
89	// Chapter 10.2.
90	//
91	// Once k and y are computed, we then deduce the answer by the sine and cosine
92	// of sum formulas:
93	// sin(x) = sin((k + y)pi/32)*
94	// = sin(ypi/32) * cos(kpi/32) + cos(ypi/32) * sin(kpi/32)
95	// cos(x) = cos((k + y)pi/32)*
96	// = cos(ypi/32) * cos(kpi/32) - sin(ypi/32) * sin(kpi/32)
97	// The values of sin(kpi/32) and cos(kpi/32) for k = 0..63 are precomputed
98	// and stored using a vector of 32 doubles. Sin(ypi/32) and cos(ypi/32) are
99	// computed using degree-7 and degree-6 minimax polynomials generated by
100	// Sollya respectively.
101
102	// \|x\| < 0x1.0p-12f
103	if (LIBC_UNLIKELY(x_abs < `0x3980'0000U`)) {
104	if (LIBC_UNLIKELY(x_abs == `0U`)) {
105	// For signed zeros.
106	*sinp = x;
107	*cosp = `1.0f`;
108	return;
109	}
110	// When \|x\| < 2^-12, the relative errors of the approximations
111	// sin(x) ~ x, cos(x) ~ 1
112	// are:
113	// \|sin(x) - x\| / \|sin(x)\| < \|x^3\| / (6\|x\|)
114	// = x^2 / 6
115	// < 2^-25
116	// < epsilon(1)/2.
117	// \|cos(x) - 1\| < \|x^2 / 2\| = 2^-25 < epsilon(1)/2.
118	// So the correctly rounded values of sin(x) and cos(x) are:
119	// sin(x) = x - sign(x)eps(x) if rounding mode = FE_TOWARDZERO,*
120	// or (rounding mode = FE_UPWARD and x is
121	// negative),
122	// = x otherwise.
123	// cos(x) = 1 - eps(x) if rounding mode = FE_TOWARDZERO or FE_DOWWARD,
124	// = 1 otherwise.
125	// To simplify the rounding decision and make it more efficient and to
126	// prevent compiler to perform constant folding, we use
127	// sin(x) = fma(x, -2^-25, x),
128	// cos(x) = fma(x0.5f, -x, 1)*
129	// instead.
130	// Note: to use the formula x - 2^-25x to decide the correct rounding, we*
131	// do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25x when*
132	// \|x\| < 2^-125. For targets without FMA instructions, we simply use
133	// double for intermediate results as it is more efficient than using an
134	// emulated version of FMA.
135	#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT)
136	*sinp = fputil::multiply_add(x, -`0x1.0p-25f`, x);
137	*cosp = fputil::multiply_add(FPBits(x_abs).get_val(), -`0x1.0p-25f`, `1.0f`);
138	#else
139	sinp = static_cast<float*>(fputil::multiply_add(xd, -`0x1.0p-25`, xd));
140	cosp = static_cast<float*>(fputil::multiply_add(
141	static_cast<double>(FPBits(x_abs).get_val()), -`0x1.0p-25`, `1.0`));
142	#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
143	return;
144	}
145
146	// x is inf or nan.
147	if (LIBC_UNLIKELY(x_abs >= `0x7f80'0000U`)) {
148	if (xbits.is_signaling_nan()) {
149	fputil::raise_except_if_required(FE_INVALID);
150	sinp = cosp = FPBits::quiet_nan().get_val();
151	return;
152	}
153
154	if (x_abs == `0x7f80'0000U`) {
155	fputil::set_errno_if_required(EDOM);
156	fputil::raise_except_if_required(FE_INVALID);
157	}
158	*sinp = FPBits::quiet_nan().get_val();
159	cosp = sinp;
160	return;
161	}
162
163	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
164	// Check exceptional values.
165	for (int i = `0`; i < N_EXCEPTS; ++i) {
166	if (LIBC_UNLIKELY(x_abs == EXCEPT_INPUTS[i])) {
167	uint32_t s = EXCEPT_OUTPUTS_SIN[i][`0`]; // FE_TOWARDZERO
168	uint32_t c = EXCEPT_OUTPUTS_COS[i][`0`]; // FE_TOWARDZERO
169	bool x_sign = x < `0`;
170	switch (fputil::quick_get_round()) {
171	case FE_UPWARD:
172	s += x_sign ? EXCEPT_OUTPUTS_SIN[i][`2`] : EXCEPT_OUTPUTS_SIN[i][`1`];
173	c += EXCEPT_OUTPUTS_COS[i][`1`];
174	break;
175	case FE_DOWNWARD:
176	s += x_sign ? EXCEPT_OUTPUTS_SIN[i][`1`] : EXCEPT_OUTPUTS_SIN[i][`2`];
177	c += EXCEPT_OUTPUTS_COS[i][`2`];
178	break;
179	case FE_TONEAREST:
180	s += EXCEPT_OUTPUTS_SIN[i][`3`];
181	c += EXCEPT_OUTPUTS_COS[i][`3`];
182	break;
183	}
184	*sinp = x_sign ? -FPBits(s).get_val() : FPBits(s).get_val();
185	*cosp = FPBits(c).get_val();
186
187	return;
188	}
189	}
190	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
191
192	// Combine the results with the sine and cosine of sum formulas:
193	// sin(x) = sin((k + y)pi/32)*
194	// = sin(ypi/32) * cos(kpi/32) + cos(ypi/32) * sin(kpi/32)
195	// = sin_y cos_k + (1 + cosm1_y) * sin_k*
196	// = sin_y cos_k + (cosm1_y * sin_k + sin_k)*
197	// cos(x) = cos((k + y)pi/32)*
198	// = cos(ypi/32) * cos(kpi/32) - sin(ypi/32) * sin(kpi/32)
199	// = cosm1_y cos_k + sin_y * sin_k*
200	// = (cosm1_y cos_k + cos_k) + sin_y * sin_k*
201	double sin_k, cos_k, sin_y, cosm1_y;
202
203	sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
204
205	sinp = static_cast<float*>(fputil::multiply_add(
206	sin_y, cos_k, fputil::multiply_add(cosm1_y, sin_k, sin_k)));
207	cosp = static_cast<float*>(fputil::multiply_add(
208	sin_y, -sin_k, fputil::multiply_add(cosm1_y, cos_k, cos_k)));
209	}
210
211	} // namespace LIBC_NAMESPACE_DECL
212

source code of libc/src/math/generic/sincosf.cpp