expxf16.h source code [libc/src/math/generic/expxf16.h]

1	//===-- Common utilities for half-precision exponential functions ---------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
10	#define LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
11
12	#include "src/__support/FPUtil/FPBits.h"
13	#include "src/__support/FPUtil/cast.h"
14	#include "src/__support/FPUtil/multiply_add.h"
15	#include "src/__support/FPUtil/nearest_integer.h"
16	#include "src/__support/macros/attributes.h"
17	#include "src/__support/macros/config.h"
18	#include <stdint.h>
19
20	#include "src/__support/math/expf16_utils.h"
21
22	namespace LIBC_NAMESPACE_DECL {
23
24	// Generated by Sollya with the following commands:
25	// > display = hexadecimal;
26	// > for i from 0 to 7 do printsingle(round(2^(i 2^-3), SG, RN));*
27	constexpr cpp::array<uint32_t, `8`> EXP2_MID_BITS = {
28	`0x3f80'0000U`, `0x3f8b'95c2U`, `0x3f98'37f0U`, `0x3fa5'fed7U`,
29	`0x3fb5'04f3U`, `0x3fc5'672aU`, `0x3fd7'44fdU`, `0x3fea'c0c7U`,
30	};
31
32	LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) {
33	// For -25 < x < 16, to compute 2^x, we perform the following range reduction:
34	// find hi, mid, lo, such that:
35	// x = hi + mid + lo, in which
36	// hi is an integer,
37	// mid 2^3 is an integer,*
38	// -2^(-4) <= lo < 2^(-4).
39	// In particular,
40	// hi + mid = round(x 2^3) * 2^(-3).*
41	// Then,
42	// 2^x = 2^(hi + mid + lo) = 2^hi 2^mid * 2^lo.*
43	// We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi 2^mid*
44	// by adding hi to the exponent field of 2^mid. 2^lo is computed using a
45	// degree-3 minimax polynomial generated by Sollya.
46
47	float xf = x;
48	float kf = fputil::nearest_integer(xf * `0x1.0p+3f`);
49	int x_hi_mid = static_cast<int>(kf);
50	unsigned x_hi = static_cast<unsigned>(x_hi_mid) >> `3`;
51	unsigned x_mid = static_cast<unsigned>(x_hi_mid) & `0x7`;
52	// lo = x - (hi + mid) = round(x 2^3) * (-2^(-3)) + x*
53	float lo = fputil::multiply_add(kf, -`0x1.0p-3f`, xf);
54
55	uint32_t exp2_hi_mid_bits =
56	EXP2_MID_BITS[x_mid] +
57	static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN);
58	float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val();
59	// Degree-3 minimax polynomial generated by Sollya with the following
60	// commands:
61	// > display = hexadecimal;
62	// > P = fpminimax((2^x - 1)/x, 2, [\|SG...\|], [-2^-4, 2^-4]);
63	// > 1 + x P;*
64	float exp2_lo = fputil::polyeval(lo, `0x1p+0f`, `0x1.62e43p-1f`, `0x1.ec0aa6p-3f`,
65	`0x1.c6b4a6p-5f`);
66	return {exp2_hi_mid, exp2_lo};
67	}
68
// log2(10) rounded to single precision.
//
// Generated by Sollya with the following commands:
//   > display = hexadecimal;
//   > round(log2(10), SG, RN);
static constexpr float LOG2F_10 = 0x1.a934fp+1f;
73
// log10(2) rounded to single precision.
//
// Generated by Sollya with the following commands:
//   > display = hexadecimal;
//   > round(log10(2), SG, RN);
static constexpr float LOG10F_2 = 0x1.344136p-2f;
78
79	LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) {
80	// For -8 < x < 5, to compute 10^x, we perform the following range reduction:
81	// find hi, mid, lo, such that:
82	// x = (hi + mid) log2(10) + lo, in which*
83	// hi is an integer,
84	// mid 2^3 is an integer,*
85	// -2^(-4) <= lo < 2^(-4).
86	// In particular,
87	// hi + mid = round(x 2^3) * 2^(-3).*
88	// Then,
89	// 10^x = 10^(hi + mid + lo) = 2^((hi + mid) log2(10)) + 10^lo*
90	// We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi 2^mid*
91	// by adding hi to the exponent field of 2^mid. 10^lo is computed using a
92	// degree-4 minimax polynomial generated by Sollya.
93
94	float xf = x;
95	float kf = fputil::nearest_integer(xf * (LOG2F_10 * `0x1.0p+3f`));
96	int x_hi_mid = static_cast<int>(kf);
97	unsigned x_hi = static_cast<unsigned>(x_hi_mid) >> `3`;
98	unsigned x_mid = static_cast<unsigned>(x_hi_mid) & `0x7`;
99	// lo = x - (hi + mid) = round(x 2^3 * log2(10)) * log10(2) * (-2^(-3)) + x*
100	float lo = fputil::multiply_add(kf, LOG10F_2 * -`0x1.0p-3f`, xf);
101
102	uint32_t exp2_hi_mid_bits =
103	EXP2_MID_BITS[x_mid] +
104	static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN);
105	float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val();
106	// Degree-4 minimax polynomial generated by Sollya with the following
107	// commands:
108	// > display = hexadecimal;
109	// > P = fpminimax((10^x - 1)/x, 3, [\|SG...\|], [-2^-4, 2^-4]);
110	// > 1 + x P;*
111	float exp10_lo = fputil::polyeval(lo, `0x1p+0f`, `0x1.26bb14p+1f`, `0x1.53526p+1f`,
112	`0x1.04b434p+1f`, `0x1.2bcf9ep+0f`);
113	return {exp2_hi_mid, exp10_lo};
114	}
115
// log2(e) rounded to single precision.
//
// Generated by Sollya with the following commands:
//   > display = hexadecimal;
//   > round(log2(exp(1)), SG, RN);
static constexpr float LOG2F_E = 0x1.715476p+0f;
120
// log(2) (natural logarithm) rounded to single precision.
//
// Generated by Sollya with the following commands:
//   > display = hexadecimal;
//   > round(log(2), SG, RN);
static constexpr float LOGF_2 = 0x1.62e43p-1f;
125
126	// Generated by Sollya with the following commands:
127	// > display = hexadecimal;
128	// > for i from 0 to 31 do printsingle(round(2^(i 2^-5), SG, RN));*
129	static constexpr cpp::array<uint32_t, `32`> EXP2_MID_5_BITS = {
130	`0x3f80'0000U`, `0x3f82'cd87U`, `0x3f85'aac3U`, `0x3f88'980fU`, `0x3f8b'95c2U`,
131	`0x3f8e'a43aU`, `0x3f91'c3d3U`, `0x3f94'f4f0U`, `0x3f98'37f0U`, `0x3f9b'8d3aU`,
132	`0x3f9e'f532U`, `0x3fa2'7043U`, `0x3fa5'fed7U`, `0x3fa9'a15bU`, `0x3fad'583fU`,
133	`0x3fb1'23f6U`, `0x3fb5'04f3U`, `0x3fb8'fbafU`, `0x3fbd'08a4U`, `0x3fc1'2c4dU`,
134	`0x3fc5'672aU`, `0x3fc9'b9beU`, `0x3fce'248cU`, `0x3fd2'a81eU`, `0x3fd7'44fdU`,
135	`0x3fdb'fbb8U`, `0x3fe0'ccdfU`, `0x3fe5'b907U`, `0x3fea'c0c7U`, `0x3fef'e4baU`,
136	`0x3ff5'257dU`, `0x3ffa'83b3U`,
137	};
138
139	// This function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
140	// and exp(-x) simultaneously.
141	// To compute e^x, we perform the following range reduction:
142	// find hi, mid, lo such that:
143	// x = (hi + mid) log(2) + lo, in which*
144	// hi is an integer,
145	// 0 <= mid 2^5 < 32 is an integer*
146	// -2^(-5) <= lo log2(e) <= 2^-5.*
147	// In particular,
148	// hi + mid = round(x log2(e) * 2^5) * 2^(-5).*
149	// Then,
150	// e^x = 2^(hi + mid) e^lo = 2^hi * 2^mid * e^lo.*
151	// We store 2^mid in the lookup table EXP2_MID_5_BITS, and compute 2^hi 2^mid*
152	// by adding hi to the exponent field of 2^mid.
153	// e^lo is computed using a degree-3 minimax polynomial generated by Sollya:
154	// e^lo ~ P(lo)
155	// = 1 + lo + c2 lo^2 + ... + c5 * lo^5*
156	// = (1 + c2lo^2 + c4lo^4) + lo (1 + c3lo^2 + c5lo^4)*
157	// = P_even + lo P_odd*
158	// To compute e^(-x), notice that:
159	// e^(-x) = 2^(-(hi + mid)) e^(-lo)*
160	// ~ 2^(-(hi + mid)) P(-lo)*
161	// = 2^(-(hi + mid)) (P_even - lo * P_odd)*
162	// So:
163	// sinh(x) = (e^x - e^(-x)) / 2
164	// ~ 0.5 (2^(hi + mid) * (P_even + lo * P_odd) -*
165	// 2^(-(hi + mid)) (P_even - lo * P_odd))*
166	// = 0.5 (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +*
167	// lo P_odd * (2^(hi + mid) + 2^(-(hi + mid))))*
168	// And similarly:
169	// cosh(x) = (e^x + e^(-x)) / 2
170	// ~ 0.5 (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +*
171	// lo P_odd * (2^(hi + mid) - 2^(-(hi + mid))))*
172	// The main point of these formulas is that the expensive part of calculating
173	// the polynomials approximating lower parts of e^x and e^(-x) is shared and
174	// only done once.
175	template <bool IsSinh> LIBC_INLINE float16 eval_sinh_or_cosh(float16 x) {
176	float xf = x;
177	float kf = fputil::nearest_integer(xf * (LOG2F_E * `0x1.0p+5f`));
178	int x_hi_mid_p = static_cast<int>(kf);
179	int x_hi_mid_m = -x_hi_mid_p;
180
181	unsigned x_hi_p = static_cast<unsigned>(x_hi_mid_p) >> `5`;
182	unsigned x_hi_m = static_cast<unsigned>(x_hi_mid_m) >> `5`;
183	unsigned x_mid_p = static_cast<unsigned>(x_hi_mid_p) & `0x1f`;
184	unsigned x_mid_m = static_cast<unsigned>(x_hi_mid_m) & `0x1f`;
185
186	uint32_t exp2_hi_mid_bits_p =
187	EXP2_MID_5_BITS[x_mid_p] +
188	static_cast<uint32_t>(x_hi_p << fputil::FPBits<float>::FRACTION_LEN);
189	uint32_t exp2_hi_mid_bits_m =
190	EXP2_MID_5_BITS[x_mid_m] +
191	static_cast<uint32_t>(x_hi_m << fputil::FPBits<float>::FRACTION_LEN);
192	// exp2_hi_mid_p = 2^(hi + mid)
193	float exp2_hi_mid_p = fputil::FPBits<float>(exp2_hi_mid_bits_p).get_val();
194	// exp2_hi_mid_m = 2^(-(hi + mid))
195	float exp2_hi_mid_m = fputil::FPBits<float>(exp2_hi_mid_bits_m).get_val();
196
197	// exp2_hi_mid_sum = 2^(hi + mid) + 2^(-(hi + mid))
198	float exp2_hi_mid_sum = exp2_hi_mid_p + exp2_hi_mid_m;
199	// exp2_hi_mid_diff = 2^(hi + mid) - 2^(-(hi + mid))
200	float exp2_hi_mid_diff = exp2_hi_mid_p - exp2_hi_mid_m;
201
202	// lo = x - (hi + mid) = round(x log2(e) * 2^5) * log(2) * (-2^(-5)) + x*
203	float lo = fputil::multiply_add(kf, LOGF_2 * -`0x1.0p-5f`, xf);
204	float lo_sq = lo * lo;
205
206	// Degree-3 minimax polynomial generated by Sollya with the following
207	// commands:
208	// > display = hexadecimal;
209	// > P = fpminimax(expm1(x)/x, 2, [\|SG...\|], [-2^-5, 2^-5]);
210	// > 1 + x P;*
211	constexpr cpp::array<float, `4`> COEFFS = {`0x1p+0f`, `0x1p+0f`, `0x1.0004p-1f`,
212	`0x1.555778p-3f`};
213	float half_p_odd =
214	fputil::polyeval(lo_sq, COEFFS[`1`] * `0.5f`, COEFFS[`3`] * `0.5f`);
215	float half_p_even =
216	fputil::polyeval(lo_sq, COEFFS[`0`] * `0.5f`, COEFFS[`2`] * `0.5f`);
217
218	// sinh(x) = lo (0.5 * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) +*
219	// (0.5 P_even * (2^(hi + mid) - 2^(-(hi + mid))))*
220	if constexpr (IsSinh)
221	return fputil::cast<float16>(fputil::multiply_add(
222	lo, half_p_odd * exp2_hi_mid_sum, half_p_even * exp2_hi_mid_diff));
223	// cosh(x) = lo (0.5 * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) +*
224	// (0.5 P_even * (2^(hi + mid) + 2^(-(hi + mid))))*
225	return fputil::cast<float16>(fputil::multiply_add(
226	lo, half_p_odd * exp2_hi_mid_diff, half_p_even * exp2_hi_mid_sum));
227	}
228
229	// Generated by Sollya with the following commands:
230	// > display = hexadecimal;
231	// > for i from 0 to 31 do print(round(log(1 + i 2^-5), SG, RN));*
232	constexpr cpp::array<float, `32`> LOGF_F = {
233	`0x0p+0f`, `0x1.f829bp-6f`, `0x1.f0a30cp-5f`, `0x1.6f0d28p-4f`,
234	`0x1.e27076p-4f`, `0x1.29553p-3f`, `0x1.5ff308p-3f`, `0x1.9525aap-3f`,
235	`0x1.c8ff7cp-3f`, `0x1.fb9186p-3f`, `0x1.1675cap-2f`, `0x1.2e8e2cp-2f`,
236	`0x1.4618bcp-2f`, `0x1.5d1bdcp-2f`, `0x1.739d8p-2f`, `0x1.89a338p-2f`,
237	`0x1.9f323ep-2f`, `0x1.b44f78p-2f`, `0x1.c8ff7cp-2f`, `0x1.dd46ap-2f`,
238	`0x1.f128f6p-2f`, `0x1.02552ap-1f`, `0x1.0be72ep-1f`, `0x1.154c3ep-1f`,
239	`0x1.1e85f6p-1f`, `0x1.2795e2p-1f`, `0x1.307d74p-1f`, `0x1.393e0ep-1f`,
240	`0x1.41d8fep-1f`, `0x1.4a4f86p-1f`, `0x1.52a2d2p-1f`, `0x1.5ad404p-1f`,
241	};
242
243	// Generated by Sollya with the following commands:
244	// > display = hexadecimal;
245	// > for i from 0 to 31 do print(round(log2(1 + i 2^-5), SG, RN));*
246	constexpr cpp::array<float, `32`> LOG2F_F = {
247	`0x0p+0f`, `0x1.6bad38p-5f`, `0x1.663f7p-4f`, `0x1.08c588p-3f`,
248	`0x1.5c01a4p-3f`, `0x1.acf5e2p-3f`, `0x1.fbc16cp-3f`, `0x1.24407ap-2f`,
249	`0x1.49a784p-2f`, `0x1.6e221cp-2f`, `0x1.91bba8p-2f`, `0x1.b47ecp-2f`,
250	`0x1.d6753ep-2f`, `0x1.f7a856p-2f`, `0x1.0c105p-1f`, `0x1.1bf312p-1f`,
251	`0x1.2b8034p-1f`, `0x1.3abb4p-1f`, `0x1.49a784p-1f`, `0x1.584822p-1f`,
252	`0x1.66a008p-1f`, `0x1.74b1fep-1f`, `0x1.82809ep-1f`, `0x1.900e62p-1f`,
253	`0x1.9d5dap-1f`, `0x1.aa709p-1f`, `0x1.b74948p-1f`, `0x1.c3e9cap-1f`,
254	`0x1.d053f6p-1f`, `0x1.dc899ap-1f`, `0x1.e88c6cp-1f`, `0x1.f45e08p-1f`,
255	};
256
257	// Generated by Sollya with the following commands:
258	// > display = hexadecimal;
259	// > for i from 0 to 31 do print(round(log10(1 + i 2^-5), SG, RN));*
260	constexpr cpp::array<float, `32`> LOG10F_F = {
261	`0x0p+0f`, `0x1.b5e908p-7f`, `0x1.af5f92p-6f`, `0x1.3ed11ap-5f`,
262	`0x1.a30a9ep-5f`, `0x1.02428cp-4f`, `0x1.31b306p-4f`, `0x1.5fe804p-4f`,
263	`0x1.8cf184p-4f`, `0x1.b8de4ep-4f`, `0x1.e3bc1ap-4f`, `0x1.06cbd6p-3f`,
264	`0x1.1b3e72p-3f`, `0x1.2f3b6ap-3f`, `0x1.42c7e8p-3f`, `0x1.55e8c6p-3f`,
265	`0x1.68a288p-3f`, `0x1.7af974p-3f`, `0x1.8cf184p-3f`, `0x1.9e8e7cp-3f`,
266	`0x1.afd3e4p-3f`, `0x1.c0c514p-3f`, `0x1.d1653p-3f`, `0x1.e1b734p-3f`,
267	`0x1.f1bdeep-3f`, `0x1.00be06p-2f`, `0x1.087a08p-2f`, `0x1.101432p-2f`,
268	`0x1.178da6p-2f`, `0x1.1ee778p-2f`, `0x1.2622bp-2f`, `0x1.2d404cp-2f`,
269	};
270
271	// Generated by Sollya with the following commands:
272	// > display = hexadecimal;
273	// > for i from 0 to 31 do print(round(1 / (1 + i 2^-5), SG, RN));*
274	constexpr cpp::array<float, `32`> ONE_OVER_F_F = {
275	`0x1p+0f`, `0x1.f07c2p-1f`, `0x1.e1e1e2p-1f`, `0x1.d41d42p-1f`,
276	`0x1.c71c72p-1f`, `0x1.bacf92p-1f`, `0x1.af286cp-1f`, `0x1.a41a42p-1f`,
277	`0x1.99999ap-1f`, `0x1.8f9c18p-1f`, `0x1.861862p-1f`, `0x1.7d05f4p-1f`,
278	`0x1.745d18p-1f`, `0x1.6c16c2p-1f`, `0x1.642c86p-1f`, `0x1.5c9882p-1f`,
279	`0x1.555556p-1f`, `0x1.4e5e0ap-1f`, `0x1.47ae14p-1f`, `0x1.414142p-1f`,
280	`0x1.3b13b2p-1f`, `0x1.3521dp-1f`, `0x1.2f684cp-1f`, `0x1.29e412p-1f`,
281	`0x1.24924ap-1f`, `0x1.1f7048p-1f`, `0x1.1a7b96p-1f`, `0x1.15b1e6p-1f`,
282	`0x1.111112p-1f`, `0x1.0c9714p-1f`, `0x1.08421p-1f`, `0x1.041042p-1f`,
283	};
284
285	} // namespace LIBC_NAMESPACE_DECL
286
287	#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
288

source code of libc/src/math/generic/expxf16.h