exp2m1f.cpp source code [libc/src/math/generic/exp2m1f.cpp]

1	//===-- Implementation of exp2m1f function --------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "src/math/exp2m1f.h"
10	#include "src/__support/FPUtil/FEnvImpl.h"
11	#include "src/__support/FPUtil/FPBits.h"
12	#include "src/__support/FPUtil/PolyEval.h"
13	#include "src/__support/FPUtil/except_value_utils.h"
14	#include "src/__support/FPUtil/multiply_add.h"
15	#include "src/__support/FPUtil/rounding_mode.h"
16	#include "src/__support/common.h"
17	#include "src/__support/libc_errno.h"
18	#include "src/__support/macros/config.h"
19	#include "src/__support/macros/optimization.h"
20	#include "src/__support/macros/properties/cpu_features.h"
21
22	#include "explogxf.h"
23
24	namespace LIBC_NAMESPACE_DECL {
25
26	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
27	static constexpr size_t N_EXCEPTS_LO = `8`;
28
29	static constexpr fputil::ExceptValues<float, N_EXCEPTS_LO> EXP2M1F_EXCEPTS_LO =
30	{{
31	// (input, RZ output, RU offset, RD offset, RN offset)
32	// x = 0x1.36dc8ep-36, exp2m1f(x) = 0x1.aef212p-37 (RZ)
33	{`0x2d9b'6e47U`, `0x2d57'7909U`, `1U`, `0U`, `0U`},
34	// x = 0x1.224936p-19, exp2m1f(x) = 0x1.926c0ep-20 (RZ)
35	{`0x3611'249bU`, `0x35c9'3607U`, `1U`, `0U`, `1U`},
36	// x = 0x1.d16d2p-20, exp2m1f(x) = 0x1.429becp-20 (RZ)
37	{`0x35e8'b690U`, `0x35a1'4df6U`, `1U`, `0U`, `1U`},
38	// x = 0x1.17949ep-14, exp2m1f(x) = 0x1.8397p-15 (RZ)
39	{`0x388b'ca4fU`, `0x3841'cb80U`, `1U`, `0U`, `1U`},
40	// x = -0x1.9c3e1ep-38, exp2m1f(x) = -0x1.1dbeacp-38 (RZ)
41	{`0xacce'1f0fU`, `0xac8e'df56U`, `0U`, `1U`, `0U`},
42	// x = -0x1.4d89b4p-32, exp2m1f(x) = -0x1.ce61b6p-33 (RZ)
43	{`0xafa6'c4daU`, `0xaf67'30dbU`, `0U`, `1U`, `1U`},
44	// x = -0x1.a6eac4p-10, exp2m1f(x) = -0x1.24fadap-10 (RZ)
45	{`0xbad3'7562U`, `0xba92'7d6dU`, `0U`, `1U`, `1U`},
46	// x = -0x1.e7526ep-6, exp2m1f(x) = -0x1.4e53dep-6 (RZ)
47	{`0xbcf3'a937U`, `0xbca7'29efU`, `0U`, `1U`, `1U`},
48	}};
49
50	static constexpr size_t N_EXCEPTS_HI = `3`;
51
52	static constexpr fputil::ExceptValues<float, N_EXCEPTS_HI> EXP2M1F_EXCEPTS_HI =
53	{{
54	// (input, RZ output, RU offset, RD offset, RN offset)
55	// x = 0x1.16a972p-1, exp2m1f(x) = 0x1.d545b2p-2 (RZ)
56	{`0x3f0b'54b9U`, `0x3eea'a2d9U`, `1U`, `0U`, `0U`},
57	// x = -0x1.9f12acp-5, exp2m1f(x) = -0x1.1ab68cp-5 (RZ)
58	{`0xbd4f'8956U`, `0xbd0d'5b46U`, `0U`, `1U`, `0U`},
59	// x = -0x1.de7b9cp-5, exp2m1f(x) = -0x1.4508f4p-5 (RZ)
60	{`0xbd6f'3dceU`, `0xbd22'847aU`, `0U`, `1U`, `1U`},
61	}};
62	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
63
64	LLVM_LIBC_FUNCTION(float, exp2m1f, (float x)) {
65	using FPBits = fputil::FPBits<float>;
66	FPBits xbits(x);
67
68	uint32_t x_u = xbits.uintval();
69	uint32_t x_abs = x_u & `0x7fff'ffffU`;
70
71	// When \|x\| >= 128, or x is nan, or \|x\| <= 2^-5
72	if (LIBC_UNLIKELY(x_abs >= `0x4300'0000U` \|\| x_abs <= `0x3d00'0000U`)) {
73	// \|x\| <= 2^-5
74	if (x_abs <= `0x3d00'0000U`) {
75	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
76	if (auto r = EXP2M1F_EXCEPTS_LO.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
77	return r.value();
78	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
79
80	// Minimax polynomial generated by Sollya with:
81	// > display = hexadecimal;
82	// > fpminimax((2^x - 1)/x, 5, [\|D...\|], [-2^-5, 2^-5]);
83	constexpr double COEFFS[] = {
84	`0x1.62e42fefa39f3p-1`, `0x1.ebfbdff82c57bp-3`, `0x1.c6b08d6f2d7aap-5`,
85	`0x1.3b2ab6fc92f5dp-7`, `0x1.5d897cfe27125p-10`, `0x1.43090e61e6af1p-13`};
86	double xd = x;
87	double xsq = xd * xd;
88	double c0 = fputil::multiply_add(xd, COEFFS[`1`], COEFFS[`0`]);
89	double c1 = fputil::multiply_add(xd, COEFFS[`3`], COEFFS[`2`]);
90	double c2 = fputil::multiply_add(xd, COEFFS[`5`], COEFFS[`4`]);
91	double p = fputil::polyeval(xsq, c0, c1, c2);
92	return static_cast<float>(p * xd);
93	}
94
95	// x >= 128, or x is nan
96	if (xbits.is_pos()) {
97	if (xbits.is_finite()) {
98	int rounding = fputil::quick_get_round();
99	if (rounding == FE_DOWNWARD \|\| rounding == FE_TOWARDZERO)
100	return FPBits::max_normal().get_val();
101
102	fputil::set_errno_if_required(ERANGE);
103	fputil::raise_except_if_required(FE_OVERFLOW);
104	}
105
106	// x >= 128 and 2^x - 1 rounds to +inf, or x is +inf or nan
107	return x + FPBits::inf().get_val();
108	}
109	}
110
111	if (LIBC_UNLIKELY(x <= -`25.0f`)) {
112	// 2^(-inf) - 1 = -1
113	if (xbits.is_inf())
114	return -`1.0f`;
115	// 2^nan - 1 = nan
116	if (xbits.is_nan())
117	return x;
118
119	int rounding = fputil::quick_get_round();
120	if (rounding == FE_UPWARD \|\| rounding == FE_TOWARDZERO)
121	return -`0x1.ffff'fep-1f`; // -1.0f + 0x1.0p-24f
122
123	fputil::set_errno_if_required(ERANGE);
124	fputil::raise_except_if_required(FE_UNDERFLOW);
125	return -`1.0f`;
126	}
127
128	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
129	if (auto r = EXP2M1F_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
130	return r.value();
131	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
132
133	// For -25 < x < 128, to compute 2^x, we perform the following range
134	// reduction: find hi, mid, lo such that:
135	// x = hi + mid + lo, in which:
136	// hi is an integer,
137	// 0 <= mid 2^5 < 32 is an integer,*
138	// -2^(-6) <= lo <= 2^(-6).
139	// In particular,
140	// hi + mid = round(x 2^5) * 2^(-5).*
141	// Then,
142	// 2^x = 2^(hi + mid + lo) = 2^hi 2^mid * 2^lo.*
143	// 2^mid is stored in the lookup table of 32 elements.
144	// 2^lo is computed using a degree-4 minimax polynomial generated by Sollya.
145	// We perform 2^hi 2^mid by simply add hi to the exponent field of 2^mid.*
146
147	// kf = (hi + mid) 2^5 = round(x * 2^5)*
148	float kf;
149	int k;
150	#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
151	kf = fputil::nearest_integer(x * `32.0f`);
152	k = static_cast<int>(kf);
153	#else
154	constexpr float HALF[`2`] = {`0.5f`, -`0.5f`};
155	k = static_cast<int>(fputil::multiply_add(x, `32.0f`, HALF[x < `0.0f`]));
156	kf = static_cast<float>(k);
157	#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT
158
159	// lo = x - (hi + mid) = x - kf 2^(-5)*
160	double lo = fputil::multiply_add(-`0x1.0p-5f`, kf, x);
161
162	// hi = floor(kf 2^(-4))*
163	// exp2_hi = shift hi to the exponent field of double precision.
164	int64_t exp2_hi =
165	static_cast<int64_t>(static_cast<uint64_t>(k >> ExpBase::MID_BITS)
166	<< fputil::FPBits<double>::FRACTION_LEN);
167	// mh = 2^hi 2^mid*
168	// mh_bits = bit field of mh
169	int64_t mh_bits = ExpBase::EXP_2_MID[k & ExpBase::MID_MASK] + exp2_hi;
170	double mh = fputil::FPBits<double>(static_cast<uint64_t>(mh_bits)).get_val();
171
172	// Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with:
173	// > display = hexadecimal;
174	// > fpminimax((2^x - 1)/x, 4, [\|D...\|], [-2^-6, 2^-6]);
175	constexpr double COEFFS[`5`] = {`0x1.62e42fefa39efp-1`, `0x1.ebfbdff8131c4p-3`,
176	`0x1.c6b08d7061695p-5`, `0x1.3b2b1bee74b2ap-7`,
177	`0x1.5d88091198529p-10`};
178	double lo_sq = lo * lo;
179	double c1 = fputil::multiply_add(lo, COEFFS[`0`], `1.0`);
180	double c2 = fputil::multiply_add(lo, COEFFS[`2`], COEFFS[`1`]);
181	double c3 = fputil::multiply_add(lo, COEFFS[`4`], COEFFS[`3`]);
182	double exp2_lo = fputil::polyeval(lo_sq, c1, c2, c3);
183	// 2^x - 1 = 2^(hi + mid + lo) - 1
184	// = 2^(hi + mid) 2^lo - 1*
185	// ~ mh (1 + lo * P(lo)) - 1*
186	// = mh exp2_lo - 1*
187	return static_cast<float>(fputil::multiply_add(exp2_lo, mh, -`1.0`));
188	}
189
190	} // namespace LIBC_NAMESPACE_DECL
191

source code of libc/src/math/generic/exp2m1f.cpp