exp2m1f16.cpp source code [libc/src/math/generic/exp2m1f16.cpp]

1	//===-- Half-precision 2^x - 1 function -----------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "src/math/exp2m1f16.h"
10	#include "expxf16.h"
11	#include "hdr/errno_macros.h"
12	#include "hdr/fenv_macros.h"
13	#include "src/__support/FPUtil/FEnvImpl.h"
14	#include "src/__support/FPUtil/FPBits.h"
15	#include "src/__support/FPUtil/PolyEval.h"
16	#include "src/__support/FPUtil/cast.h"
17	#include "src/__support/FPUtil/except_value_utils.h"
18	#include "src/__support/FPUtil/multiply_add.h"
19	#include "src/__support/FPUtil/rounding_mode.h"
20	#include "src/__support/common.h"
21	#include "src/__support/macros/config.h"
22	#include "src/__support/macros/optimization.h"
23	#include "src/__support/macros/properties/cpu_features.h"
24
25	namespace LIBC_NAMESPACE_DECL {
26
27	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
28	static constexpr fputil::ExceptValues<float16, `6`> EXP2M1F16_EXCEPTS_LO = {{
29	// (input, RZ output, RU offset, RD offset, RN offset)
30	// x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ)
31	{`0x0b3dU`, `0x0904U`, `1U`, `0U`, `1U`},
32	// x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ)
33	{`0x0d3fU`, `0x0b45U`, `1U`, `0U`, `1U`},
34	// x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ)
35	{`0x118cU`, `0x0fb1U`, `1U`, `0U`, `0U`},
36	// x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ)
37	{`0x21bcU`, `0x1ffaU`, `1U`, `0U`, `1U`},
38	// x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ)
39	{`0x9718U`, `0x94eaU`, `0U`, `1U`, `0U`},
40	// x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ)
41	{`0x973fU`, `0x9505U`, `0U`, `1U`, `0U`},
42	}};
43
44	#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
45	static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = `6`;
46	#else
47	static constexpr size_t N_EXP2M1F16_EXCEPTS_HI = `7`;
48	#endif
49
50	static constexpr fputil::ExceptValues<float16, N_EXP2M1F16_EXCEPTS_HI>
51	EXP2M1F16_EXCEPTS_HI = {{
52	// (input, RZ output, RU offset, RD offset, RN offset)
53	// x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ)
54	{`0x3396U`, `0x31b7U`, `1U`, `0U`, `0U`},
55	#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
56	// x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ)
57	{`0x34baU`, `0x3345U`, `1U`, `0U`, `0U`},
58	#endif
59	// x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ)
60	{`0x36b6U`, `0x3566U`, `1U`, `0U`, `0U`},
61	#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
62	// x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ)
63	{`0x37b7U`, `0x3659U`, `1U`, `0U`, `1U`},
64	#endif
65	// x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ)
66	{`0xb201U`, `0xafcdU`, `0U`, `1U`, `1U`},
67	// x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ)
68	{`0xb3ccU`, `0xb0f9U`, `0U`, `1U`, `0U`},
69	// x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ)
70	{`0xb8a5U`, `0xb54cU`, `0U`, `1U`, `1U`},
71	#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
72	// x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ)
73	{`0xba8dU`, `0xb6edU`, `0U`, `1U`, `1U`},
74	#endif
75	}};
76	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
77
78	LLVM_LIBC_FUNCTION(float16, exp2m1f16, (float16 x)) {
79	using FPBits = fputil::FPBits<float16>;
80	FPBits x_bits(x);
81
82	uint16_t x_u = x_bits.uintval();
83	uint16_t x_abs = x_u & `0x7fffU`;
84
85	// When \|x\| <= 2^(-3), or \|x\| >= 11, or x is NaN.
86	if (LIBC_UNLIKELY(x_abs <= `0x3000U` \|\| x_abs >= `0x4980U`)) {
87	// exp2m1(NaN) = NaN
88	if (x_bits.is_nan()) {
89	if (x_bits.is_signaling_nan()) {
90	fputil::raise_except_if_required(FE_INVALID);
91	return FPBits::quiet_nan().get_val();
92	}
93
94	return x;
95	}
96
97	// When x >= 16.
98	if (x_u >= `0x4c00` && x_bits.is_pos()) {
99	// exp2m1(+inf) = +inf
100	if (x_bits.is_inf())
101	return FPBits::inf().get_val();
102
103	switch (fputil::quick_get_round()) {
104	case FE_TONEAREST:
105	case FE_UPWARD:
106	fputil::set_errno_if_required(ERANGE);
107	fputil::raise_except_if_required(FE_OVERFLOW \| FE_INEXACT);
108	return FPBits::inf().get_val();
109	default:
110	return FPBits::max_normal().get_val();
111	}
112	}
113
114	// When x < -11.
115	if (x_u > `0xc980U`) {
116	// exp2m1(-inf) = -1
117	if (x_bits.is_inf())
118	return FPBits::one(Sign::NEG).get_val();
119
120	// When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1.
121	if (x_u < `0xca00U`)
122	return fputil::round_result_slightly_down(
123	fputil::cast<float16>(-`0x1.ffcp-1`));
124
125	// When x <= -12, round(2^x - 1, HP, RN) = -1.
126	switch (fputil::quick_get_round()) {
127	case FE_TONEAREST:
128	case FE_DOWNWARD:
129	return FPBits::one(Sign::NEG).get_val();
130	default:
131	return fputil::cast<float16>(-`0x1.ffcp-1`);
132	}
133	}
134
135	// When \|x\| <= 2^(-3).
136	if (x_abs <= `0x3000U`) {
137	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
138	if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u);
139	LIBC_UNLIKELY(r.has_value()))
140	return r.value();
141	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
142
143	float xf = x;
144	// Degree-5 minimax polynomial generated by Sollya with the following
145	// commands:
146	// > display = hexadecimal;
147	// > P = fpminimax((2^x - 1)/x, 4, [\|SG...\|], [-2^-3, 2^-3]);
148	// > x P;*
149	return fputil::cast<float16>(
150	xf * fputil::polyeval(xf, `0x1.62e43p-1f`, `0x1.ebfbdep-3f`,
151	`0x1.c6af88p-5f`, `0x1.3b45d6p-7f`,
152	`0x1.641e7cp-10f`));
153	}
154	}
155
156	#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
157	if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
158	return r.value();
159	#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
160
161	// exp2(x) = exp2(hi + mid) exp2(lo)*
162	auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x);
163	// exp2m1(x) = exp2(hi + mid) exp2(lo) - 1*
164	return fputil::cast<float16>(
165	fputil::multiply_add(exp2_hi_mid, exp2_lo, -`1.0f`));
166	}
167
168	} // namespace LIBC_NAMESPACE_DECL
169

source code of libc/src/math/generic/exp2m1f16.cpp