1//===-- Common utilities for half-precision exponential functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
10#define LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
11
12#include "src/__support/FPUtil/FPBits.h"
13#include "src/__support/FPUtil/cast.h"
14#include "src/__support/FPUtil/multiply_add.h"
15#include "src/__support/FPUtil/nearest_integer.h"
16#include "src/__support/macros/attributes.h"
17#include "src/__support/macros/config.h"
18#include <stdint.h>
19
20#include "src/__support/math/expf16_utils.h"
21
22namespace LIBC_NAMESPACE_DECL {
23
24// Generated by Sollya with the following commands:
25// > display = hexadecimal;
26// > for i from 0 to 7 do printsingle(round(2^(i * 2^-3), SG, RN));
27constexpr cpp::array<uint32_t, 8> EXP2_MID_BITS = {
28 0x3f80'0000U, 0x3f8b'95c2U, 0x3f98'37f0U, 0x3fa5'fed7U,
29 0x3fb5'04f3U, 0x3fc5'672aU, 0x3fd7'44fdU, 0x3fea'c0c7U,
30};
31
32LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) {
33 // For -25 < x < 16, to compute 2^x, we perform the following range reduction:
34 // find hi, mid, lo, such that:
35 // x = hi + mid + lo, in which
36 // hi is an integer,
37 // mid * 2^3 is an integer,
38 // -2^(-4) <= lo < 2^(-4).
39 // In particular,
40 // hi + mid = round(x * 2^3) * 2^(-3).
41 // Then,
42 // 2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo.
43 // We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi * 2^mid
44 // by adding hi to the exponent field of 2^mid. 2^lo is computed using a
45 // degree-3 minimax polynomial generated by Sollya.
46
47 float xf = x;
48 float kf = fputil::nearest_integer(xf * 0x1.0p+3f);
49 int x_hi_mid = static_cast<int>(kf);
50 unsigned x_hi = static_cast<unsigned>(x_hi_mid) >> 3;
51 unsigned x_mid = static_cast<unsigned>(x_hi_mid) & 0x7;
52 // lo = x - (hi + mid) = round(x * 2^3) * (-2^(-3)) + x
53 float lo = fputil::multiply_add(kf, -0x1.0p-3f, xf);
54
55 uint32_t exp2_hi_mid_bits =
56 EXP2_MID_BITS[x_mid] +
57 static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN);
58 float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val();
59 // Degree-3 minimax polynomial generated by Sollya with the following
60 // commands:
61 // > display = hexadecimal;
62 // > P = fpminimax((2^x - 1)/x, 2, [|SG...|], [-2^-4, 2^-4]);
63 // > 1 + x * P;
64 float exp2_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.62e43p-1f, 0x1.ec0aa6p-3f,
65 0x1.c6b4a6p-5f);
66 return {exp2_hi_mid, exp2_lo};
67}
68
69// Generated by Sollya with the following commands:
70// > display = hexadecimal;
71// > round(log2(10), SG, RN);
72static constexpr float LOG2F_10 = 0x1.a934fp+1f;
73
74// Generated by Sollya with the following commands:
75// > display = hexadecimal;
76// > round(log10(2), SG, RN);
77static constexpr float LOG10F_2 = 0x1.344136p-2f;
78
79LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) {
80 // For -8 < x < 5, to compute 10^x, we perform the following range reduction:
81 // find hi, mid, lo, such that:
82 // x = (hi + mid) * log2(10) + lo, in which
83 // hi is an integer,
84 // mid * 2^3 is an integer,
85 // -2^(-4) <= lo < 2^(-4).
86 // In particular,
87 // hi + mid = round(x * 2^3) * 2^(-3).
88 // Then,
89 // 10^x = 10^(hi + mid + lo) = 2^((hi + mid) * log2(10)) + 10^lo
90 // We store 2^mid in the lookup table EXP2_MID_BITS, and compute 2^hi * 2^mid
91 // by adding hi to the exponent field of 2^mid. 10^lo is computed using a
92 // degree-4 minimax polynomial generated by Sollya.
93
94 float xf = x;
95 float kf = fputil::nearest_integer(xf * (LOG2F_10 * 0x1.0p+3f));
96 int x_hi_mid = static_cast<int>(kf);
97 unsigned x_hi = static_cast<unsigned>(x_hi_mid) >> 3;
98 unsigned x_mid = static_cast<unsigned>(x_hi_mid) & 0x7;
99 // lo = x - (hi + mid) = round(x * 2^3 * log2(10)) * log10(2) * (-2^(-3)) + x
100 float lo = fputil::multiply_add(kf, LOG10F_2 * -0x1.0p-3f, xf);
101
102 uint32_t exp2_hi_mid_bits =
103 EXP2_MID_BITS[x_mid] +
104 static_cast<uint32_t>(x_hi << fputil::FPBits<float>::FRACTION_LEN);
105 float exp2_hi_mid = fputil::FPBits<float>(exp2_hi_mid_bits).get_val();
106 // Degree-4 minimax polynomial generated by Sollya with the following
107 // commands:
108 // > display = hexadecimal;
109 // > P = fpminimax((10^x - 1)/x, 3, [|SG...|], [-2^-4, 2^-4]);
110 // > 1 + x * P;
111 float exp10_lo = fputil::polyeval(lo, 0x1p+0f, 0x1.26bb14p+1f, 0x1.53526p+1f,
112 0x1.04b434p+1f, 0x1.2bcf9ep+0f);
113 return {exp2_hi_mid, exp10_lo};
114}
115
116// Generated by Sollya with the following commands:
117// > display = hexadecimal;
118// > round(log2(exp(1)), SG, RN);
119static constexpr float LOG2F_E = 0x1.715476p+0f;
120
121// Generated by Sollya with the following commands:
122// > display = hexadecimal;
123// > round(log(2), SG, RN);
124static constexpr float LOGF_2 = 0x1.62e43p-1f;
125
126// Generated by Sollya with the following commands:
127// > display = hexadecimal;
128// > for i from 0 to 31 do printsingle(round(2^(i * 2^-5), SG, RN));
129static constexpr cpp::array<uint32_t, 32> EXP2_MID_5_BITS = {
130 0x3f80'0000U, 0x3f82'cd87U, 0x3f85'aac3U, 0x3f88'980fU, 0x3f8b'95c2U,
131 0x3f8e'a43aU, 0x3f91'c3d3U, 0x3f94'f4f0U, 0x3f98'37f0U, 0x3f9b'8d3aU,
132 0x3f9e'f532U, 0x3fa2'7043U, 0x3fa5'fed7U, 0x3fa9'a15bU, 0x3fad'583fU,
133 0x3fb1'23f6U, 0x3fb5'04f3U, 0x3fb8'fbafU, 0x3fbd'08a4U, 0x3fc1'2c4dU,
134 0x3fc5'672aU, 0x3fc9'b9beU, 0x3fce'248cU, 0x3fd2'a81eU, 0x3fd7'44fdU,
135 0x3fdb'fbb8U, 0x3fe0'ccdfU, 0x3fe5'b907U, 0x3fea'c0c7U, 0x3fef'e4baU,
136 0x3ff5'257dU, 0x3ffa'83b3U,
137};
138
139// This function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
140// and exp(-x) simultaneously.
141// To compute e^x, we perform the following range reduction:
142// find hi, mid, lo such that:
143// x = (hi + mid) * log(2) + lo, in which
144// hi is an integer,
145// 0 <= mid * 2^5 < 32 is an integer
146// -2^(-5) <= lo * log2(e) <= 2^-5.
147// In particular,
148// hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
149// Then,
150// e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
151// We store 2^mid in the lookup table EXP2_MID_5_BITS, and compute 2^hi * 2^mid
152// by adding hi to the exponent field of 2^mid.
153// e^lo is computed using a degree-3 minimax polynomial generated by Sollya:
154// e^lo ~ P(lo)
155// = 1 + lo + c2 * lo^2 + ... + c5 * lo^5
156// = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4)
157// = P_even + lo * P_odd
158// To compute e^(-x), notice that:
159// e^(-x) = 2^(-(hi + mid)) * e^(-lo)
160// ~ 2^(-(hi + mid)) * P(-lo)
161// = 2^(-(hi + mid)) * (P_even - lo * P_odd)
162// So:
163// sinh(x) = (e^x - e^(-x)) / 2
164// ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
165// 2^(-(hi + mid)) * (P_even - lo * P_odd))
166// = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
167// lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
168// And similarly:
169// cosh(x) = (e^x + e^(-x)) / 2
170// ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
171// lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
172// The main point of these formulas is that the expensive part of calculating
173// the polynomials approximating lower parts of e^x and e^(-x) is shared and
174// only done once.
175template <bool IsSinh> LIBC_INLINE float16 eval_sinh_or_cosh(float16 x) {
176 float xf = x;
177 float kf = fputil::nearest_integer(xf * (LOG2F_E * 0x1.0p+5f));
178 int x_hi_mid_p = static_cast<int>(kf);
179 int x_hi_mid_m = -x_hi_mid_p;
180
181 unsigned x_hi_p = static_cast<unsigned>(x_hi_mid_p) >> 5;
182 unsigned x_hi_m = static_cast<unsigned>(x_hi_mid_m) >> 5;
183 unsigned x_mid_p = static_cast<unsigned>(x_hi_mid_p) & 0x1f;
184 unsigned x_mid_m = static_cast<unsigned>(x_hi_mid_m) & 0x1f;
185
186 uint32_t exp2_hi_mid_bits_p =
187 EXP2_MID_5_BITS[x_mid_p] +
188 static_cast<uint32_t>(x_hi_p << fputil::FPBits<float>::FRACTION_LEN);
189 uint32_t exp2_hi_mid_bits_m =
190 EXP2_MID_5_BITS[x_mid_m] +
191 static_cast<uint32_t>(x_hi_m << fputil::FPBits<float>::FRACTION_LEN);
192 // exp2_hi_mid_p = 2^(hi + mid)
193 float exp2_hi_mid_p = fputil::FPBits<float>(exp2_hi_mid_bits_p).get_val();
194 // exp2_hi_mid_m = 2^(-(hi + mid))
195 float exp2_hi_mid_m = fputil::FPBits<float>(exp2_hi_mid_bits_m).get_val();
196
197 // exp2_hi_mid_sum = 2^(hi + mid) + 2^(-(hi + mid))
198 float exp2_hi_mid_sum = exp2_hi_mid_p + exp2_hi_mid_m;
199 // exp2_hi_mid_diff = 2^(hi + mid) - 2^(-(hi + mid))
200 float exp2_hi_mid_diff = exp2_hi_mid_p - exp2_hi_mid_m;
201
202 // lo = x - (hi + mid) = round(x * log2(e) * 2^5) * log(2) * (-2^(-5)) + x
203 float lo = fputil::multiply_add(kf, LOGF_2 * -0x1.0p-5f, xf);
204 float lo_sq = lo * lo;
205
206 // Degree-3 minimax polynomial generated by Sollya with the following
207 // commands:
208 // > display = hexadecimal;
209 // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]);
210 // > 1 + x * P;
211 constexpr cpp::array<float, 4> COEFFS = {0x1p+0f, 0x1p+0f, 0x1.0004p-1f,
212 0x1.555778p-3f};
213 float half_p_odd =
214 fputil::polyeval(lo_sq, COEFFS[1] * 0.5f, COEFFS[3] * 0.5f);
215 float half_p_even =
216 fputil::polyeval(lo_sq, COEFFS[0] * 0.5f, COEFFS[2] * 0.5f);
217
218 // sinh(x) = lo * (0.5 * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) +
219 // (0.5 * P_even * (2^(hi + mid) - 2^(-(hi + mid))))
220 if constexpr (IsSinh)
221 return fputil::cast<float16>(fputil::multiply_add(
222 lo, half_p_odd * exp2_hi_mid_sum, half_p_even * exp2_hi_mid_diff));
223 // cosh(x) = lo * (0.5 * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) +
224 // (0.5 * P_even * (2^(hi + mid) + 2^(-(hi + mid))))
225 return fputil::cast<float16>(fputil::multiply_add(
226 lo, half_p_odd * exp2_hi_mid_diff, half_p_even * exp2_hi_mid_sum));
227}
228
229// Generated by Sollya with the following commands:
230// > display = hexadecimal;
231// > for i from 0 to 31 do print(round(log(1 + i * 2^-5), SG, RN));
232constexpr cpp::array<float, 32> LOGF_F = {
233 0x0p+0f, 0x1.f829bp-6f, 0x1.f0a30cp-5f, 0x1.6f0d28p-4f,
234 0x1.e27076p-4f, 0x1.29553p-3f, 0x1.5ff308p-3f, 0x1.9525aap-3f,
235 0x1.c8ff7cp-3f, 0x1.fb9186p-3f, 0x1.1675cap-2f, 0x1.2e8e2cp-2f,
236 0x1.4618bcp-2f, 0x1.5d1bdcp-2f, 0x1.739d8p-2f, 0x1.89a338p-2f,
237 0x1.9f323ep-2f, 0x1.b44f78p-2f, 0x1.c8ff7cp-2f, 0x1.dd46ap-2f,
238 0x1.f128f6p-2f, 0x1.02552ap-1f, 0x1.0be72ep-1f, 0x1.154c3ep-1f,
239 0x1.1e85f6p-1f, 0x1.2795e2p-1f, 0x1.307d74p-1f, 0x1.393e0ep-1f,
240 0x1.41d8fep-1f, 0x1.4a4f86p-1f, 0x1.52a2d2p-1f, 0x1.5ad404p-1f,
241};
242
243// Generated by Sollya with the following commands:
244// > display = hexadecimal;
245// > for i from 0 to 31 do print(round(log2(1 + i * 2^-5), SG, RN));
246constexpr cpp::array<float, 32> LOG2F_F = {
247 0x0p+0f, 0x1.6bad38p-5f, 0x1.663f7p-4f, 0x1.08c588p-3f,
248 0x1.5c01a4p-3f, 0x1.acf5e2p-3f, 0x1.fbc16cp-3f, 0x1.24407ap-2f,
249 0x1.49a784p-2f, 0x1.6e221cp-2f, 0x1.91bba8p-2f, 0x1.b47ecp-2f,
250 0x1.d6753ep-2f, 0x1.f7a856p-2f, 0x1.0c105p-1f, 0x1.1bf312p-1f,
251 0x1.2b8034p-1f, 0x1.3abb4p-1f, 0x1.49a784p-1f, 0x1.584822p-1f,
252 0x1.66a008p-1f, 0x1.74b1fep-1f, 0x1.82809ep-1f, 0x1.900e62p-1f,
253 0x1.9d5dap-1f, 0x1.aa709p-1f, 0x1.b74948p-1f, 0x1.c3e9cap-1f,
254 0x1.d053f6p-1f, 0x1.dc899ap-1f, 0x1.e88c6cp-1f, 0x1.f45e08p-1f,
255};
256
257// Generated by Sollya with the following commands:
258// > display = hexadecimal;
259// > for i from 0 to 31 do print(round(log10(1 + i * 2^-5), SG, RN));
260constexpr cpp::array<float, 32> LOG10F_F = {
261 0x0p+0f, 0x1.b5e908p-7f, 0x1.af5f92p-6f, 0x1.3ed11ap-5f,
262 0x1.a30a9ep-5f, 0x1.02428cp-4f, 0x1.31b306p-4f, 0x1.5fe804p-4f,
263 0x1.8cf184p-4f, 0x1.b8de4ep-4f, 0x1.e3bc1ap-4f, 0x1.06cbd6p-3f,
264 0x1.1b3e72p-3f, 0x1.2f3b6ap-3f, 0x1.42c7e8p-3f, 0x1.55e8c6p-3f,
265 0x1.68a288p-3f, 0x1.7af974p-3f, 0x1.8cf184p-3f, 0x1.9e8e7cp-3f,
266 0x1.afd3e4p-3f, 0x1.c0c514p-3f, 0x1.d1653p-3f, 0x1.e1b734p-3f,
267 0x1.f1bdeep-3f, 0x1.00be06p-2f, 0x1.087a08p-2f, 0x1.101432p-2f,
268 0x1.178da6p-2f, 0x1.1ee778p-2f, 0x1.2622bp-2f, 0x1.2d404cp-2f,
269};
270
271// Generated by Sollya with the following commands:
272// > display = hexadecimal;
273// > for i from 0 to 31 do print(round(1 / (1 + i * 2^-5), SG, RN));
274constexpr cpp::array<float, 32> ONE_OVER_F_F = {
275 0x1p+0f, 0x1.f07c2p-1f, 0x1.e1e1e2p-1f, 0x1.d41d42p-1f,
276 0x1.c71c72p-1f, 0x1.bacf92p-1f, 0x1.af286cp-1f, 0x1.a41a42p-1f,
277 0x1.99999ap-1f, 0x1.8f9c18p-1f, 0x1.861862p-1f, 0x1.7d05f4p-1f,
278 0x1.745d18p-1f, 0x1.6c16c2p-1f, 0x1.642c86p-1f, 0x1.5c9882p-1f,
279 0x1.555556p-1f, 0x1.4e5e0ap-1f, 0x1.47ae14p-1f, 0x1.414142p-1f,
280 0x1.3b13b2p-1f, 0x1.3521dp-1f, 0x1.2f684cp-1f, 0x1.29e412p-1f,
281 0x1.24924ap-1f, 0x1.1f7048p-1f, 0x1.1a7b96p-1f, 0x1.15b1e6p-1f,
282 0x1.111112p-1f, 0x1.0c9714p-1f, 0x1.08421p-1f, 0x1.041042p-1f,
283};
284
285} // namespace LIBC_NAMESPACE_DECL
286
287#endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H
288

source code of libc/src/math/generic/expxf16.h