expm1f_sve.c source code [glibc/sysdeps/aarch64/fpu/expm1f_sve.c]

/* Single-precision SVE expm1

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include "sv_math.h"
21	#include "poly_sve_f32.h"
22
/* Largest value of x for which expm1(x) should round to -1.  */
#define SpecialBound 0x1.5ebc4p+6f

/* Constant table for the SVE expm1f routine: polynomial coefficients
   c0..c4, the split constant ln2_hi/ln2_lo, 1/ln2, the special-case
   threshold and the rounding shift.  */
static const struct data
{
  /* These 4 are grouped together so they can be loaded as one quadword, then
     used with _lane forms of svmla/svmls.  Do not reorder them: the lane
     indices used by the routine depend on this exact order.  */
  float c2, c4, ln2_hi, ln2_lo;
  float c0, c1, c3, inv_ln2, special_bound, shift;
} data = {
  /* Generated using fpminimax.  Every coefficient is exactly representable
     in binary32, so the f suffix does not change its value.  */
  .c0 = 0x1.fffffep-2f,		 .c1 = 0x1.5554aep-3f,
  .c2 = 0x1.555736p-5f,		 .c3 = 0x1.12287cp-7f,
  .c4 = 0x1.6b55a2p-10f,

  .special_bound = SpecialBound, .shift = 0x1.8p23f,
  .inv_ln2 = 0x1.715476p+0f,	 .ln2_hi = 0x1.62e4p-1f,
  .ln2_lo = 0x1.7f7d1cp-20f,
};
42
/* Coefficient c<i> of the data table, wrapped with sv_f32.  Expects a local
   pointer named `d' to the table to be in scope at the point of use.  */
#define C(i) sv_f32 (d->c##i)
44
45	static svfloat32_t NOINLINE
46	special_case (svfloat32_t x, svbool_t pg)
47	{
48	return sv_call_f32 (f: expm1f, x, y: x, cmp: pg);
49	}
50
51	/ Single-precision SVE exp(x) - 1. Maximum error is 1.52 ULP:*
52	_ZGVsMxv_expm1f(0x1.8f4ebcp-2) got 0x1.e859dp-2
53	want 0x1.e859d4p-2. /*
54	svfloat32_t SV_NAME_F1 (expm1) (svfloat32_t x, svbool_t pg)
55	{
56	const struct data *d = ptr_barrier (&data);
57
58	/ Large, NaN/Inf. /
59	svbool_t special = svnot_z (pg, svaclt (pg, x, d->special_bound));
60
61	if (__glibc_unlikely (svptest_any (pg, special)))
62	return special_case (x, pg);
63
64	/ This vector is reliant on layout of data - it contains constants*
65	that can be used with _lane forms of svmla/svmls. Values are:
66	[ coeff_2, coeff_4, ln2_hi, ln2_lo ]. /*
67	svfloat32_t lane_constants = svld1rq (svptrue_b32 (), &d->c2);
68
69	/ Reduce argument to smaller range:*
70	Let i = round(x / ln2)
71	and f = x - i ln2, then f is in [-ln2/2, ln2/2].*
72	exp(x) - 1 = 2^i (expm1(f) + 1) - 1*
73	where 2^i is exact because i is an integer. /*
74	svfloat32_t j = svmla_x (pg, sv_f32 (x: d->shift), x, d->inv_ln2);
75	j = svsub_x (pg, j, d->shift);
76	svint32_t i = svcvt_s32_x (pg, j);
77
78	svfloat32_t f = svmls_lane (x, j, lane_constants, `2`);
79	f = svmls_lane (f, j, lane_constants, `3`);
80
81	/ Approximate expm1(f) using polynomial.*
82	Taylor expansion for expm1(x) has the form:
83	x + ax^2 + bx^3 + cx^4 ....
84	So we calculate the polynomial P(f) = a + bf + cf^2 + ...
85	and assemble the approximation expm1(f) ~= f + f^2 P(f). /
86	svfloat32_t p12 = svmla_lane (C (`1`), f, lane_constants, `0`);
87	svfloat32_t p34 = svmla_lane (C (`3`), f, lane_constants, `1`);
88	svfloat32_t f2 = svmul_x (pg, f, f);
89	svfloat32_t p = svmla_x (pg, p12, f2, p34);
90	p = svmla_x (pg, C (`0`), f, p);
91	p = svmla_x (pg, f, f2, p);
92
93	/ Assemble the result.*
94	expm1(x) ~= 2^i (p + 1) - 1*
95	Let t = 2^i. /*
96	svfloat32_t t = svreinterpret_f32 (
97	svadd_x (pg, svreinterpret_u32 (svlsl_x (pg, i, `23`)), `0x3f800000`));
98	return svmla_x (pg, svsub_x (pg, t, `1`), p, t);
99	}
100

source code of glibc/sysdeps/aarch64/fpu/expm1f_sve.c