/* Double-precision vector (Advanced SIMD) exp function.

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include "v_math.h"

#define N (1 << V_EXP_TABLE_BITS)
#define IndexMask (N - 1)

static const struct
{
  float64x2_t poly[3];
  float64x2_t inv_ln2, ln2_hi, ln2_lo, shift;
#if !WANT_SIMD_EXCEPT
  float64x2_t special_bound, scale_thresh;
#endif
} data = {
  /* maxerr: 1.88 +0.5 ulp
     rel error: 1.4337*2^-53
     abs error: 1.4299*2^-53 in [ -ln2/256, ln2/256 ].  */
  .poly = { V2 (0x1.ffffffffffd43p-2), V2 (0x1.55555c75adbb2p-3),
            V2 (0x1.55555da646206p-5) },
#if !WANT_SIMD_EXCEPT
  .scale_thresh = V2 (163840.0), /* 1280.0 * N.  */
  .special_bound = V2 (704.0),
#endif
  .inv_ln2 = V2 (0x1.71547652b82fep7), /* N/ln2.  */
  .ln2_hi = V2 (0x1.62e42fefa39efp-8), /* ln2/N.  */
  .ln2_lo = V2 (0x1.abc9e3b39803fp-63),
  .shift = V2 (0x1.8p+52)
};

#define C(i) data.poly[i]
#define Tab __v_exp_data

#if WANT_SIMD_EXCEPT

# define TinyBound v_u64 (0x2000000000000000) /* asuint64 (0x1p-511).  */
# define BigBound v_u64 (0x4080000000000000) /* asuint64 (0x1p9).  */
# define SpecialBound v_u64 (0x2080000000000000) /* BigBound - TinyBound.  */
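/* A single unsigned compare classifies both tails at once:
   asuint64 (|x|) - TinyBound wraps around when |x| < 0x1p-511, so
   (iax - TinyBound) >= SpecialBound holds exactly when |x| < 0x1p-511,
   |x| >= 0x1p9, or x is not finite.  */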

static float64x2_t VPCS_ATTR NOINLINE
special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)
{
  /* If fenv exceptions are to be triggered correctly, fall back to the scalar
     routine for the special lanes.  */
  return v_call_f64 (exp, x, y, cmp);
}

#else

# define SpecialOffset v_u64 (0x6000000000000000) /* 0x1p513.  */
/* SpecialBias1 - SpecialBias2 = asuint64 (1.0).  */
# define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769.  */
# define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254.  */
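/* Since s1 is a power of two, multiplying by it just adds exponents:
   asuint64 (s1 * s2) = asuint64 (s2) + asuint64 (s1) - asuint64 (1.0).  The
   relation above therefore guarantees that the s1 and s2 built below satisfy
   s1 * s2 == s exactly, while keeping each factor within range.  */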

static inline float64x2_t VPCS_ATTR
special_case (float64x2_t s, float64x2_t y, float64x2_t n)
{
  /* 2^(n/N) may overflow, so break it up into s1 * s2.  */
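  /* b selects SpecialOffset on lanes where n < 0, which biases s1 down to
     0x1p-767 instead of up to 0x1p769, so the underflowing side reuses the
     same code path.  */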
  uint64x2_t b = vandq_u64 (vcltzq_f64 (n), SpecialOffset);
  float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (SpecialBias1, b));
  float64x2_t s2 = vreinterpretq_f64_u64 (
      vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), SpecialBias2), b));
  uint64x2_t cmp = vcagtq_f64 (n, data.scale_thresh);
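  /* When |n| > scale_thresh even the split scale over/underflows, so return
     s1 * s1, which evaluates to the correct 0 or inf.  */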
  float64x2_t r1 = vmulq_f64 (s1, s1);
  float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, y, s2), s1);
  return vbslq_f64 (cmp, r1, r0);
}

#endif

float64x2_t VPCS_ATTR V_NAME_D1 (exp) (float64x2_t x)
{
  float64x2_t n, r, r2, s, y, z;
  uint64x2_t cmp, u, e;

#if WANT_SIMD_EXCEPT
  /* If any lanes are special, mask them with 1 and retain a copy of x to
     allow special_case to fix special lanes later.  This is only necessary
     if fenv exceptions are to be triggered correctly.  */
  float64x2_t xm = x;
  uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
  cmp = vcgeq_u64 (vsubq_u64 (iax, TinyBound), SpecialBound);
  if (__glibc_unlikely (v_any_u64 (cmp)))
    x = vbslq_f64 (cmp, v_f64 (1), x);
#else
  cmp = vcagtq_f64 (x, data.special_bound);
#endif

  /* n = round(x/(ln2/N)).  */
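  /* Adding the shift (0x1.8p+52) forces the FMA to round away the fractional
     part of x * N/ln2 (which is < 2^51 in magnitude for non-special lanes):
     the low mantissa bits of z then hold the integer k = round (x * N/ln2),
     u captures those bits, and n = z - shift recovers k exactly as a
     double.  */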
  z = vfmaq_f64 (data.shift, x, data.inv_ln2);
  u = vreinterpretq_u64_f64 (z);
  n = vsubq_f64 (z, data.shift);

  /* r = x - n*ln2/N.  */
  r = x;
  r = vfmsq_f64 (r, data.ln2_hi, n);
  r = vfmsq_f64 (r, data.ln2_lo, n);

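  /* e shifts the bits of u above the table index into the exponent field of
     an IEEE double, contributing a factor of 2^floor(k/N) once it is added
     to the table entry below.  */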
  e = vshlq_n_u64 (u, 52 - V_EXP_TABLE_BITS);

  /* y = exp(r) - 1 ~= r + C0 r^2 + C1 r^3 + C2 r^4.  */
  r2 = vmulq_f64 (r, r);
  y = vfmaq_f64 (C (0), C (1), r);
  y = vfmaq_f64 (y, C (2), r2);
  y = vfmaq_f64 (r, y, r2);

  /* s = 2^(n/N).  */
  u = (uint64x2_t){ Tab[u[0] & IndexMask], Tab[u[1] & IndexMask] };
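  /* u now holds asuint64 (2^(j/N)) for the table index j = k % N; adding e
     bumps the biased exponent by floor (k/N), giving s = 2^(k/N).  */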
  s = vreinterpretq_f64_u64 (vaddq_u64 (u, e));

  if (__glibc_unlikely (v_any_u64 (cmp)))
#if WANT_SIMD_EXCEPT
    return special_case (xm, vfmaq_f64 (s, y, s), cmp);
#else
    return special_case (s, y, n);
#endif

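  /* For the common path, exp(x) ~= s * (1 + y) = s + s * y.  */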
  return vfmaq_f64 (s, y, s);
}