exp2_sve.c source code [glibc/sysdeps/aarch64/fpu/exp2_sve.c]

/* Double-precision vector (SVE) exp2 function

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include "sv_math.h"
21	#include "poly_sve_f64.h"
22
23	#define N (1 << V_EXP_TABLE_BITS)
24
25	#define BigBound 1022
26	#define UOFlowBound 1280
27
28	static const struct data
29	{
30	double poly[`4`];
31	double shift, big_bound, uoflow_bound;
32	} data = {
33	/ Coefficients are computed using Remez algorithm with*
34	minimisation of the absolute error. /*
35	.poly = { `0x1.62e42fefa3686p-1`, `0x1.ebfbdff82c241p-3`, `0x1.c6b09b16de99ap-5`,
36	`0x1.3b2abf5571ad8p-7` },
37	.shift = `0x1.8p52` / N,
38	.uoflow_bound = UOFlowBound,
39	.big_bound = BigBound,
40	};
41
42	#define SpecialOffset 0x6000000000000000 /* 0x1p513. */
43	/ SpecialBias1 + SpecialBias1 = asuint(1.0). /
44	#define SpecialBias1 0x7000000000000000 /* 0x1p769. */
45	#define SpecialBias2 0x3010000000000000 /* 0x1p-254. */
46
47	/ Update of both special and non-special cases, if any special case is*
48	detected. /*
49	static inline svfloat64_t
50	special_case (svbool_t pg, svfloat64_t s, svfloat64_t y, svfloat64_t n,
51	const struct data *d)
52	{
53	/ s=2^n may overflow, break it up into s=s1s2,
54	such that exp = s + sy can be computed as s1(s2+s2y)*
55	and s1s1 overflows only if n>0. /
56
57	/ If n<=0 then set b to 0x6, 0 otherwise. /
58	svbool_t p_sign = svcmple (pg, n, `0.0`); / n <= 0. /
59	svuint64_t b = svdup_u64_z (p_sign, SpecialOffset);
60
61	/ Set s1 to generate overflow depending on sign of exponent n. /
62	svfloat64_t s1 = svreinterpret_f64 (svsubr_x (pg, b, SpecialBias1));
63	/ Offset s to avoid overflow in final result if n is below threshold. /
64	svfloat64_t s2 = svreinterpret_f64 (
65	svadd_x (pg, svsub_x (pg, svreinterpret_u64 (s), SpecialBias2), b));
66
67	/ \|n\| > 1280 => 2^(n) overflows. /
68	svbool_t p_cmp = svacgt (pg, n, d->uoflow_bound);
69
70	svfloat64_t r1 = svmul_x (pg, s1, s1);
71	svfloat64_t r2 = svmla_x (pg, s2, s2, y);
72	svfloat64_t r0 = svmul_x (pg, r2, s1);
73
74	return svsel (p_cmp, r1, r0);
75	}
76
77	/ Fast vector implementation of exp2.*
78	Maximum measured error is 1.65 ulp.
79	_ZGVsMxv_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1
80	want 0x1.f8db0d4df721dp-1. /*
81	svfloat64_t SV_NAME_D1 (exp2) (svfloat64_t x, svbool_t pg)
82	{
83	const struct data *d = ptr_barrier (&data);
84	svbool_t no_big_scale = svacle (pg, x, d->big_bound);
85	svbool_t special = svnot_z (pg, no_big_scale);
86
87	/ Reduce x to k/N + r, where k is integer and r in [-1/2N, 1/2N]. /
88	svfloat64_t shift = sv_f64 (x: d->shift);
89	svfloat64_t kd = svadd_x (pg, x, shift);
90	svuint64_t ki = svreinterpret_u64 (kd);
91	/ kd = k/N. /
92	kd = svsub_x (pg, kd, shift);
93	svfloat64_t r = svsub_x (pg, x, kd);
94
95	/ scale ~= 2^(k/N). /
96	svuint64_t idx = svand_x (pg, ki, N - `1`);
97	svuint64_t sbits = svld1_gather_index (pg, __v_exp_data, idx);
98	/ This is only a valid scale when -1023N < k < 1024N. /
99	svuint64_t top = svlsl_x (pg, ki, `52` - V_EXP_TABLE_BITS);
100	svfloat64_t scale = svreinterpret_f64 (svadd_x (pg, sbits, top));
101
102	/ Approximate exp2(r) using polynomial. /
103	svfloat64_t r2 = svmul_x (pg, r, r);
104	svfloat64_t p = sv_pairwise_poly_3_f64_x (pg, x: r, x2: r2, poly: d->poly);
105	svfloat64_t y = svmul_x (pg, r, p);
106
107	/ Assemble exp2(x) = exp2(r) * scale. /
108	if (__glibc_unlikely (svptest_any (pg, special)))
109	return special_case (pg, s: scale, y, n: kd, d);
110	return svmla_x (pg, scale, scale, y);
111	}
112

source code of glibc/sysdeps/aarch64/fpu/exp2_sve.c