exp2_advsimd.c source code [glibc/sysdeps/aarch64/fpu/exp2_advsimd.c]

1	/ Double-precision vector (AdvSIMD) exp2 function*
2
3	Copyright (C) 2023-2024 Free Software Foundation, Inc.
4	This file is part of the GNU C Library.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, see
18	<https://www.gnu.org/licenses/>. /*
19
20	#include "v_math.h"
21	#include "poly_advsimd_f64.h"
22
23	#define N (1 << V_EXP_TABLE_BITS)
24	#define IndexMask (N - 1)
25	#define BigBound 1022.0
26	#define UOFlowBound 1280.0
27	#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */
28
29	static const struct data
30	{
31	float64x2_t poly[`4`];
32	float64x2_t shift, scale_big_bound, scale_uoflow_bound;
33	} data = {
34	/ Coefficients are computed using Remez algorithm with*
35	minimisation of the absolute error. /*
36	.poly = { V2 (`0x1.62e42fefa3686p-1`), V2 (`0x1.ebfbdff82c241p-3`),
37	V2 (`0x1.c6b09b16de99ap-5`), V2 (`0x1.3b2abf5571ad8p-7`) },
38	.shift = V2 (`0x1.8p52` / N),
39	.scale_big_bound = V2 (BigBound),
40	.scale_uoflow_bound = V2 (UOFlowBound),
41	};
42
43	static inline uint64x2_t
44	lookup_sbits (uint64x2_t i)
45	{
46	return (uint64x2_t){ __v_exp_data[i[`0`] & IndexMask],
47	__v_exp_data[i[`1`] & IndexMask] };
48	}
49
50	#if WANT_SIMD_EXCEPT
51
52	# define Thres 0x2080000000000000 /* asuint64(512.0) - TinyBound. */
53
54	/ Call scalar exp2 as a fallback. /
55	static float64x2_t VPCS_ATTR NOINLINE
56	special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special)
57	{
58	return v_call_f64 (exp2, x, y, is_special);
59	}
60
61	#else
62
63	# define SpecialOffset 0x6000000000000000 /* 0x1p513. */
64	/ SpecialBias1 + SpecialBias1 = asuint(1.0). /
65	# define SpecialBias1 0x7000000000000000 /* 0x1p769. */
66	# define SpecialBias2 0x3010000000000000 /* 0x1p-254. */
67
68	static inline float64x2_t VPCS_ATTR
69	special_case (float64x2_t s, float64x2_t y, float64x2_t n,
70	const struct data *d)
71	{
72	/ 2^(n/N) may overflow, break it up into s1s2. /*
73	uint64x2_t b = vandq_u64 (vclezq_f64 (n), v_u64 (SpecialOffset));
74	float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (v_u64 (SpecialBias1), b));
75	float64x2_t s2 = vreinterpretq_f64_u64 (
76	vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), v_u64 (SpecialBias2)), b));
77	uint64x2_t cmp = vcagtq_f64 (n, d->scale_uoflow_bound);
78	float64x2_t r1 = vmulq_f64 (s1, s1);
79	float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, s2, y), s1);
80	return vbslq_f64 (cmp, r1, r0);
81	}
82
83	#endif
84
85	/ Fast vector implementation of exp2.*
86	Maximum measured error is 1.65 ulp.
87	_ZGVnN2v_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1
88	want 0x1.f8db0d4df721dp-1. /*
89	VPCS_ATTR
90	float64x2_t V_NAME_D1 (exp2) (float64x2_t x)
91	{
92	const struct data *d = ptr_barrier (&data);
93	uint64x2_t cmp;
94	#if WANT_SIMD_EXCEPT
95	uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
96	cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres));
97	/ Mask special lanes and retain a copy of x for passing to special-case*
98	handler. /*
99	float64x2_t xc = x;
100	x = v_zerofy_f64 (x, cmp);
101	#else
102	cmp = vcagtq_f64 (x, d->scale_big_bound);
103	#endif
104
105	/ n = round(x/N). /
106	float64x2_t z = vaddq_f64 (d->shift, x);
107	uint64x2_t u = vreinterpretq_u64_f64 (z);
108	float64x2_t n = vsubq_f64 (z, d->shift);
109
110	/ r = x - n/N. /
111	float64x2_t r = vsubq_f64 (x, n);
112
113	/ s = 2^(n/N). /
114	uint64x2_t e = vshlq_n_u64 (u, `52` - V_EXP_TABLE_BITS);
115	u = lookup_sbits (u);
116	float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e));
117
118	/ y ~ exp2(r) - 1. /
119	float64x2_t r2 = vmulq_f64 (r, r);
120	float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly);
121	y = vmulq_f64 (r, y);
122
123	if (__glibc_unlikely (v_any_u64 (cmp)))
124	#if !WANT_SIMD_EXCEPT
125	return special_case (s, y, n, d);
126	#else
127	return special_case (xc, vfmaq_f64 (s, s, y), cmp);
128	#endif
129	return vfmaq_f64 (s, s, y);
130	}
131

source code of glibc/sysdeps/aarch64/fpu/exp2_advsimd.c