1/* Double-precision vector (AdvSIMD) exp2 function
2
3 Copyright (C) 2023-2024 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include "v_math.h"
21#include "poly_advsimd_f64.h"
22
23#define N (1 << V_EXP_TABLE_BITS)
24#define IndexMask (N - 1)
25#define BigBound 1022.0
26#define UOFlowBound 1280.0
27#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */
28
29static const struct data
30{
31 float64x2_t poly[4];
32 float64x2_t shift, scale_big_bound, scale_uoflow_bound;
33} data = {
34 /* Coefficients are computed using Remez algorithm with
35 minimisation of the absolute error. */
36 .poly = { V2 (0x1.62e42fefa3686p-1), V2 (0x1.ebfbdff82c241p-3),
37 V2 (0x1.c6b09b16de99ap-5), V2 (0x1.3b2abf5571ad8p-7) },
38 .shift = V2 (0x1.8p52 / N),
39 .scale_big_bound = V2 (BigBound),
40 .scale_uoflow_bound = V2 (UOFlowBound),
41};
42
43static inline uint64x2_t
44lookup_sbits (uint64x2_t i)
45{
46 return (uint64x2_t){ __v_exp_data[i[0] & IndexMask],
47 __v_exp_data[i[1] & IndexMask] };
48}
49
50#if WANT_SIMD_EXCEPT
51
52# define Thres 0x2080000000000000 /* asuint64(512.0) - TinyBound. */
53
54/* Call scalar exp2 as a fallback. */
55static float64x2_t VPCS_ATTR NOINLINE
56special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special)
57{
58 return v_call_f64 (exp2, x, y, is_special);
59}
60
61#else
62
63# define SpecialOffset 0x6000000000000000 /* 0x1p513. */
64/* SpecialBias1 + SpecialBias1 = asuint(1.0). */
65# define SpecialBias1 0x7000000000000000 /* 0x1p769. */
66# define SpecialBias2 0x3010000000000000 /* 0x1p-254. */
67
68static inline float64x2_t VPCS_ATTR
69special_case (float64x2_t s, float64x2_t y, float64x2_t n,
70 const struct data *d)
71{
72 /* 2^(n/N) may overflow, break it up into s1*s2. */
73 uint64x2_t b = vandq_u64 (vclezq_f64 (n), v_u64 (SpecialOffset));
74 float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (v_u64 (SpecialBias1), b));
75 float64x2_t s2 = vreinterpretq_f64_u64 (
76 vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), v_u64 (SpecialBias2)), b));
77 uint64x2_t cmp = vcagtq_f64 (n, d->scale_uoflow_bound);
78 float64x2_t r1 = vmulq_f64 (s1, s1);
79 float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, s2, y), s1);
80 return vbslq_f64 (cmp, r1, r0);
81}
82
83#endif
84
85/* Fast vector implementation of exp2.
86 Maximum measured error is 1.65 ulp.
87 _ZGVnN2v_exp2(-0x1.4c264ab5b559bp-6) got 0x1.f8db0d4df721fp-1
88 want 0x1.f8db0d4df721dp-1. */
89VPCS_ATTR
90float64x2_t V_NAME_D1 (exp2) (float64x2_t x)
91{
92 const struct data *d = ptr_barrier (&data);
93 uint64x2_t cmp;
94#if WANT_SIMD_EXCEPT
95 uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x));
96 cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres));
97 /* Mask special lanes and retain a copy of x for passing to special-case
98 handler. */
99 float64x2_t xc = x;
100 x = v_zerofy_f64 (x, cmp);
101#else
102 cmp = vcagtq_f64 (x, d->scale_big_bound);
103#endif
104
105 /* n = round(x/N). */
106 float64x2_t z = vaddq_f64 (d->shift, x);
107 uint64x2_t u = vreinterpretq_u64_f64 (z);
108 float64x2_t n = vsubq_f64 (z, d->shift);
109
110 /* r = x - n/N. */
111 float64x2_t r = vsubq_f64 (x, n);
112
113 /* s = 2^(n/N). */
114 uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TABLE_BITS);
115 u = lookup_sbits (u);
116 float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e));
117
118 /* y ~ exp2(r) - 1. */
119 float64x2_t r2 = vmulq_f64 (r, r);
120 float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly);
121 y = vmulq_f64 (r, y);
122
123 if (__glibc_unlikely (v_any_u64 (cmp)))
124#if !WANT_SIMD_EXCEPT
125 return special_case (s, y, n, d);
126#else
127 return special_case (xc, vfmaq_f64 (s, s, y), cmp);
128#endif
129 return vfmaq_f64 (s, s, y);
130}
131

source code of glibc/sysdeps/aarch64/fpu/exp2_advsimd.c