1/* Single-precision vector (SVE) exp10 function.
2
3 Copyright (C) 2023-2024 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include "sv_math.h"
21#include "poly_sve_f32.h"
22
23/* For x < -SpecialBound, the result is subnormal and not handled correctly by
24 FEXPA. */
25#define SpecialBound 37.9
26
27static const struct data
28{
29 float poly[5];
30 float shift, log10_2, log2_10_hi, log2_10_lo, special_bound;
31} data = {
32 /* Coefficients generated using Remez algorithm with minimisation of relative
33 error.
34 rel error: 0x1.89dafa3p-24
35 abs error: 0x1.167d55p-23 in [-log10(2)/2, log10(2)/2]
36 maxerr: 0.52 +0.5 ulp. */
37 .poly = { 0x1.26bb16p+1f, 0x1.5350d2p+1f, 0x1.04744ap+1f, 0x1.2d8176p+0f,
38 0x1.12b41ap-1f },
39 /* 1.5*2^17 + 127, a shift value suitable for FEXPA. */
40 .shift = 0x1.903f8p17f,
41 .log10_2 = 0x1.a934fp+1,
42 .log2_10_hi = 0x1.344136p-2,
43 .log2_10_lo = -0x1.ec10cp-27,
44 .special_bound = SpecialBound,
45};
46
47static svfloat32_t NOINLINE
48special_case (svfloat32_t x, svfloat32_t y, svbool_t special)
49{
50 return sv_call_f32 (exp10f, x, y, special);
51}
52
53/* Single-precision SVE exp10f routine. Implements the same algorithm
54 as AdvSIMD exp10f.
55 Worst case error is 1.02 ULPs.
56 _ZGVsMxv_exp10f(-0x1.040488p-4) got 0x1.ba5f9ep-1
57 want 0x1.ba5f9cp-1. */
58svfloat32_t SV_NAME_F1 (exp10) (svfloat32_t x, const svbool_t pg)
59{
60 const struct data *d = ptr_barrier (&data);
61 /* exp10(x) = 2^(n/N) * 10^r = 2^n * (1 + poly (r)),
62 with poly(r) in [1/sqrt(2), sqrt(2)] and
63 x = r + n * log10(2) / N, with r in [-log10(2)/2N, log10(2)/2N]. */
64
65 /* Load some constants in quad-word chunks to minimise memory access (last
66 lane is wasted). */
67 svfloat32_t log10_2_and_inv = svld1rq (svptrue_b32 (), &d->log10_2);
68
69 /* n = round(x/(log10(2)/N)). */
70 svfloat32_t shift = sv_f32 (x: d->shift);
71 svfloat32_t z = svmla_lane (shift, x, log10_2_and_inv, 0);
72 svfloat32_t n = svsub_x (pg, z, shift);
73
74 /* r = x - n*log10(2)/N. */
75 svfloat32_t r = svmls_lane (x, n, log10_2_and_inv, 1);
76 r = svmls_lane (r, n, log10_2_and_inv, 2);
77
78 svbool_t special = svacgt (pg, x, d->special_bound);
79 svfloat32_t scale = svexpa (svreinterpret_u32 (z));
80
81 /* Polynomial evaluation: poly(r) ~ exp10(r)-1. */
82 svfloat32_t r2 = svmul_x (pg, r, r);
83 svfloat32_t poly
84 = svmla_x (pg, svmul_x (pg, r, d->poly[0]),
85 sv_pairwise_poly_3_f32_x (pg, x: r, x2: r2, poly: d->poly + 1), r2);
86
87 if (__glibc_unlikely (svptest_any (pg, special)))
88 return special_case (x, y: svmla_x (pg, scale, scale, poly), special);
89
90 return svmla_x (pg, scale, scale, poly);
91}
92

source code of glibc/sysdeps/aarch64/fpu/exp10f_sve.c