1 | /* Single-precision vector (SVE) exp2 function |
2 | |
3 | Copyright (C) 2023-2024 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include "sv_math.h" |
21 | #include "poly_sve_f32.h" |
22 | |
23 | #define Thres 0x1.5d5e2ap+6f |
24 | |
25 | static const struct data |
26 | { |
27 | float poly[5]; |
28 | float shift, thres; |
29 | } data = { |
30 | /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for |
31 | compatibility with polynomial helpers. */ |
32 | .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f, |
33 | 0x1.59977ap-10f }, |
34 | /* 1.5*2^17 + 127. */ |
35 | .shift = 0x1.903f8p17f, |
36 | /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled |
37 | correctly by FEXPA. */ |
38 | .thres = Thres, |
39 | }; |
40 | |
41 | static svfloat32_t NOINLINE |
42 | special_case (svfloat32_t x, svfloat32_t y, svbool_t special) |
43 | { |
44 | return sv_call_f32 (f: exp2f, x, y, cmp: special); |
45 | } |
46 | |
47 | /* Single-precision SVE exp2f routine. Implements the same algorithm |
48 | as AdvSIMD exp2f. |
49 | Worst case error is 1.04 ULPs. |
50 | SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0 |
51 | want 0x1.ba7ebp+0. */ |
52 | svfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) |
53 | { |
54 | const struct data *d = ptr_barrier (&data); |
55 | /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] |
56 | x = n + r, with r in [-1/2, 1/2]. */ |
57 | svfloat32_t shift = sv_f32 (x: d->shift); |
58 | svfloat32_t z = svadd_x (pg, x, shift); |
59 | svfloat32_t n = svsub_x (pg, z, shift); |
60 | svfloat32_t r = svsub_x (pg, x, n); |
61 | |
62 | svbool_t special = svacgt (pg, x, d->thres); |
63 | svfloat32_t scale = svexpa (svreinterpret_u32 (z)); |
64 | |
65 | /* Polynomial evaluation: poly(r) ~ exp2(r)-1. |
66 | Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for |
67 | coefficients 1 to 4, and apply most significant coefficient directly. */ |
68 | svfloat32_t r2 = svmul_x (pg, r, r); |
69 | svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, x: r, x2: r2, poly: d->poly + 1); |
70 | svfloat32_t p0 = svmul_x (pg, r, d->poly[0]); |
71 | svfloat32_t poly = svmla_x (pg, p0, r2, p14); |
72 | |
73 | if (__glibc_unlikely (svptest_any (pg, special))) |
74 | return special_case (x, y: svmla_x (pg, scale, scale, poly), special); |
75 | |
76 | return svmla_x (pg, scale, scale, poly); |
77 | } |
78 | |