| 1 | /* Double-precision vector (SVE) log function. |
| 2 | |
| 3 | Copyright (C) 2023-2024 Free Software Foundation, Inc. |
| 4 | This file is part of the GNU C Library. |
| 5 | |
| 6 | The GNU C Library is free software; you can redistribute it and/or |
| 7 | modify it under the terms of the GNU Lesser General Public |
| 8 | License as published by the Free Software Foundation; either |
| 9 | version 2.1 of the License, or (at your option) any later version. |
| 10 | |
| 11 | The GNU C Library is distributed in the hope that it will be useful, |
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 14 | Lesser General Public License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU Lesser General Public |
| 17 | License along with the GNU C Library; if not, see |
| 18 | <https://www.gnu.org/licenses/>. */ |
| 19 | |
| 20 | #include "sv_math.h" |
| 21 | |
| 22 | #define N (1 << V_LOG_TABLE_BITS) |
| 23 | #define Max (0x7ff0000000000000) |
| 24 | #define Min (0x0010000000000000) |
| 25 | #define Thresh (0x7fe0000000000000) /* Max - Min. */ |
| 26 | |
| 27 | static const struct data |
| 28 | { |
| 29 | double c0, c2; |
| 30 | double c1, c3; |
| 31 | double ln2, c4; |
| 32 | uint64_t off; |
| 33 | } data = { |
| 34 | .c0 = -0x1.ffffffffffff7p-2, |
| 35 | .c1 = 0x1.55555555170d4p-2, |
| 36 | .c2 = -0x1.0000000399c27p-2, |
| 37 | .c3 = 0x1.999b2e90e94cap-3, |
| 38 | .c4 = -0x1.554e550bd501ep-3, |
| 39 | .ln2 = 0x1.62e42fefa39efp-1, |
| 40 | .off = 0x3fe6900900000000, |
| 41 | }; |
| 42 | |
| 43 | static svfloat64_t NOINLINE |
| 44 | special_case (svfloat64_t hi, svuint64_t tmp, svfloat64_t y, svfloat64_t r2, |
| 45 | svbool_t special, const struct data *d) |
| 46 | { |
| 47 | svfloat64_t x = svreinterpret_f64 (svadd_x (svptrue_b64 (), tmp, d->off)); |
| 48 | return sv_call_f64 (f: log, x, y: svmla_x (svptrue_b64 (), hi, r2, y), cmp: special); |
| 49 | } |
| 50 | |
| 51 | /* Double-precision SVE log routine. |
| 52 | Maximum measured error is 2.64 ulp: |
| 53 | SV_NAME_D1 (log)(0x1.95e54bc91a5e2p+184) got 0x1.fffffffe88cacp+6 |
| 54 | want 0x1.fffffffe88cafp+6. */ |
| 55 | svfloat64_t SV_NAME_D1 (log) (svfloat64_t x, const svbool_t pg) |
| 56 | { |
| 57 | const struct data *d = ptr_barrier (&data); |
| 58 | |
| 59 | svuint64_t ix = svreinterpret_u64 (x); |
| 60 | svbool_t special = svcmpge (pg, svsub_x (pg, ix, Min), Thresh); |
| 61 | |
| 62 | /* x = 2^k z; where z is in range [Off,2*Off) and exact. |
| 63 | The range is split into N subintervals. |
| 64 | The ith subinterval contains z and c is near its center. */ |
| 65 | svuint64_t tmp = svsub_x (pg, ix, d->off); |
| 66 | /* Calculate table index = (tmp >> (52 - V_LOG_TABLE_BITS)) % N. |
| 67 | The actual value of i is double this due to table layout. */ |
| 68 | svuint64_t i |
| 69 | = svand_x (pg, svlsr_x (pg, tmp, (51 - V_LOG_TABLE_BITS)), (N - 1) << 1); |
| 70 | svuint64_t iz = svsub_x (pg, ix, svand_x (pg, tmp, 0xfffULL << 52)); |
| 71 | svfloat64_t z = svreinterpret_f64 (iz); |
| 72 | /* Lookup in 2 global lists (length N). */ |
| 73 | svfloat64_t invc = svld1_gather_index (pg, &__v_log_data.table[0].invc, i); |
| 74 | svfloat64_t logc = svld1_gather_index (pg, &__v_log_data.table[0].logc, i); |
| 75 | |
| 76 | /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ |
| 77 | svfloat64_t kd = svcvt_f64_x (pg, svasr_x (pg, svreinterpret_s64 (tmp), 52)); |
| 78 | /* hi = r + log(c) + k*Ln2. */ |
| 79 | svfloat64_t ln2_and_c4 = svld1rq_f64 (svptrue_b64 (), &d->ln2); |
| 80 | svfloat64_t r = svmad_x (pg, invc, z, -1); |
| 81 | svfloat64_t hi = svmla_lane_f64 (logc, kd, ln2_and_c4, 0); |
| 82 | hi = svadd_x (pg, r, hi); |
| 83 | |
| 84 | /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ |
| 85 | svfloat64_t odd_coeffs = svld1rq_f64 (svptrue_b64 (), &d->c1); |
| 86 | svfloat64_t r2 = svmul_x (svptrue_b64 (), r, r); |
| 87 | svfloat64_t y = svmla_lane_f64 (sv_f64 (x: d->c2), r, odd_coeffs, 1); |
| 88 | svfloat64_t p = svmla_lane_f64 (sv_f64 (x: d->c0), r, odd_coeffs, 0); |
| 89 | y = svmla_lane_f64 (y, r2, ln2_and_c4, 1); |
| 90 | y = svmla_x (pg, p, r2, y); |
| 91 | |
| 92 | if (__glibc_unlikely (svptest_any (pg, special))) |
| 93 | return special_case (hi, tmp, y, r2, special, d); |
| 94 | return svmla_x (pg, hi, r2, y); |
| 95 | } |
| 96 | |