log10f_advsimd.c source code [glibc/sysdeps/aarch64/fpu/log10f_advsimd.c]

/* Single-precision vector (AdvSIMD) log10 function

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include "v_math.h"
21	#include "poly_advsimd_f32.h"
22
23	static const struct data
24	{
25	uint32x4_t min_norm;
26	uint16x8_t special_bound;
27	float32x4_t poly[`8`];
28	float32x4_t inv_ln10, ln2;
29	uint32x4_t off, mantissa_mask;
30	} data = {
31	/ Use order 9 for log10(1+x), i.e. order 8 for log10(1+x)/x, with x in*
32	[-1/3, 1/3] (offset=2/3). Max. relative error: 0x1.068ee468p-25. /*
33	.poly = { V4 (-`0x1.bcb79cp-3f`), V4 (`0x1.2879c8p-3f`), V4 (-`0x1.bcd472p-4f`),
34	V4 (`0x1.6408f8p-4f`), V4 (-`0x1.246f8p-4f`), V4 (`0x1.f0e514p-5f`),
35	V4 (-`0x1.0fc92cp-4f`), V4 (`0x1.f5f76ap-5f`) },
36	.ln2 = V4 (`0x1.62e43p-1f`),
37	.inv_ln10 = V4 (`0x1.bcb7b2p-2f`),
38	.min_norm = V4 (`0x00800000`),
39	.special_bound = V8 (`0x7f00`), / asuint32(inf) - min_norm. /
40	.off = V4 (`0x3f2aaaab`), / 0.666667. /
41	.mantissa_mask = V4 (`0x007fffff`),
42	};
43
44	static float32x4_t VPCS_ATTR NOINLINE
45	special_case (float32x4_t x, float32x4_t y, float32x4_t p, float32x4_t r2,
46	uint16x4_t cmp)
47	{
48	/ Fall back to scalar code. /
49	return v_call_f32 (log10f, x, vfmaq_f32 (y, p, r2), vmovl_u16 (cmp));
50	}
51
52	/ Fast implementation of AdvSIMD log10f,*
53	uses a similar approach as AdvSIMD logf with the same offset (i.e., 2/3) and
54	an order 9 polynomial.
55	Maximum error: 3.305ulps (nearest rounding.)
56	_ZGVnN4v_log10f(0x1.555c16p+0) got 0x1.ffe2fap-4
57	want 0x1.ffe2f4p-4. /*
58	float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (log10) (float32x4_t x)
59	{
60	const struct data *d = ptr_barrier (&data);
61	uint32x4_t u = vreinterpretq_u32_f32 (x);
62	uint16x4_t special = vcge_u16 (vsubhn_u32 (u, d->min_norm),
63	vget_low_u16 (d->special_bound));
64
65	/ x = 2^n * (1+r), where 2/3 < 1+r < 4/3. /
66	u = vsubq_u32 (u, d->off);
67	float32x4_t n = vcvtq_f32_s32 (
68	vshrq_n_s32 (vreinterpretq_s32_u32 (u), `23`)); / signextend. /
69	u = vaddq_u32 (vandq_u32 (u, d->mantissa_mask), d->off);
70	float32x4_t r = vsubq_f32 (vreinterpretq_f32_u32 (u), v_f32 (`1.0f`));
71
72	/ y = log10(1+r) + n * log10(2). /
73	float32x4_t r2 = vmulq_f32 (r, r);
74	float32x4_t poly = v_pw_horner_7_f32 (r, r2, d->poly);
75	/ y = Log10(2) * n + poly * InvLn(10). /
76	float32x4_t y = vfmaq_f32 (r, d->ln2, n);
77	y = vmulq_f32 (y, d->inv_ln10);
78
79	if (__glibc_unlikely (v_any_u16h (special)))
80	return special_case (x, y, poly, r2, special);
81	return vfmaq_f32 (y, poly, r2);
82	}
83	libmvec_hidden_def (V_NAME_F1 (log10))
84	HALF_WIDTH_ALIAS_F1 (log10)
85

source code of glibc/sysdeps/aarch64/fpu/log10f_advsimd.c