logf_sve.c source code [glibc/sysdeps/aarch64/fpu/logf_sve.c]

/* Single-precision vector (SVE) log function.

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include "sv_math.h"
21
/* Constant table for the polynomial approximation of log(1+r).

   The seven coefficients P0..P6 are split across two arrays:
     poly_0135 = { P0, P1, P3, P5 }
     poly_246  = { P2, P4, P6 }
   ln2 is the single-precision value of log(2).  */
static const struct data
{
  float poly_0135[4];
  float poly_246[3];
  float ln2;
} data = {
  .poly_0135 = {
    /* Coefficients copied from the AdvSIMD routine in math/, then rearranged so
       that coeffs 0, 1, 3 and 5 can be loaded as a single quad-word, hence used
       with _lane variant of MLA intrinsic.  */
    -0x1.3e737cp-3f, 0x1.5a9aa2p-3f, 0x1.961348p-3f, 0x1.555d7cp-2f
  },
  .poly_246 = { -0x1.4f9934p-3f, -0x1.00187cp-2f, -0x1.ffffc8p-2f },
  .ln2 = 0x1.62e43p-1f
};
37
/* Bit-pattern constants applied to the raw 32-bit representation of the
   input.  The special-case test in the routine is the single unsigned
   comparison (u - Min) >= Thresh, which is true when u < Min (the
   subtraction wraps) or u >= Max.  */
#define Min (0x00800000)    /* Bits of the smallest positive normal float.  */
#define Max (0x7f800000)    /* Bits of +Inf.  */
#define Thresh (0x7f000000) /* Max - Min.  */
#define Mask (0x007fffff)   /* Low 23 bits: the mantissa field.  */
#define Off (0x3f2aaaab)    /* 0.666667.  */
43
44	static svfloat32_t NOINLINE
45	special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
46	{
47	return sv_call_f32 (f: logf, x, y, cmp);
48	}
49
50	/ Optimised implementation of SVE logf, using the same algorithm and*
51	polynomial as the AdvSIMD routine. Maximum error is 3.34 ULPs:
52	SV_NAME_F1 (log)(0x1.557298p+0) got 0x1.26edecp-2
53	want 0x1.26ede6p-2. /*
54	svfloat32_t SV_NAME_F1 (log) (svfloat32_t x, const svbool_t pg)
55	{
56	const struct data *d = ptr_barrier (&data);
57
58	svuint32_t u = svreinterpret_u32 (x);
59	svbool_t cmp = svcmpge (pg, svsub_x (pg, u, Min), Thresh);
60
61	/ x = 2^n * (1+r), where 2/3 < 1+r < 4/3. /
62	u = svsub_x (pg, u, Off);
63	svfloat32_t n = svcvt_f32_x (
64	pg, svasr_x (pg, svreinterpret_s32 (u), `23`)); / Sign-extend. /
65	u = svand_x (pg, u, Mask);
66	u = svadd_x (pg, u, Off);
67	svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), `1.0f`);
68
69	/ y = log(1+r) + nln2. /*
70	svfloat32_t r2 = svmul_x (pg, r, r);
71	/ nln2 + r + r2(P6 + rP5 + r2(P4 + rP3 + r2(P2 + rP1 + r2P0))). /
72	svfloat32_t p_0135 = svld1rq (svptrue_b32 (), &d->poly_0135[`0`]);
73	svfloat32_t p = svmla_lane (sv_f32 (x: d->poly_246[`0`]), r, p_0135, `1`);
74	svfloat32_t q = svmla_lane (sv_f32 (x: d->poly_246[`1`]), r, p_0135, `2`);
75	svfloat32_t y = svmla_lane (sv_f32 (x: d->poly_246[`2`]), r, p_0135, `3`);
76	p = svmla_lane (p, r2, p_0135, `0`);
77
78	q = svmla_x (pg, q, r2, p);
79	y = svmla_x (pg, y, r2, q);
80	p = svmla_x (pg, r, n, d->ln2);
81
82	if (__glibc_unlikely (svptest_any (pg, cmp)))
83	return special_case (x, y: svmla_x (svnot_z (pg, cmp), p, r2, y), cmp);
84	return svmla_x (pg, p, r2, y);
85	}
86

source code of glibc/sysdeps/aarch64/fpu/logf_sve.c