log2f_sve.c source code [glibc/sysdeps/aarch64/fpu/log2f_sve.c]

/* Single-precision vector (SVE) log2 function

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include "sv_math.h"
21
/* Polynomial coefficients used by the SVE log2f routine.  The coefficients
   are split by index parity: poly_02468 holds coefficients 0, 2, 4, 6 and 8,
   poly_1357 holds coefficients 1, 3, 5 and 7.  */
static const struct data
{
  float poly_02468[5];
  float poly_1357[4];
} data = {
  .poly_1357 = {
    /* Coefficients copied from the AdvSIMD routine, then rearranged so that
       coeffs 1, 3, 5 and 7 can be loaded as a single quad-word, hence used
       with _lane variant of MLA intrinsic.  */
    -0x1.715458p-1f, -0x1.7171a4p-2f, -0x1.e5143ep-3f, -0x1.c675bp-3f
  },
  .poly_02468 = { 0x1.715476p0f, 0x1.ec701cp-2f, 0x1.27a0b8p-2f,
		  0x1.9d8ecap-3f, 0x1.9e495p-3f },
};
36
/* Bit-pattern constants applied to the raw 32-bit representation of the
   input.  Min and Max bound the range handled by the vector path; the
   range check is done as a single unsigned compare of (u - Min) against
   Thres.  */
#define Min (0x00800000) /* Bit pattern of the smallest normal float.  */
#define Max (0x7f800000) /* Bit pattern of +infinity.  */
#define Thres (0x7f000000) /* Max - Min.  */
#define MantissaMask (0x007fffff) /* Low 23 bits: the fraction field.  */
#define Off (0x3f2aaaab) /* 0.666667.  */
42
43	static svfloat32_t NOINLINE
44	special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
45	{
46	return sv_call_f32 (f: log2f, x, y, cmp);
47	}
48
49	/ Optimised implementation of SVE log2f, using the same algorithm*
50	and polynomial as AdvSIMD log2f.
51	Maximum error is 2.48 ULPs:
52	SV_NAME_F1 (log2)(0x1.558174p+0) got 0x1.a9be84p-2
53	want 0x1.a9be8p-2. /*
54	svfloat32_t SV_NAME_F1 (log2) (svfloat32_t x, const svbool_t pg)
55	{
56	const struct data *d = ptr_barrier (&data);
57
58	svuint32_t u = svreinterpret_u32 (x);
59	svbool_t special = svcmpge (pg, svsub_x (pg, u, Min), Thres);
60
61	/ x = 2^n * (1+r), where 2/3 < 1+r < 4/3. /
62	u = svsub_x (pg, u, Off);
63	svfloat32_t n = svcvt_f32_x (
64	pg, svasr_x (pg, svreinterpret_s32 (u), `23`)); / Sign-extend. /
65	u = svand_x (pg, u, MantissaMask);
66	u = svadd_x (pg, u, Off);
67	svfloat32_t r = svsub_x (pg, svreinterpret_f32 (u), `1.0f`);
68
69	/ y = log2(1+r) + n. /
70	svfloat32_t r2 = svmul_x (pg, r, r);
71
72	/ Evaluate polynomial using pairwise Horner scheme. /
73	svfloat32_t p_1357 = svld1rq (svptrue_b32 (), &d->poly_1357[`0`]);
74	svfloat32_t q_01 = svmla_lane (sv_f32 (x: d->poly_02468[`0`]), r, p_1357, `0`);
75	svfloat32_t q_23 = svmla_lane (sv_f32 (x: d->poly_02468[`1`]), r, p_1357, `1`);
76	svfloat32_t q_45 = svmla_lane (sv_f32 (x: d->poly_02468[`2`]), r, p_1357, `2`);
77	svfloat32_t q_67 = svmla_lane (sv_f32 (x: d->poly_02468[`3`]), r, p_1357, `3`);
78	svfloat32_t y = svmla_x (pg, q_67, r2, sv_f32 (x: d->poly_02468[`4`]));
79	y = svmla_x (pg, q_45, r2, y);
80	y = svmla_x (pg, q_23, r2, y);
81	y = svmla_x (pg, q_01, r2, y);
82
83	if (__glibc_unlikely (svptest_any (pg, special)))
84	return special_case (x, y: svmla_x (svnot_z (pg, special), n, r, y), cmp: special);
85	return svmla_x (pg, n, r, y);
86	}
87

source code of glibc/sysdeps/aarch64/fpu/log2f_sve.c