sinf_sve.c source code [glibc/sysdeps/aarch64/fpu/sinf_sve.c]

/* Single-precision vector (SVE) sin function.

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include "sv_math.h"
21
/* Constants used by the SVE sinf routine.  */
static const struct data
{
  /* Polynomial coefficients, lowest order first (see initializer).  */
  float poly[4];
  /* Pi-related values to be loaded as one quad-word and used with
     svmla_lane.  These four members must stay adjacent and in this order:
     the routine loads them starting at &negpi1 and selects them by lane
     index 0..3.  */
  float negpi1, negpi2, negpi3, invpi;
  /* Constant added to |x|/pi and then subtracted again to obtain the
     rounded multiple of pi.  */
  float shift;
} data = {
  .poly = {
    /* Non-zero coefficients from the degree 9 Taylor series expansion of
       sin.  */
    -0x1.555548p-3f, 0x1.110df4p-7f, -0x1.9f42eap-13f, 0x1.5b2e76p-19f
  },
  /* -pi split into three single-precision parts (high to low).  */
  .negpi1 = -0x1.921fb6p+1f,
  .negpi2 = 0x1.777a5cp-24f,
  .negpi3 = 0x1.ee59dap-49f,
  /* 1 / pi.  */
  .invpi = 0x1.45f306p-2f,
  .shift = 0x1.8p+23f
};
41
/* Bit pattern of the threshold on |x| at or above which lanes are handed
   to the special-case path.  */
#define RangeVal 0x49800000 /* asuint32 (0x1p20f).  */
/* Broadcast polynomial coefficient I to a vector.  Expands to a reference
   to a local pointer named `d' (const struct data *), which must be in
   scope at the point of use.  */
#define C(i) sv_f32 (d->poly[i])
44
45	static svfloat32_t NOINLINE
46	special_case (svfloat32_t x, svfloat32_t y, svbool_t cmp)
47	{
48	return sv_call_f32 (f: sinf, x, y, cmp);
49	}
50
51	/ A fast SVE implementation of sinf.*
52	Maximum error: 1.89 ULPs.
53	This maximum error is achieved at multiple values in [-2^18, 2^18]
54	but one example is:
55	SV_NAME_F1 (sin)(0x1.9247a4p+0) got 0x1.fffff6p-1 want 0x1.fffffap-1. /*
56	svfloat32_t SV_NAME_F1 (sin) (svfloat32_t x, const svbool_t pg)
57	{
58	const struct data *d = ptr_barrier (&data);
59
60	svfloat32_t ax = svabs_x (pg, x);
61	svuint32_t sign
62	= sveor_x (pg, svreinterpret_u32 (x), svreinterpret_u32 (ax));
63	svbool_t cmp = svcmpge (pg, svreinterpret_u32 (ax), RangeVal);
64
65	/ pi_vals are a quad-word of helper values - the first 3 elements contain*
66	-pi in extended precision, the last contains 1 / pi. /*
67	svfloat32_t pi_vals = svld1rq (svptrue_b32 (), &d->negpi1);
68
69	/ n = rint(\|x\|/pi). /
70	svfloat32_t n = svmla_lane (sv_f32 (x: d->shift), ax, pi_vals, `3`);
71	svuint32_t odd = svlsl_x (pg, svreinterpret_u32 (n), `31`);
72	n = svsub_x (pg, n, d->shift);
73
74	/ r = \|x\| - npi (range reduction into -pi/2 .. pi/2). /*
75	svfloat32_t r;
76	r = svmla_lane (ax, n, pi_vals, `0`);
77	r = svmla_lane (r, n, pi_vals, `1`);
78	r = svmla_lane (r, n, pi_vals, `2`);
79
80	/ sin(r) approx using a degree 9 polynomial from the Taylor series*
81	expansion. Note that only the odd terms of this are non-zero. /*
82	svfloat32_t r2 = svmul_x (pg, r, r);
83	svfloat32_t y;
84	y = svmla_x (pg, C (`2`), r2, C (`3`));
85	y = svmla_x (pg, C (`1`), r2, y);
86	y = svmla_x (pg, C (`0`), r2, y);
87	y = svmla_x (pg, r, r, svmul_x (pg, y, r2));
88
89	/ sign = y^sign^odd. /
90	sign = sveor_x (pg, sign, odd);
91
92	if (__glibc_unlikely (svptest_any (pg, cmp)))
93	return special_case (x,
94	y: svreinterpret_f32 (sveor_x (
95	svnot_z (pg, cmp), svreinterpret_u32 (y), sign)),
96	cmp);
97	return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
98	}
99

source code of glibc/sysdeps/aarch64/fpu/sinf_sve.c