sin_sve.c source code [glibc/sysdeps/aarch64/fpu/sin_sve.c]

/* Double-precision vector (SVE) sin function.

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include "sv_math.h"
21
/* Constants used by the vector sin routine.

   Field order matters: the routine loads {inv_pi, pi_1} and {pi_2, pi_3}
   as adjacent pairs starting at &inv_pi and &pi_2 respectively, so those
   members must stay contiguous and in this order.  */
static const struct data
{
  double inv_pi, pi_1, pi_2, pi_3, shift, range_val;
  double poly[7];
} data = {
  /* Coefficients of the odd polynomial for sin(r): the result is formed as
     r + r^3 * (poly[0] + poly[1] * r^2 + ... + poly[6] * r^12).  */
  .poly = { -0x1.555555555547bp-3, 0x1.1111111108a4dp-7,
	    -0x1.a01a019936f27p-13, 0x1.71de37a97d93ep-19,
	    -0x1.ae633919987c6p-26, 0x1.60e277ae07cecp-33,
	    -0x1.9e9540300a1p-41, },

  /* 1/pi, used to compute the multiple of pi nearest to x.  */
  .inv_pi = 0x1.45f306dc9c883p-2,
  /* pi split into three parts of decreasing magnitude
     (pi ~= pi_1 + pi_2 + pi_3), subtracted one after another.  */
  .pi_1 = 0x1.921fb54442d18p+1,
  .pi_2 = 0x1.1a62633145c06p-53,
  .pi_3 = 0x1.c1cd129024e09p-106,
  /* 1.5 * 2^52: adding it to x/pi rounds to an integer and leaves that
     integer's parity in the lowest bit of the representation.  */
  .shift = 0x1.8p52,
  /* Lanes with |x| above 2^23 are handed to the special-case path.  */
  .range_val = 0x1p23,
};
38
/* Broadcast polynomial coefficient I to a vector.  Expands to a use of a
   local pointer named `d' (the constants table), which must be in scope at
   the point of use.  */
#define C(i) sv_f64 (d->poly[i])
40
41	static svfloat64_t NOINLINE
42	special_case (svfloat64_t x, svfloat64_t y, svbool_t cmp)
43	{
44	return sv_call_f64 (f: sin, x, y, cmp);
45	}
46
47	/ A fast SVE implementation of sin.*
48	Maximum observed error in [-pi/2, pi/2], where argument is not reduced,
49	is 2.87 ULP:
50	_ZGVsMxv_sin (0x1.921d5c6a07142p+0) got 0x1.fffffffa7dc02p-1
51	want 0x1.fffffffa7dc05p-1
52	Maximum observed error in the entire non-special domain ([-2^23, 2^23])
53	is 3.22 ULP:
54	_ZGVsMxv_sin (0x1.5702447b6f17bp+22) got 0x1.ffdcd125c84fbp-3
55	want 0x1.ffdcd125c84f8p-3. /*
56	svfloat64_t SV_NAME_D1 (sin) (svfloat64_t x, const svbool_t pg)
57	{
58	const struct data *d = ptr_barrier (&data);
59
60	/ Load some values in quad-word chunks to minimise memory access. /
61	const svbool_t ptrue = svptrue_b64 ();
62	svfloat64_t shift = sv_f64 (x: d->shift);
63	svfloat64_t inv_pi_and_pi1 = svld1rq (ptrue, &d->inv_pi);
64	svfloat64_t pi2_and_pi3 = svld1rq (ptrue, &d->pi_2);
65
66	/ n = rint(\|x\|/pi). /
67	svfloat64_t n = svmla_lane (shift, x, inv_pi_and_pi1, `0`);
68	svuint64_t odd = svlsl_x (pg, svreinterpret_u64 (n), `63`);
69	n = svsub_x (pg, n, shift);
70
71	/ r = \|x\| - n(pi/2) (range reduction into -pi/2 .. pi/2). /*
72	svfloat64_t r = x;
73	r = svmls_lane (r, n, inv_pi_and_pi1, `1`);
74	r = svmls_lane (r, n, pi2_and_pi3, `0`);
75	r = svmls_lane (r, n, pi2_and_pi3, `1`);
76
77	/ sin(r) poly approx. /
78	svfloat64_t r2 = svmul_x (pg, r, r);
79	svfloat64_t r3 = svmul_x (pg, r2, r);
80	svfloat64_t r4 = svmul_x (pg, r2, r2);
81
82	svfloat64_t t1 = svmla_x (pg, C (`4`), C (`5`), r2);
83	svfloat64_t t2 = svmla_x (pg, C (`2`), C (`3`), r2);
84	svfloat64_t t3 = svmla_x (pg, C (`0`), C (`1`), r2);
85
86	svfloat64_t y = svmla_x (pg, t1, C (`6`), r4);
87	y = svmla_x (pg, t2, y, r4);
88	y = svmla_x (pg, t3, y, r4);
89	y = svmla_x (pg, r, y, r3);
90
91	svbool_t cmp = svacle (pg, x, d->range_val);
92	cmp = svnot_z (pg, cmp);
93	if (__glibc_unlikely (svptest_any (pg, cmp)))
94	return special_case (x,
95	y: svreinterpret_f64 (sveor_z (
96	svnot_z (pg, cmp), svreinterpret_u64 (y), odd)),
97	cmp);
98
99	/ Copy sign. /
100	return svreinterpret_f64 (sveor_z (pg, svreinterpret_u64 (y), odd));
101	}
102

source code of glibc/sysdeps/aarch64/fpu/sin_sve.c