asin_sve.c source code [glibc/sysdeps/aarch64/fpu/asin_sve.c]

/* Double-precision SVE inverse sin

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include "sv_math.h"
21	#include "poly_sve_f64.h"
22
23	static const struct data
24	{
25	float64_t poly[`12`];
26	float64_t pi_over_2f;
27	} data = {
28	/ Polynomial approximation of (asin(sqrt(x)) - sqrt(x)) / (x * sqrt(x))*
29	on [ 0x1p-106, 0x1p-2 ], relative error: 0x1.c3d8e169p-57. /*
30	.poly = { `0x1.555555555554ep-3`, `0x1.3333333337233p-4`,
31	`0x1.6db6db67f6d9fp-5`, `0x1.f1c71fbd29fbbp-6`,
32	`0x1.6e8b264d467d6p-6`, `0x1.1c5997c357e9dp-6`,
33	`0x1.c86a22cd9389dp-7`, `0x1.856073c22ebbep-7`,
34	`0x1.fd1151acb6bedp-8`, `0x1.087182f799c1dp-6`,
35	-`0x1.6602748120927p-7`, `0x1.cfa0dd1f9478p-6`, },
36	.pi_over_2f = `0x1.921fb54442d18p+0`,
37	};
38
/* Broadcast polynomial coefficient I to all lanes.  Expands to a use of a
   local pointer named `d', so it is only valid where such a pointer to
   struct data is in scope.  Not referenced by the code visible in this
   file section.  */
#define P(i) sv_f64 (d->poly[i])
40
41	/ Double-precision SVE implementation of vector asin(x).*
42
43	For \|x\| in [0, 0.5], use an order 11 polynomial P such that the final
44	approximation is an odd polynomial: asin(x) ~ x + x^3 P(x^2).
45
46	The largest observed error in this region is 0.52 ulps,
47	_ZGVsMxv_asin(0x1.d95ae04998b6cp-2) got 0x1.ec13757305f27p-2
48	want 0x1.ec13757305f26p-2.
49
50	For \|x\| in [0.5, 1.0], use same approximation with a change of variable
51
52	asin(x) = pi/2 - (y + y z * P(z)), with z = (1-x)/2 and y = sqrt(z).*
53
54	The largest observed error in this region is 2.69 ulps,
55	_ZGVsMxv_asin(0x1.044ac9819f573p-1) got 0x1.110d7e85fdd5p-1
56	want 0x1.110d7e85fdd53p-1. /*
57	svfloat64_t SV_NAME_D1 (asin) (svfloat64_t x, const svbool_t pg)
58	{
59	const struct data *d = ptr_barrier (&data);
60
61	svuint64_t sign = svand_x (pg, svreinterpret_u64 (x), `0x8000000000000000`);
62	svfloat64_t ax = svabs_x (pg, x);
63	svbool_t a_ge_half = svacge (pg, x, `0.5`);
64
65	/ Evaluate polynomial Q(x) = y + y * z * P(z) with*
66	z = x ^ 2 and y = \|x\| , if \|x\| < 0.5
67	z = (1 - \|x\|) / 2 and y = sqrt(z), if \|x\| >= 0.5. /*
68	svfloat64_t z2 = svsel (a_ge_half, svmls_x (pg, sv_f64 (x: `0.5`), ax, `0.5`),
69	svmul_x (pg, x, x));
70	svfloat64_t z = svsqrt_m (ax, a_ge_half, z2);
71
72	/ Use a single polynomial approximation P for both intervals. /
73	svfloat64_t z4 = svmul_x (pg, z2, z2);
74	svfloat64_t z8 = svmul_x (pg, z4, z4);
75	svfloat64_t z16 = svmul_x (pg, z8, z8);
76	svfloat64_t p = sv_estrin_11_f64_x (pg, x: z2, x2: z4, x4: z8, x8: z16, poly: d->poly);
77	/ Finalize polynomial: z + z * z2 * P(z2). /
78	p = svmla_x (pg, z, svmul_x (pg, z, z2), p);
79
80	/ asin(\|x\|) = Q(\|x\|) , for \|x\| < 0.5*
81	= pi/2 - 2 Q(\|x\|), for \|x\| >= 0.5. /*
82	svfloat64_t y = svmad_m (a_ge_half, p, sv_f64 (x: -`2.0`), d->pi_over_2f);
83
84	/ Copy sign. /
85	return svreinterpret_f64 (svorr_x (pg, svreinterpret_u64 (y), sign));
86	}
87

source code of glibc/sysdeps/aarch64/fpu/asin_sve.c