sinf_advsimd.c source code [glibc/sysdeps/aarch64/fpu/sinf_advsimd.c]

/* Single-precision vector (Advanced SIMD) sin function.

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include "v_math.h"
21
22	static const struct data
23	{
24	float32x4_t poly[`4`];
25	float32x4_t range_val, inv_pi, shift, pi_1, pi_2, pi_3;
26	} data = {
27	/ 1.886 ulp error. /
28	.poly = { V4 (-`0x1.555548p-3f`), V4 (`0x1.110df4p-7f`), V4 (-`0x1.9f42eap-13f`),
29	V4 (`0x1.5b2e76p-19f`) },
30
31	.pi_1 = V4 (`0x1.921fb6p+1f`),
32	.pi_2 = V4 (-`0x1.777a5cp-24f`),
33	.pi_3 = V4 (-`0x1.ee59dap-49f`),
34
35	.inv_pi = V4 (`0x1.45f306p-2f`),
36	.shift = V4 (`0x1.8p+23f`),
37	.range_val = V4 (`0x1p20f`)
38	};
39
/* Bounds for the exception-safe build.  They are compared against the bit
   pattern of |x| using a single unsigned test:
     (asuint (|x|) - TinyBound) >= Thresh
   which is true both when |x| is below 0x1p-61 (the subtraction wraps
   around) and when its bit pattern is at or above that of RangeVal.  */
#if WANT_SIMD_EXCEPT
# define TinyBound v_u32 (0x21000000) /* asuint32(0x1p-61f).  */
# define Thresh v_u32 (0x28800000)    /* RangeVal - TinyBound.  */
#endif

/* Shorthand for polynomial coefficient I; expects a local pointer `d' to
   the constant table to be in scope.  */
#define C(i) d->poly[i]
46
47	static float32x4_t VPCS_ATTR NOINLINE
48	special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
49	{
50	/ Fall back to scalar code. /
51	y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
52	return v_call_f32 (sinf, x, y, cmp);
53	}
54
55	float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x)
56	{
57	const struct data *d = ptr_barrier (&data);
58	float32x4_t n, r, r2, y;
59	uint32x4_t odd, cmp;
60
61	#if WANT_SIMD_EXCEPT
62	uint32x4_t ir = vreinterpretq_u32_f32 (vabsq_f32 (x));
63	cmp = vcgeq_u32 (vsubq_u32 (ir, TinyBound), Thresh);
64	/ If fenv exceptions are to be triggered correctly, set any special lanes*
65	to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by
66	special-case handler later. /*
67	r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x);
68	#else
69	r = x;
70	cmp = vcageq_f32 (x, d->range_val);
71	#endif
72
73	/ n = rint(\|x\|/pi) /
74	n = vfmaq_f32 (d->shift, d->inv_pi, r);
75	odd = vshlq_n_u32 (vreinterpretq_u32_f32 (n), `31`);
76	n = vsubq_f32 (n, d->shift);
77
78	/ r = \|x\| - npi (range reduction into -pi/2 .. pi/2) /*
79	r = vfmsq_f32 (r, d->pi_1, n);
80	r = vfmsq_f32 (r, d->pi_2, n);
81	r = vfmsq_f32 (r, d->pi_3, n);
82
83	/ y = sin(r) /
84	r2 = vmulq_f32 (r, r);
85	y = vfmaq_f32 (C (`2`), C (`3`), r2);
86	y = vfmaq_f32 (C (`1`), y, r2);
87	y = vfmaq_f32 (C (`0`), y, r2);
88	y = vfmaq_f32 (r, vmulq_f32 (y, r2), r);
89
90	if (__glibc_unlikely (v_any_u32 (cmp)))
91	return special_case (x, y, odd, cmp);
92	return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
93	}
94	libmvec_hidden_def (V_NAME_F1 (sin))
95	HALF_WIDTH_ALIAS_F1 (sin)
96

source code of glibc/sysdeps/aarch64/fpu/sinf_advsimd.c