cosf_advsimd.c source code [glibc/sysdeps/aarch64/fpu/cosf_advsimd.c]

/* Single-precision vector (Advanced SIMD) cos function.

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	#include "v_math.h"
21
22	static const struct data
23	{
24	float32x4_t poly[`4`];
25	float32x4_t range_val, inv_pi, pi_1, pi_2, pi_3;
26	} data = {
27	/ 1.886 ulp error. /
28	.poly = { V4 (-`0x1.555548p-3f`), V4 (`0x1.110df4p-7f`), V4 (-`0x1.9f42eap-13f`),
29	V4 (`0x1.5b2e76p-19f`) },
30
31	.pi_1 = V4 (`0x1.921fb6p+1f`),
32	.pi_2 = V4 (-`0x1.777a5cp-24f`),
33	.pi_3 = V4 (-`0x1.ee59dap-49f`),
34
35	.inv_pi = V4 (`0x1.45f306p-2f`),
36	.range_val = V4 (`0x1p20f`)
37	};
38
/* Shorthand for the i-th polynomial coefficient.  Expects a pointer named
   `d' to a structure with a `poly' array to be in scope at the point of
   use.  */
#define C(i) (d->poly[i])
40
41	static float32x4_t VPCS_ATTR NOINLINE
42	special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
43	{
44	/ Fall back to scalar code. /
45	y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
46	return v_call_f32 (cosf, x, y, cmp);
47	}
48
49	float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x)
50	{
51	const struct data *d = ptr_barrier (&data);
52	float32x4_t n, r, r2, r3, y;
53	uint32x4_t odd, cmp;
54
55	#if WANT_SIMD_EXCEPT
56	r = vabsq_f32 (x);
57	cmp = vcgeq_u32 (vreinterpretq_u32_f32 (r),
58	vreinterpretq_u32_f32 (d->range_val));
59	if (__glibc_unlikely (v_any_u32 (cmp)))
60	/ If fenv exceptions are to be triggered correctly, set any special lanes*
61	to 1 (which is neutral w.r.t. fenv). These lanes will be fixed by
62	special-case handler later. /*
63	r = vbslq_f32 (cmp, v_f32 (`1.0f`), r);
64	#else
65	cmp = vcageq_f32 (x, d->range_val);
66	r = x;
67	#endif
68
69	/ n = rint((\|x\|+pi/2)/pi) - 0.5. /
70	n = vrndaq_f32 (vfmaq_f32 (v_f32 (`0.5`), r, d->inv_pi));
71	odd = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), `31`);
72	n = vsubq_f32 (n, v_f32 (`0.5f`));
73
74	/ r = \|x\| - npi (range reduction into -pi/2 .. pi/2). /*
75	r = vfmsq_f32 (r, d->pi_1, n);
76	r = vfmsq_f32 (r, d->pi_2, n);
77	r = vfmsq_f32 (r, d->pi_3, n);
78
79	/ y = sin(r). /
80	r2 = vmulq_f32 (r, r);
81	r3 = vmulq_f32 (r2, r);
82	y = vfmaq_f32 (C (`2`), C (`3`), r2);
83	y = vfmaq_f32 (C (`1`), y, r2);
84	y = vfmaq_f32 (C (`0`), y, r2);
85	y = vfmaq_f32 (r, y, r3);
86
87	if (__glibc_unlikely (v_any_u32 (cmp)))
88	return special_case (x, y, odd, cmp);
89	return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
90	}
91	libmvec_hidden_def (V_NAME_F1 (cos))
92	HALF_WIDTH_ALIAS_F1 (cos)
93

source code of glibc/sysdeps/aarch64/fpu/cosf_advsimd.c