1 | /* |
2 | * Single-precision vector cos function. |
3 | * |
4 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | * See https://llvm.org/LICENSE.txt for license information. |
6 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | */ |
8 | |
9 | #include "mathlib.h" |
10 | #include "v_math.h" |
11 | #if V_SUPPORTED |
12 | |
13 | static const float Poly[] = { |
14 | /* 1.886 ulp error */ |
15 | 0x1.5b2e76p-19f, |
16 | -0x1.9f42eap-13f, |
17 | 0x1.110df4p-7f, |
18 | -0x1.555548p-3f, |
19 | }; |
20 | #define Pi1 v_f32 (0x1.921fb6p+1f) |
21 | #define Pi2 v_f32 (-0x1.777a5cp-24f) |
22 | #define Pi3 v_f32 (-0x1.ee59dap-49f) |
23 | #define A3 v_f32 (Poly[3]) |
24 | #define A5 v_f32 (Poly[2]) |
25 | #define A7 v_f32 (Poly[1]) |
26 | #define A9 v_f32 (Poly[0]) |
27 | #define RangeVal v_f32 (0x1p20f) |
28 | #define InvPi v_f32 (0x1.45f306p-2f) |
29 | #define Shift v_f32 (0x1.8p+23f) |
30 | #define AbsMask v_u32 (0x7fffffff) |
31 | #define HalfPi v_f32 (0x1.921fb6p0f) |
32 | |
33 | VPCS_ATTR |
34 | static v_f32_t |
35 | specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp) |
36 | { |
37 | /* Fall back to scalar code. */ |
38 | return v_call_f32 (cosf, x, y, cmp); |
39 | } |
40 | |
41 | VPCS_ATTR |
42 | v_f32_t |
43 | V_NAME(cosf) (v_f32_t x) |
44 | { |
45 | v_f32_t n, r, r2, y; |
46 | v_u32_t odd, cmp; |
47 | |
48 | r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask); |
49 | cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal)); |
50 | |
51 | /* n = rint((|x|+pi/2)/pi) - 0.5 */ |
52 | n = v_fma_f32 (InvPi, r + HalfPi, Shift); |
53 | odd = v_as_u32_f32 (n) << 31; |
54 | n -= Shift; |
55 | n -= v_f32 (0.5f); |
56 | |
57 | /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */ |
58 | r = v_fma_f32 (-Pi1, n, r); |
59 | r = v_fma_f32 (-Pi2, n, r); |
60 | r = v_fma_f32 (-Pi3, n, r); |
61 | |
62 | /* y = sin(r) */ |
63 | r2 = r * r; |
64 | y = v_fma_f32 (A9, r2, A7); |
65 | y = v_fma_f32 (y, r2, A5); |
66 | y = v_fma_f32 (y, r2, A3); |
67 | y = v_fma_f32 (y * r2, r, r); |
68 | |
69 | /* sign fix */ |
70 | y = v_as_f32_u32 (v_as_u32_f32 (y) ^ odd); |
71 | |
72 | if (unlikely (v_any_u32 (cmp))) |
73 | return specialcase (x, y, cmp); |
74 | return y; |
75 | } |
76 | VPCS_ALIAS |
77 | #endif |
78 | |