1 | /* |
2 | * Single-precision vector sin function. |
3 | * |
4 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | * See https://llvm.org/LICENSE.txt for license information. |
6 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | */ |
8 | |
9 | #include "mathlib.h" |
10 | #include "v_math.h" |
11 | #if V_SUPPORTED |
12 | |
13 | static const float Poly[] = { |
14 | /* 1.886 ulp error */ |
15 | 0x1.5b2e76p-19f, |
16 | -0x1.9f42eap-13f, |
17 | 0x1.110df4p-7f, |
18 | -0x1.555548p-3f, |
19 | }; |
20 | #define Pi1 v_f32 (0x1.921fb6p+1f) |
21 | #define Pi2 v_f32 (-0x1.777a5cp-24f) |
22 | #define Pi3 v_f32 (-0x1.ee59dap-49f) |
23 | #define A3 v_f32 (Poly[3]) |
24 | #define A5 v_f32 (Poly[2]) |
25 | #define A7 v_f32 (Poly[1]) |
26 | #define A9 v_f32 (Poly[0]) |
27 | #define RangeVal v_f32 (0x1p20f) |
28 | #define InvPi v_f32 (0x1.45f306p-2f) |
29 | #define Shift v_f32 (0x1.8p+23f) |
30 | #define AbsMask v_u32 (0x7fffffff) |
31 | |
32 | VPCS_ATTR |
33 | static v_f32_t |
34 | specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp) |
35 | { |
36 | /* Fall back to scalar code. */ |
37 | return v_call_f32 (f: sinf, x, y, p: cmp); |
38 | } |
39 | |
40 | VPCS_ATTR |
41 | v_f32_t |
42 | V_NAME(sinf) (v_f32_t x) |
43 | { |
44 | v_f32_t n, r, r2, y; |
45 | v_u32_t sign, odd, cmp; |
46 | |
47 | r = v_as_f32_u32 (x: v_as_u32_f32 (x) & AbsMask); |
48 | sign = v_as_u32_f32 (x) & ~AbsMask; |
49 | cmp = v_cond_u32 (x: v_as_u32_f32 (x: r) >= v_as_u32_f32 (RangeVal)); |
50 | |
51 | /* n = rint(|x|/pi) */ |
52 | n = v_fma_f32 (InvPi, y: r, Shift); |
53 | odd = v_as_u32_f32 (x: n) << 31; |
54 | n -= Shift; |
55 | |
56 | /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */ |
57 | r = v_fma_f32 (x: -Pi1, y: n, z: r); |
58 | r = v_fma_f32 (x: -Pi2, y: n, z: r); |
59 | r = v_fma_f32 (x: -Pi3, y: n, z: r); |
60 | |
61 | /* y = sin(r) */ |
62 | r2 = r * r; |
63 | y = v_fma_f32 (A9, y: r2, A7); |
64 | y = v_fma_f32 (x: y, y: r2, A5); |
65 | y = v_fma_f32 (x: y, y: r2, A3); |
66 | y = v_fma_f32 (x: y * r2, y: r, z: r); |
67 | |
68 | /* sign fix */ |
69 | y = v_as_f32_u32 (x: v_as_u32_f32 (x: y) ^ sign ^ odd); |
70 | |
71 | if (unlikely (v_any_u32 (cmp))) |
72 | return specialcase (x, y, cmp); |
73 | return y; |
74 | } |
75 | VPCS_ALIAS |
76 | #endif |
77 | |