1//===-- Half-precision acosf16(x) function --------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception.
6//
7//
8//===----------------------------------------------------------------------===//
9
10#include "src/math/acosf16.h"
11#include "hdr/errno_macros.h"
12#include "hdr/fenv_macros.h"
13#include "src/__support/FPUtil/FEnvImpl.h"
14#include "src/__support/FPUtil/FPBits.h"
15#include "src/__support/FPUtil/PolyEval.h"
16#include "src/__support/FPUtil/cast.h"
17#include "src/__support/FPUtil/except_value_utils.h"
18#include "src/__support/FPUtil/multiply_add.h"
19#include "src/__support/FPUtil/sqrt.h"
20#include "src/__support/macros/optimization.h"
21
22namespace LIBC_NAMESPACE_DECL {
23
24// Generated by Sollya using the following command:
25// > round(pi/2, SG, RN);
26// > round(pi, SG, RN);
27static constexpr float PI_OVER_2 = 0x1.921fb6p0f;
28static constexpr float PI = 0x1.921fb6p1f;
29
30#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
31static constexpr size_t N_EXCEPTS = 2;
32
33static constexpr fputil::ExceptValues<float16, N_EXCEPTS> ACOSF16_EXCEPTS{{
34 // (input, RZ output, RU offset, RD offset, RN offset)
35 {0xacaf, 0x3e93, 1, 0, 0},
36 {0xb874, 0x4052, 1, 0, 1},
37}};
38#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
39
40LLVM_LIBC_FUNCTION(float16, acosf16, (float16 x)) {
41 using FPBits = fputil::FPBits<float16>;
42 FPBits xbits(x);
43
44 uint16_t x_u = xbits.uintval();
45 uint16_t x_abs = x_u & 0x7fff;
46 uint16_t x_sign = x_u >> 15;
47
48 // |x| > 0x1p0, |x| > 1, or x is NaN.
49 if (LIBC_UNLIKELY(x_abs > 0x3c00)) {
50 // acosf16(NaN) = NaN
51 if (xbits.is_nan()) {
52 if (xbits.is_signaling_nan()) {
53 fputil::raise_except_if_required(FE_INVALID);
54 return FPBits::quiet_nan().get_val();
55 }
56
57 return x;
58 }
59
60 // 1 < |x| <= +/-inf
61 fputil::raise_except_if_required(FE_INVALID);
62 fputil::set_errno_if_required(EDOM);
63
64 return FPBits::quiet_nan().get_val();
65 }
66
67 float xf = x;
68
69#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
70 // Handle exceptional values
71 if (auto r = ACOSF16_EXCEPTS.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
72 return r.value();
73#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
74
75 // |x| == 0x1p0, x is 1 or -1
76 // if x is (-)1, return pi, else
77 // if x is (+)1, return 0
78 if (LIBC_UNLIKELY(x_abs == 0x3c00))
79 return fputil::cast<float16>(x_sign ? PI : 0.0f);
80
81 float xsq = xf * xf;
82
83 // |x| <= 0x1p-1, |x| <= 0.5
84 if (x_abs <= 0x3800) {
85 // if x is 0, return pi/2
86 if (LIBC_UNLIKELY(x_abs == 0))
87 return fputil::cast<float16>(PI_OVER_2);
88
89 // Note that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
90 // Degree-6 minimax polynomial of asin(x) generated by Sollya with:
91 // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
92 float interm =
93 fputil::polyeval(xsq, 0x1.000002p0f, 0x1.554c2ap-3f, 0x1.3541ccp-4f,
94 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
95 return fputil::cast<float16>(fputil::multiply_add(-xf, interm, PI_OVER_2));
96 }
97
98 // When |x| > 0.5, assume that 0.5 < |x| <= 1
99 //
100 // Step-by-step range-reduction proof:
101 // 1: Let y = asin(x), such that, x = sin(y)
102 // 2: From complimentary angle identity:
103 // x = sin(y) = cos(pi/2 - y)
104 // 3: Let z = pi/2 - y, such that x = cos(z)
105 // 4: From double angle formula; cos(2A) = 1 - 2 * sin^2(A):
106 // z = 2A, z/2 = A
107 // cos(z) = 1 - 2 * sin^2(z/2)
108 // 5: Make sin(z/2) subject of the formula:
109 // sin(z/2) = sqrt((1 - cos(z))/2)
110 // 6: Recall [3]; x = cos(z). Therefore:
111 // sin(z/2) = sqrt((1 - x)/2)
112 // 7: Let u = (1 - x)/2
113 // 8: Therefore:
114 // asin(sqrt(u)) = z/2
115 // 2 * asin(sqrt(u)) = z
116 // 9: Recall [3]; z = pi/2 - y. Therefore:
117 // y = pi/2 - z
118 // y = pi/2 - 2 * asin(sqrt(u))
119 // 10: Recall [1], y = asin(x). Therefore:
120 // asin(x) = pi/2 - 2 * asin(sqrt(u))
121 // 11: Recall that: acos(x) = pi/2 + asin(-x) = pi/2 - asin(x)
122 // Therefore:
123 // acos(x) = pi/2 - (pi/2 - 2 * asin(sqrt(u)))
124 // acos(x) = 2 * asin(sqrt(u))
125 //
126 // THE RANGE REDUCTION, HOW?
127 // 12: Recall [7], u = (1 - x)/2
128 // 13: Since 0.5 < x <= 1, therefore:
129 // 0 <= u <= 0.25 and 0 <= sqrt(u) <= 0.5
130 //
131 // Hence, we can reuse the same [0, 0.5] domain polynomial approximation for
132 // Step [11] as `sqrt(u)` is in range.
133 // When -1 < x <= -0.5, the identity:
134 // acos(x) = pi - acos(-x)
135 // allows us to compute for the negative x value (lhs)
136 // with a positive x value instead (rhs).
137
138 float xf_abs = (xf < 0 ? -xf : xf);
139 float u = fputil::multiply_add(-0.5f, xf_abs, 0.5f);
140 float sqrt_u = fputil::sqrt<float>(u);
141
142 // Degree-6 minimax polynomial of asin(x) generated by Sollya with:
143 // > P = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8|], [|SG...|], [0, 0.5]);
144 float asin_sqrt_u =
145 sqrt_u * fputil::polyeval(u, 0x1.000002p0f, 0x1.554c2ap-3f,
146 0x1.3541ccp-4f, 0x1.43b2d6p-5f, 0x1.a0d73ep-5f);
147
148 return fputil::cast<float16>(
149 x_sign ? fputil::multiply_add(-2.0f, asin_sqrt_u, PI) : 2 * asin_sqrt_u);
150}
151} // namespace LIBC_NAMESPACE_DECL
152

source code of libc/src/math/generic/acosf16.cpp