1//===-- Implementation of hypotf16 function -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "src/math/hypotf16.h"
10#include "src/__support/FPUtil/FEnvImpl.h"
11#include "src/__support/FPUtil/FPBits.h"
12#include "src/__support/FPUtil/cast.h"
13#include "src/__support/FPUtil/multiply_add.h"
14#include "src/__support/FPUtil/sqrt.h"
15#include "src/__support/common.h"
16#include "src/__support/macros/optimization.h"
17#include "src/__support/macros/properties/types.h"
18
19namespace LIBC_NAMESPACE_DECL {
20
21// For targets where conversion from float to float16 has to be
22// emulated, fputil::hypot<float16> is faster
23LLVM_LIBC_FUNCTION(float16, hypotf16, (float16 x, float16 y)) {
24 using FloatBits = fputil::FPBits<float>;
25 using FPBits = fputil::FPBits<float16>;
26
27 FPBits x_abs = FPBits(x).abs();
28 FPBits y_abs = FPBits(y).abs();
29
30 bool x_abs_larger = x_abs.uintval() >= y_abs.uintval();
31
32 FPBits a_bits = x_abs_larger ? x_abs : y_abs;
33 FPBits b_bits = x_abs_larger ? y_abs : x_abs;
34
35 uint16_t a_u = a_bits.uintval();
36 uint16_t b_u = b_bits.uintval();
37
38 // Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()`
39 // generates extra exponent bit masking instructions on x86-64.
40 if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) {
41 // x or y is inf or nan
42 if (a_bits.is_signaling_nan() || b_bits.is_signaling_nan()) {
43 fputil::raise_except_if_required(FE_INVALID);
44 return FPBits::quiet_nan().get_val();
45 }
46 if (a_bits.is_inf() || b_bits.is_inf())
47 return FPBits::inf().get_val();
48 return a_bits.get_val();
49 }
50
51 // TODO: Investigate why replacing the return line below with:
52 // return x_bits.get_val() + y_bits.get_val();
53 // fails the hypotf16 smoke tests.
54 if (LIBC_UNLIKELY(a_u - b_u >=
55 static_cast<uint16_t>((FPBits::FRACTION_LEN + 2)
56 << FPBits::FRACTION_LEN)))
57 return a_bits.get_val() + b_bits.get_val();
58
59 float af = fputil::cast<float>(a_bits.get_val());
60 float bf = fputil::cast<float>(b_bits.get_val());
61
62 // These squares are exact.
63 float a_sq = af * af;
64 float sum_sq = fputil::multiply_add(bf, bf, a_sq);
65
66 FloatBits result(fputil::sqrt<float>(sum_sq));
67 uint32_t r_u = result.uintval();
68
69 // If any of the sticky bits of the result are non-zero, except the LSB, then
70 // the rounded result is correct.
71 if (LIBC_UNLIKELY(((r_u + 1) & 0x0000'0FFE) == 0)) {
72 float r_d = result.get_val();
73
74 // Perform rounding correction.
75 float sum_sq_lo = fputil::multiply_add(bf, bf, a_sq - sum_sq);
76 float err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq);
77
78 if (err > 0) {
79 r_u |= 1;
80 } else if ((err < 0) && (r_u & 1) == 0) {
81 r_u -= 1;
82 } else if ((r_u & 0x0000'1FFF) == 0) {
83 // The rounded result is exact.
84 fputil::clear_except_if_required(FE_INEXACT);
85 }
86 return fputil::cast<float16>(FloatBits(r_u).get_val());
87 }
88
89 return fputil::cast<float16>(result.get_val());
90}
91
92} // namespace LIBC_NAMESPACE_DECL
93

source code of libc/src/math/generic/hypotf16.cpp