1 | //===-- Implementation of hypotf function ---------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | #include "src/math/hypotf.h" |
9 | #include "src/__support/FPUtil/FEnvImpl.h" |
10 | #include "src/__support/FPUtil/FPBits.h" |
11 | #include "src/__support/FPUtil/double_double.h" |
12 | #include "src/__support/FPUtil/multiply_add.h" |
13 | #include "src/__support/FPUtil/sqrt.h" |
14 | #include "src/__support/common.h" |
15 | #include "src/__support/macros/config.h" |
16 | #include "src/__support/macros/optimization.h" |
17 | |
18 | namespace LIBC_NAMESPACE_DECL { |
19 | |
20 | LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) { |
21 | using DoubleBits = fputil::FPBits<double>; |
22 | using FPBits = fputil::FPBits<float>; |
23 | |
24 | FPBits x_abs = FPBits(x).abs(); |
25 | FPBits y_abs = FPBits(y).abs(); |
26 | |
27 | bool x_abs_larger = x_abs.uintval() >= y_abs.uintval(); |
28 | |
29 | FPBits a_bits = x_abs_larger ? x_abs : y_abs; |
30 | FPBits b_bits = x_abs_larger ? y_abs : x_abs; |
31 | |
32 | uint32_t a_u = a_bits.uintval(); |
33 | uint32_t b_u = b_bits.uintval(); |
34 | |
35 | // Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()` |
36 | // generates extra exponent bit masking instructions on x86-64. |
37 | if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) { |
38 | // x or y is inf or nan |
39 | if (a_bits.is_signaling_nan() || b_bits.is_signaling_nan()) { |
40 | fputil::raise_except_if_required(FE_INVALID); |
41 | return FPBits::quiet_nan().get_val(); |
42 | } |
43 | if (a_bits.is_inf() || b_bits.is_inf()) |
44 | return FPBits::inf().get_val(); |
45 | return a_bits.get_val(); |
46 | } |
47 | |
48 | if (LIBC_UNLIKELY(a_u - b_u >= |
49 | static_cast<uint32_t>((FPBits::FRACTION_LEN + 2) |
50 | << FPBits::FRACTION_LEN))) |
51 | return x_abs.get_val() + y_abs.get_val(); |
52 | |
53 | double ad = static_cast<double>(a_bits.get_val()); |
54 | double bd = static_cast<double>(b_bits.get_val()); |
55 | |
56 | // These squares are exact. |
57 | double a_sq = ad * ad; |
58 | #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
59 | double sum_sq = fputil::multiply_add(bd, bd, a_sq); |
60 | #else |
61 | double b_sq = bd * bd; |
62 | double sum_sq = a_sq + b_sq; |
63 | #endif |
64 | |
65 | // Take sqrt in double precision. |
66 | DoubleBits result(fputil::sqrt<double>(sum_sq)); |
67 | uint64_t r_u = result.uintval(); |
68 | |
69 | // If any of the sticky bits of the result are non-zero, except the LSB, then |
70 | // the rounded result is correct. |
71 | if (LIBC_UNLIKELY(((r_u + 1) & 0x0000'0000'0FFF'FFFE) == 0)) { |
72 | double r_d = result.get_val(); |
73 | |
74 | // Perform rounding correction. |
75 | #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
76 | double sum_sq_lo = fputil::multiply_add(bd, bd, a_sq - sum_sq); |
77 | double err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq); |
78 | #else |
79 | fputil::DoubleDouble r_sq = fputil::exact_mult(r_d, r_d); |
80 | double sum_sq_lo = b_sq - (sum_sq - a_sq); |
81 | double err = (sum_sq - r_sq.hi) + (sum_sq_lo - r_sq.lo); |
82 | #endif |
83 | |
84 | if (err > 0) { |
85 | r_u |= 1; |
86 | } else if ((err < 0) && (r_u & 1) == 0) { |
87 | r_u -= 1; |
88 | } else if ((r_u & 0x0000'0000'1FFF'FFFF) == 0) { |
89 | // The rounded result is exact. |
90 | fputil::clear_except_if_required(FE_INEXACT); |
91 | } |
92 | return static_cast<float>(DoubleBits(r_u).get_val()); |
93 | } |
94 | |
95 | return static_cast<float>(result.get_val()); |
96 | } |
97 | |
98 | } // namespace LIBC_NAMESPACE_DECL |
99 | |