hypotf16.cpp source code [libc/src/math/generic/hypotf16.cpp]

1	//===-- Implementation of hypotf16 function -------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "src/math/hypotf16.h"
10	#include "src/__support/FPUtil/FEnvImpl.h"
11	#include "src/__support/FPUtil/FPBits.h"
12	#include "src/__support/FPUtil/cast.h"
13	#include "src/__support/FPUtil/multiply_add.h"
14	#include "src/__support/FPUtil/sqrt.h"
15	#include "src/__support/common.h"
16	#include "src/__support/macros/optimization.h"
17	#include "src/__support/macros/properties/types.h"
18
19	namespace LIBC_NAMESPACE_DECL {
20
21	// For targets where conversion from float to float16 has to be
22	// emulated, fputil::hypot<float16> is faster
23	LLVM_LIBC_FUNCTION(float16, hypotf16, (float16 x, float16 y)) {
24	using FloatBits = fputil::FPBits<float>;
25	using FPBits = fputil::FPBits<float16>;
26
27	FPBits x_abs = FPBits(x).abs();
28	FPBits y_abs = FPBits(y).abs();
29
30	bool x_abs_larger = x_abs.uintval() >= y_abs.uintval();
31
32	FPBits a_bits = x_abs_larger ? x_abs : y_abs;
33	FPBits b_bits = x_abs_larger ? y_abs : x_abs;
34
35	uint16_t a_u = a_bits.uintval();
36	uint16_t b_u = b_bits.uintval();
37
38	// Note: replacing `a_u >= FPBits::EXP_MASK` with `a_bits.is_inf_or_nan()`
39	// generates extra exponent bit masking instructions on x86-64.
40	if (LIBC_UNLIKELY(a_u >= FPBits::EXP_MASK)) {
41	// x or y is inf or nan
42	if (a_bits.is_signaling_nan() \|\| b_bits.is_signaling_nan()) {
43	fputil::raise_except_if_required(FE_INVALID);
44	return FPBits::quiet_nan().get_val();
45	}
46	if (a_bits.is_inf() \|\| b_bits.is_inf())
47	return FPBits::inf().get_val();
48	return a_bits.get_val();
49	}
50
51	// TODO: Investigate why replacing the return line below with:
52	// return x_bits.get_val() + y_bits.get_val();
53	// fails the hypotf16 smoke tests.
54	if (LIBC_UNLIKELY(a_u - b_u >=
55	static_cast<uint16_t>((FPBits::FRACTION_LEN + `2`)
56	<< FPBits::FRACTION_LEN)))
57	return a_bits.get_val() + b_bits.get_val();
58
59	float af = fputil::cast<float>(a_bits.get_val());
60	float bf = fputil::cast<float>(b_bits.get_val());
61
62	// These squares are exact.
63	float a_sq = af * af;
64	float sum_sq = fputil::multiply_add(bf, bf, a_sq);
65
66	FloatBits result(fputil::sqrt<float>(sum_sq));
67	uint32_t r_u = result.uintval();
68
69	// If any of the sticky bits of the result are non-zero, except the LSB, then
70	// the rounded result is correct.
71	if (LIBC_UNLIKELY(((r_u + `1`) & `0x0000'0FFE`) == `0`)) {
72	float r_d = result.get_val();
73
74	// Perform rounding correction.
75	float sum_sq_lo = fputil::multiply_add(bf, bf, a_sq - sum_sq);
76	float err = sum_sq_lo - fputil::multiply_add(r_d, r_d, -sum_sq);
77
78	if (err > `0`) {
79	r_u \|= `1`;
80	} else if ((err < `0`) && (r_u & `1`) == `0`) {
81	r_u -= `1`;
82	} else if ((r_u & `0x0000'1FFF`) == `0`) {
83	// The rounded result is exact.
84	fputil::clear_except_if_required(FE_INEXACT);
85	}
86	return fputil::cast<float16>(FloatBits(r_u).get_val());
87	}
88
89	return fputil::cast<float16>(result.get_val());
90	}
91
92	} // namespace LIBC_NAMESPACE_DECL
93

source code of libc/src/math/generic/hypotf16.cpp