NormalFloat.h source code [libc/src/__support/FPUtil/NormalFloat.h]

Warning: This file is not a C or C++ file. It does not have highlighting.

1	//===-- A class to store a normalized floating point number ------ C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
10	#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
11
12	#include "FPBits.h"
13
14	#include "src/__support/CPP/type_traits.h"
15	#include "src/__support/common.h"
16	#include "src/__support/macros/config.h"
17
18	#include <stdint.h>
19
20	namespace LIBC_NAMESPACE_DECL {
21	namespace fputil {
22
23	// A class which stores the normalized form of a floating point value.
24	// The special IEEE-754 bits patterns of Zero, infinity and NaNs are
25	// are not handled by this class.
26	//
27	// A normalized floating point number is of this form:
28	// (-1)sign 2^exponent * <mantissa>
29	// where <mantissa> is of the form 1.<...>.
30	template <typename T> struct NormalFloat {
31	static_assert(
32	cpp::is_floating_point_v<T>,
33	"NormalFloat template parameter has to be a floating point type.");
34
35	using StorageType = typename FPBits<T>::StorageType;
36	static constexpr StorageType ONE =
37	(StorageType(1) << FPBits<T>::FRACTION_LEN);
38
39	// Unbiased exponent value.
40	int32_t exponent;
41
42	StorageType mantissa;
43	// We want \|StorageType\| to have atleast one bit more than the actual mantissa
44	// bit width to accommodate the implicit 1 value.
45	static_assert(sizeof(StorageType) * 8 >= FPBits<T>::FRACTION_LEN + 1,
46	"Bad type for mantissa in NormalFloat.");
47
48	Sign sign = Sign::POS;
49
50	LIBC_INLINE NormalFloat(Sign s, int32_t e, StorageType m)
51	: exponent(e), mantissa(m), sign(s) {
52	if (mantissa >= ONE)
53	return;
54
55	unsigned normalization_shift = evaluate_normalization_shift(mantissa);
56	mantissa <<= normalization_shift;
57	exponent -= normalization_shift;
58	}
59
60	LIBC_INLINE explicit NormalFloat(T x) { init_from_bits(FPBits<T>(x)); }
61
62	LIBC_INLINE explicit NormalFloat(FPBits<T> bits) { init_from_bits(bits); }
63
64	// Compares this normalized number with another normalized number.
65	// Returns -1 is this number is less than \|other\|, 0 if this number is equal
66	// to \|other\|, and 1 if this number is greater than \|other\|.
67	LIBC_INLINE int cmp(const NormalFloat<T> &other) const {
68	const int result = sign.is_neg() ? -1 : 1;
69	if (sign != other.sign)
70	return result;
71
72	if (exponent > other.exponent) {
73	return result;
74	} else if (exponent == other.exponent) {
75	if (mantissa > other.mantissa)
76	return result;
77	else if (mantissa == other.mantissa)
78	return 0;
79	else
80	return -result;
81	} else {
82	return -result;
83	}
84	}
85
86	// Returns a new normalized floating point number which is equal in value
87	// to this number multiplied by 2^e. That is:
88	// new = this * 2^e
89	LIBC_INLINE NormalFloat<T> mul2(int e) const {
90	NormalFloat<T> result = *this;
91	result.exponent += e;
92	return result;
93	}
94
95	LIBC_INLINE operator T() const {
96	int biased_exponent = exponent + FPBits<T>::EXP_BIAS;
97	// Max exponent is of the form 0xFF...E. That is why -2 and not -1.
98	constexpr int MAX_EXPONENT_VALUE = (1 << FPBits<T>::EXP_LEN) - 2;
99	if (biased_exponent > MAX_EXPONENT_VALUE) {
100	return FPBits<T>::inf(sign).get_val();
101	}
102
103	FPBits<T> result(T(0.0));
104	result.set_sign(sign);
105
106	constexpr int SUBNORMAL_EXPONENT = -FPBits<T>::EXP_BIAS + 1;
107	if (exponent < SUBNORMAL_EXPONENT) {
108	unsigned shift = static_cast<unsigned>(SUBNORMAL_EXPONENT - exponent);
109	// Since exponent > subnormalExponent, shift is strictly greater than
110	// zero.
111	if (shift <= FPBits<T>::FRACTION_LEN + 1) {
112	// Generate a subnormal number. Might lead to loss of precision.
113	// We round to nearest and round halfway cases to even.
114	const StorageType shift_out_mask =
115	static_cast<StorageType>(StorageType(1) << shift) - 1;
116	const StorageType shift_out_value = mantissa & shift_out_mask;
117	const StorageType halfway_value =
118	static_cast<StorageType>(StorageType(1) << (shift - 1));
119	result.set_biased_exponent(0);
120	result.set_mantissa(mantissa >> shift);
121	StorageType new_mantissa = result.get_mantissa();
122	if (shift_out_value > halfway_value) {
123	new_mantissa += 1;
124	} else if (shift_out_value == halfway_value) {
125	// Round to even.
126	if (result.get_mantissa() & 0x1)
127	new_mantissa += 1;
128	}
129	result.set_mantissa(new_mantissa);
130	// Adding 1 to mantissa can lead to overflow. This can only happen if
131	// mantissa was all ones (0b111..11). For such a case, we will carry
132	// the overflow into the exponent.
133	if (new_mantissa == ONE)
134	result.set_biased_exponent(1);
135	return result.get_val();
136	} else {
137	return result.get_val();
138	}
139	}
140
141	result.set_biased_exponent(
142	static_cast<StorageType>(exponent + FPBits<T>::EXP_BIAS));
143	result.set_mantissa(mantissa);
144	return result.get_val();
145	}
146
147	private:
148	LIBC_INLINE void init_from_bits(FPBits<T> bits) {
149	sign = bits.sign();
150
151	if (bits.is_inf_or_nan() \|\| bits.is_zero()) {
152	// Ignore special bit patterns. Implementations deal with them separately
153	// anyway so this should not be a problem.
154	exponent = 0;
155	mantissa = 0;
156	return;
157	}
158
159	// Normalize subnormal numbers.
160	if (bits.is_subnormal()) {
161	unsigned shift = evaluate_normalization_shift(bits.get_mantissa());
162	mantissa = static_cast<StorageType>(bits.get_mantissa() << shift);
163	exponent = 1 - FPBits<T>::EXP_BIAS - static_cast<int32_t>(shift);
164	} else {
165	exponent = bits.get_biased_exponent() - FPBits<T>::EXP_BIAS;
166	mantissa = ONE \| bits.get_mantissa();
167	}
168	}
169
170	LIBC_INLINE unsigned evaluate_normalization_shift(StorageType m) {
171	unsigned shift = 0;
172	for (; (ONE & m) == 0 && (shift < FPBits<T>::FRACTION_LEN);
173	m <<= 1, ++shift)
174	;
175	return shift;
176	}
177	};
178
179	#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
180	template <>
181	LIBC_INLINE void
182	NormalFloat<long double>::init_from_bits(FPBits<long double> bits) {
183	sign = bits.sign();
184
185	if (bits.is_inf_or_nan() \|\| bits.is_zero()) {
186	// Ignore special bit patterns. Implementations deal with them separately
187	// anyway so this should not be a problem.
188	exponent = 0;
189	mantissa = 0;
190	return;
191	}
192
193	if (bits.is_subnormal()) {
194	if (bits.get_implicit_bit() == 0) {
195	// Since we ignore zero value, the mantissa in this case is non-zero.
196	int normalization_shift =
197	evaluate_normalization_shift(bits.get_mantissa());
198	exponent = -16382 - normalization_shift;
199	mantissa = (bits.get_mantissa() << normalization_shift);
200	} else {
201	exponent = -16382;
202	mantissa = ONE \| bits.get_mantissa();
203	}
204	} else {
205	if (bits.get_implicit_bit() == 0) {
206	// Invalid number so just store 0 similar to a NaN.
207	exponent = 0;
208	mantissa = 0;
209	} else {
210	exponent = bits.get_biased_exponent() - 16383;
211	mantissa = ONE \| bits.get_mantissa();
212	}
213	}
214	}
215
216	template <> LIBC_INLINE NormalFloat<long double>::operator long double() const {
217	using LDBits = FPBits<long double>;
218	int biased_exponent = exponent + LDBits::EXP_BIAS;
219	// Max exponent is of the form 0xFF...E. That is why -2 and not -1.
220	constexpr int MAX_EXPONENT_VALUE = (1 << LDBits::EXP_LEN) - 2;
221	if (biased_exponent > MAX_EXPONENT_VALUE) {
222	return LDBits::inf(sign).get_val();
223	}
224
225	FPBits<long double> result(0.0l);
226	result.set_sign(sign);
227
228	constexpr int SUBNORMAL_EXPONENT = -LDBits::EXP_BIAS + 1;
229	if (exponent < SUBNORMAL_EXPONENT) {
230	unsigned shift = SUBNORMAL_EXPONENT - exponent;
231	if (shift <= LDBits::FRACTION_LEN + 1) {
232	// Generate a subnormal number. Might lead to loss of precision.
233	// We round to nearest and round halfway cases to even.
234	const StorageType shift_out_mask = (StorageType(1) << shift) - 1;
235	const StorageType shift_out_value = mantissa & shift_out_mask;
236	const StorageType halfway_value = StorageType(1) << (shift - 1);
237	result.set_biased_exponent(0);
238	result.set_mantissa(mantissa >> shift);
239	StorageType new_mantissa = result.get_mantissa();
240	if (shift_out_value > halfway_value) {
241	new_mantissa += 1;
242	} else if (shift_out_value == halfway_value) {
243	// Round to even.
244	if (result.get_mantissa() & 0x1)
245	new_mantissa += 1;
246	}
247	result.set_mantissa(new_mantissa);
248	// Adding 1 to mantissa can lead to overflow. This can only happen if
249	// mantissa was all ones (0b111..11). For such a case, we will carry
250	// the overflow into the exponent and set the implicit bit to 1.
251	if (new_mantissa == ONE) {
252	result.set_biased_exponent(1);
253	result.set_implicit_bit(1);
254	} else {
255	result.set_implicit_bit(0);
256	}
257	return result.get_val();
258	} else {
259	return result.get_val();
260	}
261	}
262
263	result.set_biased_exponent(biased_exponent);
264	result.set_mantissa(mantissa);
265	result.set_implicit_bit(1);
266	return result.get_val();
267	}
268	#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
269
270	} // namespace fputil
271	} // namespace LIBC_NAMESPACE_DECL
272
273	#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
274

Warning: This file is not a C or C++ file. It does not have highlighting.

Provided by KDAB

Learn to use CMake with our Intro Training

Find out more

source code of libc/src/__support/FPUtil/NormalFloat.h