1//===-- A class to store a normalized floating point number -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
10#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
11
12#include "FPBits.h"
13
14#include "src/__support/CPP/type_traits.h"
15#include "src/__support/common.h"
16
17#include <stdint.h>
18
19namespace LIBC_NAMESPACE {
20namespace fputil {
21
22// A class which stores the normalized form of a floating point value.
23// The special IEEE-754 bits patterns of Zero, infinity and NaNs are
24// are not handled by this class.
25//
26// A normalized floating point number is of this form:
27// (-1)*sign * 2^exponent * <mantissa>
28// where <mantissa> is of the form 1.<...>.
29template <typename T> struct NormalFloat {
30 static_assert(
31 cpp::is_floating_point_v<T>,
32 "NormalFloat template parameter has to be a floating point type.");
33
34 using StorageType = typename FPBits<T>::StorageType;
35 static constexpr StorageType ONE =
36 (StorageType(1) << FPBits<T>::FRACTION_LEN);
37
38 // Unbiased exponent value.
39 int32_t exponent;
40
41 StorageType mantissa;
42 // We want |StorageType| to have atleast one bit more than the actual mantissa
43 // bit width to accommodate the implicit 1 value.
44 static_assert(sizeof(StorageType) * 8 >= FPBits<T>::FRACTION_LEN + 1,
45 "Bad type for mantissa in NormalFloat.");
46
47 Sign sign = Sign::POS;
48
49 LIBC_INLINE NormalFloat(Sign s, int32_t e, StorageType m)
50 : exponent(e), mantissa(m), sign(s) {
51 if (mantissa >= ONE)
52 return;
53
54 unsigned normalization_shift = evaluate_normalization_shift(m: mantissa);
55 mantissa = mantissa << normalization_shift;
56 exponent -= normalization_shift;
57 }
58
59 LIBC_INLINE explicit NormalFloat(T x) { init_from_bits(bits: FPBits<T>(x)); }
60
61 LIBC_INLINE explicit NormalFloat(FPBits<T> bits) { init_from_bits(bits); }
62
63 // Compares this normalized number with another normalized number.
64 // Returns -1 is this number is less than |other|, 0 if this number is equal
65 // to |other|, and 1 if this number is greater than |other|.
66 LIBC_INLINE int cmp(const NormalFloat<T> &other) const {
67 const int result = sign.is_neg() ? -1 : 1;
68 if (sign != other.sign)
69 return result;
70
71 if (exponent > other.exponent) {
72 return result;
73 } else if (exponent == other.exponent) {
74 if (mantissa > other.mantissa)
75 return result;
76 else if (mantissa == other.mantissa)
77 return 0;
78 else
79 return -result;
80 } else {
81 return -result;
82 }
83 }
84
85 // Returns a new normalized floating point number which is equal in value
86 // to this number multiplied by 2^e. That is:
87 // new = this * 2^e
88 LIBC_INLINE NormalFloat<T> mul2(int e) const {
89 NormalFloat<T> result = *this;
90 result.exponent += e;
91 return result;
92 }
93
94 LIBC_INLINE operator T() const {
95 int biased_exponent = exponent + FPBits<T>::EXP_BIAS;
96 // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
97 constexpr int MAX_EXPONENT_VALUE = (1 << FPBits<T>::EXP_LEN) - 2;
98 if (biased_exponent > MAX_EXPONENT_VALUE) {
99 return FPBits<T>::inf(sign).get_val();
100 }
101
102 FPBits<T> result(T(0.0));
103 result.set_sign(sign);
104
105 constexpr int SUBNORMAL_EXPONENT = -FPBits<T>::EXP_BIAS + 1;
106 if (exponent < SUBNORMAL_EXPONENT) {
107 unsigned shift = SUBNORMAL_EXPONENT - exponent;
108 // Since exponent > subnormalExponent, shift is strictly greater than
109 // zero.
110 if (shift <= FPBits<T>::FRACTION_LEN + 1) {
111 // Generate a subnormal number. Might lead to loss of precision.
112 // We round to nearest and round halfway cases to even.
113 const StorageType shift_out_mask = (StorageType(1) << shift) - 1;
114 const StorageType shift_out_value = mantissa & shift_out_mask;
115 const StorageType halfway_value = StorageType(1) << (shift - 1);
116 result.set_biased_exponent(0);
117 result.set_mantissa(mantissa >> shift);
118 StorageType new_mantissa = result.get_mantissa();
119 if (shift_out_value > halfway_value) {
120 new_mantissa += 1;
121 } else if (shift_out_value == halfway_value) {
122 // Round to even.
123 if (result.get_mantissa() & 0x1)
124 new_mantissa += 1;
125 }
126 result.set_mantissa(new_mantissa);
127 // Adding 1 to mantissa can lead to overflow. This can only happen if
128 // mantissa was all ones (0b111..11). For such a case, we will carry
129 // the overflow into the exponent.
130 if (new_mantissa == ONE)
131 result.set_biased_exponent(1);
132 return result.get_val();
133 } else {
134 return result.get_val();
135 }
136 }
137
138 result.set_biased_exponent(exponent + FPBits<T>::EXP_BIAS);
139 result.set_mantissa(mantissa);
140 return result.get_val();
141 }
142
143private:
144 LIBC_INLINE void init_from_bits(FPBits<T> bits) {
145 sign = bits.sign();
146
147 if (bits.is_inf_or_nan() || bits.is_zero()) {
148 // Ignore special bit patterns. Implementations deal with them separately
149 // anyway so this should not be a problem.
150 exponent = 0;
151 mantissa = 0;
152 return;
153 }
154
155 // Normalize subnormal numbers.
156 if (bits.is_subnormal()) {
157 unsigned shift = evaluate_normalization_shift(m: bits.get_mantissa());
158 mantissa = StorageType(bits.get_mantissa()) << shift;
159 exponent = 1 - FPBits<T>::EXP_BIAS - shift;
160 } else {
161 exponent = bits.get_biased_exponent() - FPBits<T>::EXP_BIAS;
162 mantissa = ONE | bits.get_mantissa();
163 }
164 }
165
166 LIBC_INLINE unsigned evaluate_normalization_shift(StorageType m) {
167 unsigned shift = 0;
168 for (; (ONE & m) == 0 && (shift < FPBits<T>::FRACTION_LEN);
169 m <<= 1, ++shift)
170 ;
171 return shift;
172 }
173};
174
175#ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
176template <>
177LIBC_INLINE void
178NormalFloat<long double>::init_from_bits(FPBits<long double> bits) {
179 sign = bits.sign();
180
181 if (bits.is_inf_or_nan() || bits.is_zero()) {
182 // Ignore special bit patterns. Implementations deal with them separately
183 // anyway so this should not be a problem.
184 exponent = 0;
185 mantissa = 0;
186 return;
187 }
188
189 if (bits.is_subnormal()) {
190 if (bits.get_implicit_bit() == 0) {
191 // Since we ignore zero value, the mantissa in this case is non-zero.
192 int normalization_shift =
193 evaluate_normalization_shift(m: bits.get_mantissa());
194 exponent = -16382 - normalization_shift;
195 mantissa = (bits.get_mantissa() << normalization_shift);
196 } else {
197 exponent = -16382;
198 mantissa = ONE | bits.get_mantissa();
199 }
200 } else {
201 if (bits.get_implicit_bit() == 0) {
202 // Invalid number so just store 0 similar to a NaN.
203 exponent = 0;
204 mantissa = 0;
205 } else {
206 exponent = bits.get_biased_exponent() - 16383;
207 mantissa = ONE | bits.get_mantissa();
208 }
209 }
210}
211
212template <> LIBC_INLINE NormalFloat<long double>::operator long double() const {
213 using LDBits = FPBits<long double>;
214 int biased_exponent = exponent + LDBits::EXP_BIAS;
215 // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
216 constexpr int MAX_EXPONENT_VALUE = (1 << LDBits::EXP_LEN) - 2;
217 if (biased_exponent > MAX_EXPONENT_VALUE) {
218 return LDBits::inf(sign).get_val();
219 }
220
221 FPBits<long double> result(0.0l);
222 result.set_sign(sign);
223
224 constexpr int SUBNORMAL_EXPONENT = -LDBits::EXP_BIAS + 1;
225 if (exponent < SUBNORMAL_EXPONENT) {
226 unsigned shift = SUBNORMAL_EXPONENT - exponent;
227 if (shift <= LDBits::FRACTION_LEN + 1) {
228 // Generate a subnormal number. Might lead to loss of precision.
229 // We round to nearest and round halfway cases to even.
230 const StorageType shift_out_mask = (StorageType(1) << shift) - 1;
231 const StorageType shift_out_value = mantissa & shift_out_mask;
232 const StorageType halfway_value = StorageType(1) << (shift - 1);
233 result.set_biased_exponent(0);
234 result.set_mantissa(mantissa >> shift);
235 StorageType new_mantissa = result.get_mantissa();
236 if (shift_out_value > halfway_value) {
237 new_mantissa += 1;
238 } else if (shift_out_value == halfway_value) {
239 // Round to even.
240 if (result.get_mantissa() & 0x1)
241 new_mantissa += 1;
242 }
243 result.set_mantissa(new_mantissa);
244 // Adding 1 to mantissa can lead to overflow. This can only happen if
245 // mantissa was all ones (0b111..11). For such a case, we will carry
246 // the overflow into the exponent and set the implicit bit to 1.
247 if (new_mantissa == ONE) {
248 result.set_biased_exponent(1);
249 result.set_implicit_bit(1);
250 } else {
251 result.set_implicit_bit(0);
252 }
253 return result.get_val();
254 } else {
255 return result.get_val();
256 }
257 }
258
259 result.set_biased_exponent(biased_exponent);
260 result.set_mantissa(mantissa);
261 result.set_implicit_bit(1);
262 return result.get_val();
263}
264#endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
265
266} // namespace fputil
267} // namespace LIBC_NAMESPACE
268
269#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
270

// Source: libc/src/__support/FPUtil/NormalFloat.h