1 | //===-- lib/Decimal/big-radix-floating-point.h ------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_ |
10 | #define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_ |
11 | |
12 | // This is a helper class for use in floating-point conversions between |
13 | // binary and decimal representations. It holds a multiple-precision |
14 | // integer value using digits of a radix that is a large even power of ten |
15 | // (10,000,000,000,000,000 by default, 10**16). These digits are accompanied |
16 | // by a signed exponent that denotes multiplication by a power of ten. |
17 | // The effective radix point is to the right of the digits (i.e., they do |
18 | // not represent a fraction). |
19 | // |
20 | // The operations supported by this class are limited to those required |
21 | // for conversions between binary and decimal representations; it is not |
22 | // a general-purpose facility. |
23 | |
24 | #include "flang/Common/bit-population-count.h" |
25 | #include "flang/Common/leading-zero-bit-count.h" |
26 | #include "flang/Common/uint128.h" |
27 | #include "flang/Decimal/binary-floating-point.h" |
28 | #include "flang/Decimal/decimal.h" |
29 | #include <cinttypes> |
30 | #include <limits> |
31 | #include <type_traits> |
32 | |
33 | // Some environments, viz. glibc 2.17, allow the macro HUGE |
34 | // to leak out of <math.h>. |
35 | #undef HUGE |
36 | |
37 | namespace Fortran::decimal { |
38 | |
39 | static constexpr std::uint64_t TenToThe(int power) { |
40 | return power <= 0 ? 1 : 10 * TenToThe(power: power - 1); |
41 | } |
42 | |
43 | // 10**(LOG10RADIX + 3) must be < 2**wordbits, and LOG10RADIX must be |
44 | // even, so that pairs of decimal digits do not straddle Digits. |
45 | // So LOG10RADIX must be 16 or 6. |
46 | template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber { |
47 | public: |
48 | using Real = BinaryFloatingPointNumber<PREC>; |
49 | static constexpr int log10Radix{LOG10RADIX}; |
50 | |
51 | private: |
52 | static constexpr std::uint64_t uint64Radix{TenToThe(power: log10Radix)}; |
53 | static constexpr int minDigitBits{ |
54 | 64 - common::LeadingZeroBitCount(uint64Radix)}; |
55 | using Digit = common::HostUnsignedIntType<minDigitBits>; |
56 | static constexpr Digit radix{uint64Radix}; |
57 | static_assert(radix < std::numeric_limits<Digit>::max() / 1000, |
58 | "radix is somehow too big" ); |
59 | static_assert(radix > std::numeric_limits<Digit>::max() / 10000, |
60 | "radix is somehow too small" ); |
61 | |
62 | // The base-2 logarithm of the least significant bit that can arise |
63 | // in a subnormal IEEE floating-point number. |
64 | static constexpr int minLog2AnyBit{ |
65 | -Real::exponentBias - Real::binaryPrecision}; |
66 | |
67 | // The number of Digits needed to represent the smallest subnormal. |
68 | static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix}; |
69 | |
70 | public: |
71 | explicit RT_API_ATTRS BigRadixFloatingPointNumber( |
72 | enum FortranRounding rounding = RoundNearest) |
73 | : rounding_{rounding} {} |
74 | |
75 | // Converts a binary floating point value. |
76 | explicit RT_API_ATTRS BigRadixFloatingPointNumber( |
77 | Real, enum FortranRounding = RoundNearest); |
78 | |
79 | RT_API_ATTRS BigRadixFloatingPointNumber &SetToZero() { |
80 | isNegative_ = false; |
81 | digits_ = 0; |
82 | exponent_ = 0; |
83 | return *this; |
84 | } |
85 | |
86 | RT_API_ATTRS bool IsInteger() const { return exponent_ >= 0; } |
87 | |
88 | // Converts decimal floating-point to binary. |
89 | RT_API_ATTRS ConversionToBinaryResult<PREC> ConvertToBinary(); |
90 | |
91 | // Parses and converts to binary. Handles leading spaces, |
92 | // "NaN", & optionally-signed "Inf". Does not skip internal |
93 | // spaces. |
94 | // The argument is a reference to a pointer that is left |
95 | // pointing to the first character that wasn't parsed. |
96 | RT_API_ATTRS ConversionToBinaryResult<PREC> ConvertToBinary( |
97 | const char *&, const char *end = nullptr); |
98 | |
99 | // Formats a decimal floating-point number to a user buffer. |
100 | // May emit "NaN" or "Inf", or an possibly-signed integer. |
101 | // No decimal point is written, but if it were, it would be |
102 | // after the last digit; the effective decimal exponent is |
103 | // returned as part of the result structure so that it can be |
104 | // formatted by the client. |
105 | RT_API_ATTRS ConversionToDecimalResult ConvertToDecimal( |
106 | char *, std::size_t, enum DecimalConversionFlags, int digits) const; |
107 | |
108 | // Discard decimal digits not needed to distinguish this value |
109 | // from the decimal encodings of two others (viz., the nearest binary |
110 | // floating-point numbers immediately below and above this one). |
111 | // The last decimal digit may not be uniquely determined in all |
112 | // cases, and will be the mean value when that is so (e.g., if |
113 | // last decimal digit values 6-8 would all work, it'll be a 7). |
114 | // This minimization necessarily assumes that the value will be |
115 | // emitted and read back into the same (or less precise) format |
116 | // with default rounding to the nearest value. |
117 | RT_API_ATTRS void Minimize( |
118 | BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more); |
119 | |
120 | template <typename STREAM> STREAM &Dump(STREAM &) const; |
121 | |
122 | private: |
123 | RT_API_ATTRS BigRadixFloatingPointNumber( |
124 | const BigRadixFloatingPointNumber &that) |
125 | : digits_{that.digits_}, exponent_{that.exponent_}, |
126 | isNegative_{that.isNegative_}, rounding_{that.rounding_} { |
127 | for (int j{0}; j < digits_; ++j) { |
128 | digit_[j] = that.digit_[j]; |
129 | } |
130 | } |
131 | |
132 | RT_API_ATTRS bool IsZero() const { |
133 | // Don't assume normalization. |
134 | for (int j{0}; j < digits_; ++j) { |
135 | if (digit_[j] != 0) { |
136 | return false; |
137 | } |
138 | } |
139 | return true; |
140 | } |
141 | |
142 | // Predicate: true when 10*value would cause a carry. |
143 | // (When this happens during decimal-to-binary conversion, |
144 | // there are more digits in the input string than can be |
145 | // represented precisely.) |
146 | RT_API_ATTRS bool IsFull() const { |
147 | return digits_ == digitLimit_ && digit_[digits_ - 1] >= radix / 10; |
148 | } |
149 | |
150 | // Sets *this to an unsigned integer value. |
151 | // Returns any remainder. |
152 | template <typename UINT> RT_API_ATTRS UINT SetTo(UINT n) { |
153 | static_assert( |
154 | std::is_same_v<UINT, common::uint128_t> || std::is_unsigned_v<UINT>); |
155 | SetToZero(); |
156 | while (n != 0) { |
157 | auto q{n / 10u}; |
158 | if (n != q * 10) { |
159 | break; |
160 | } |
161 | ++exponent_; |
162 | n = q; |
163 | } |
164 | if constexpr (sizeof n < sizeof(Digit)) { |
165 | if (n != 0) { |
166 | digit_[digits_++] = n; |
167 | } |
168 | return 0; |
169 | } else { |
170 | while (n != 0 && digits_ < digitLimit_) { |
171 | auto q{n / radix}; |
172 | digit_[digits_++] = static_cast<Digit>(n - q * radix); |
173 | n = q; |
174 | } |
175 | return n; |
176 | } |
177 | } |
178 | |
179 | RT_API_ATTRS int RemoveLeastOrderZeroDigits() { |
180 | int remove{0}; |
181 | if (digits_ > 0 && digit_[0] == 0) { |
182 | while (remove < digits_ && digit_[remove] == 0) { |
183 | ++remove; |
184 | } |
185 | if (remove >= digits_) { |
186 | digits_ = 0; |
187 | } else if (remove > 0) { |
188 | #if defined __GNUC__ && __GNUC__ < 8 |
189 | // (&& j + remove < maxDigits) was added to avoid GCC < 8 build failure |
190 | // on -Werror=array-bounds. This can be removed if -Werror is disable. |
191 | for (int j{0}; j + remove < digits_ && (j + remove < maxDigits); ++j) { |
192 | #else |
193 | for (int j{0}; j + remove < digits_; ++j) { |
194 | #endif |
195 | digit_[j] = digit_[j + remove]; |
196 | } |
197 | digits_ -= remove; |
198 | } |
199 | } |
200 | return remove; |
201 | } |
202 | |
203 | RT_API_ATTRS void RemoveLeadingZeroDigits() { |
204 | while (digits_ > 0 && digit_[digits_ - 1] == 0) { |
205 | --digits_; |
206 | } |
207 | } |
208 | |
209 | RT_API_ATTRS void Normalize() { |
210 | RemoveLeadingZeroDigits(); |
211 | exponent_ += RemoveLeastOrderZeroDigits() * log10Radix; |
212 | } |
213 | |
214 | // This limited divisibility test only works for even divisors of the radix, |
215 | // which is fine since it's only ever used with 2 and 5. |
216 | template <int N> RT_API_ATTRS bool IsDivisibleBy() const { |
217 | static_assert(N > 1 && radix % N == 0, "bad modulus" ); |
218 | return digits_ == 0 || (digit_[0] % N) == 0; |
219 | } |
220 | |
221 | template <unsigned DIVISOR> RT_API_ATTRS int DivideBy() { |
222 | Digit remainder{0}; |
223 | for (int j{digits_ - 1}; j >= 0; --j) { |
224 | Digit q{digit_[j] / DIVISOR}; |
225 | Digit nrem{digit_[j] - DIVISOR * q}; |
226 | digit_[j] = q + (radix / DIVISOR) * remainder; |
227 | remainder = nrem; |
228 | } |
229 | return remainder; |
230 | } |
231 | |
232 | RT_API_ATTRS void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix |
233 | Digit remainder{0}; |
234 | auto mask{(Digit{1} << twoPow) - 1}; |
235 | auto coeff{radix >> twoPow}; |
236 | for (int j{digits_ - 1}; j >= 0; --j) { |
237 | auto nrem{digit_[j] & mask}; |
238 | digit_[j] = (digit_[j] >> twoPow) + coeff * remainder; |
239 | remainder = nrem; |
240 | } |
241 | } |
242 | |
243 | // Returns true on overflow |
244 | RT_API_ATTRS bool DivideByPowerOfTwoInPlace(int twoPow) { |
245 | if (digits_ > 0) { |
246 | while (twoPow > 0) { |
247 | int chunk{twoPow > log10Radix ? log10Radix : twoPow}; |
248 | if ((digit_[0] & ((Digit{1} << chunk) - 1)) == 0) { |
249 | DivideByPowerOfTwo(chunk); |
250 | twoPow -= chunk; |
251 | continue; |
252 | } |
253 | twoPow -= chunk; |
254 | if (digit_[digits_ - 1] >> chunk != 0) { |
255 | if (digits_ == digitLimit_) { |
256 | return true; // overflow |
257 | } |
258 | digit_[digits_++] = 0; |
259 | } |
260 | auto remainder{digit_[digits_ - 1]}; |
261 | exponent_ -= log10Radix; |
262 | auto coeff{radix >> chunk}; // precise; radix is (5*2)**log10Radix |
263 | auto mask{(Digit{1} << chunk) - 1}; |
264 | for (int j{digits_ - 1}; j >= 1; --j) { |
265 | digit_[j] = (digit_[j - 1] >> chunk) + coeff * remainder; |
266 | remainder = digit_[j - 1] & mask; |
267 | } |
268 | digit_[0] = coeff * remainder; |
269 | } |
270 | } |
271 | return false; // no overflow |
272 | } |
273 | |
274 | RT_API_ATTRS int AddCarry(int position = 0, int carry = 1) { |
275 | for (; position < digits_; ++position) { |
276 | Digit v{digit_[position] + carry}; |
277 | if (v < radix) { |
278 | digit_[position] = v; |
279 | return 0; |
280 | } |
281 | digit_[position] = v - radix; |
282 | carry = 1; |
283 | } |
284 | if (digits_ < digitLimit_) { |
285 | digit_[digits_++] = carry; |
286 | return 0; |
287 | } |
288 | Normalize(); |
289 | if (digits_ < digitLimit_) { |
290 | digit_[digits_++] = carry; |
291 | return 0; |
292 | } |
293 | return carry; |
294 | } |
295 | |
296 | RT_API_ATTRS void Decrement() { |
297 | for (int j{0}; digit_[j]-- == 0; ++j) { |
298 | digit_[j] = radix - 1; |
299 | } |
300 | } |
301 | |
302 | template <int N> RT_API_ATTRS int MultiplyByHelper(int carry = 0) { |
303 | for (int j{0}; j < digits_; ++j) { |
304 | auto v{N * digit_[j] + carry}; |
305 | carry = v / radix; |
306 | digit_[j] = v - carry * radix; // i.e., v % radix |
307 | } |
308 | return carry; |
309 | } |
310 | |
311 | template <int N> RT_API_ATTRS int MultiplyBy(int carry = 0) { |
312 | if (int newCarry{MultiplyByHelper<N>(carry)}) { |
313 | return AddCarry(digits_, newCarry); |
314 | } else { |
315 | return 0; |
316 | } |
317 | } |
318 | |
319 | template <int N> RT_API_ATTRS int MultiplyWithoutNormalization() { |
320 | if (int carry{MultiplyByHelper<N>(0)}) { |
321 | if (digits_ < digitLimit_) { |
322 | digit_[digits_++] = carry; |
323 | return 0; |
324 | } else { |
325 | return carry; |
326 | } |
327 | } else { |
328 | return 0; |
329 | } |
330 | } |
331 | |
332 | RT_API_ATTRS void LoseLeastSignificantDigit(); // with rounding |
333 | |
334 | RT_API_ATTRS void PushCarry(int carry) { |
335 | if (digits_ == maxDigits && RemoveLeastOrderZeroDigits() == 0) { |
336 | LoseLeastSignificantDigit(); |
337 | digit_[digits_ - 1] += carry; |
338 | } else { |
339 | digit_[digits_++] = carry; |
340 | } |
341 | } |
342 | |
343 | // Adds another number and then divides by two. |
344 | // Assumes same exponent and sign. |
345 | // Returns true when the result has effectively been rounded down. |
346 | RT_API_ATTRS bool Mean(const BigRadixFloatingPointNumber &); |
347 | |
348 | // Parses a floating-point number; leaves the pointer reference |
349 | // argument pointing at the next character after what was recognized. |
350 | // The "end" argument can be left null if the caller is sure that the |
351 | // string is properly terminated with an addressable character that |
352 | // can't be in a valid floating-point character. |
353 | RT_API_ATTRS bool ParseNumber(const char *&, bool &inexact, const char *end); |
354 | |
355 | using Raw = typename Real::RawType; |
356 | constexpr RT_API_ATTRS Raw SignBit() const { |
357 | return Raw{isNegative_} << (Real::bits - 1); |
358 | } |
359 | constexpr RT_API_ATTRS Raw Infinity() const { |
360 | Raw result{static_cast<Raw>(Real::maxExponent)}; |
361 | result <<= Real::significandBits; |
362 | result |= SignBit(); |
363 | if constexpr (Real::bits == 80) { // x87 |
364 | result |= Raw{1} << 63; |
365 | } |
366 | return result; |
367 | } |
368 | constexpr RT_API_ATTRS Raw NaN(bool isQuiet = true) { |
369 | Raw result{Real::maxExponent}; |
370 | result <<= Real::significandBits; |
371 | result |= SignBit(); |
372 | if constexpr (Real::bits == 80) { // x87 |
373 | result |= Raw{isQuiet ? 3u : 2u} << 62; |
374 | } else { |
375 | Raw quiet{isQuiet ? Raw{2} : Raw{1}}; |
376 | quiet <<= Real::significandBits - 2; |
377 | result |= quiet; |
378 | } |
379 | return result; |
380 | } |
381 | constexpr RT_API_ATTRS Raw HUGE() const { |
382 | Raw result{static_cast<Raw>(Real::maxExponent)}; |
383 | result <<= Real::significandBits; |
384 | result |= SignBit(); |
385 | return result - 1; // decrement exponent, set all significand bits |
386 | } |
387 | |
388 | Digit digit_[maxDigits]; // in little-endian order: digit_[0] is LSD |
389 | int digits_{0}; // # of elements in digit_[] array; zero when zero |
390 | int digitLimit_{maxDigits}; // precision clamp |
391 | int exponent_{0}; // signed power of ten |
392 | bool isNegative_{false}; |
393 | enum FortranRounding rounding_ { RoundNearest }; |
394 | }; |
395 | } // namespace Fortran::decimal |
396 | #endif |
397 | |