1 | //===-- lib/Decimal/big-radix-floating-point.h ------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_ |
10 | #define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_ |
11 | |
12 | // This is a helper class for use in floating-point conversions between |
13 | // binary and decimal representations. It holds a multiple-precision |
14 | // integer value using digits of a radix that is a large even power of ten |
15 | // (10,000,000,000,000,000 by default, 10**16). These digits are accompanied |
16 | // by a signed exponent that denotes multiplication by a power of ten. |
17 | // The effective radix point is to the right of the digits (i.e., they do |
18 | // not represent a fraction). |
19 | // |
20 | // The operations supported by this class are limited to those required |
21 | // for conversions between binary and decimal representations; it is not |
22 | // a general-purpose facility. |
23 | |
24 | #include "flang/Common/bit-population-count.h" |
25 | #include "flang/Common/leading-zero-bit-count.h" |
26 | #include "flang/Common/uint128.h" |
27 | #include "flang/Decimal/binary-floating-point.h" |
28 | #include "flang/Decimal/decimal.h" |
29 | #include <cinttypes> |
30 | #include <limits> |
31 | #include <type_traits> |
32 | |
33 | namespace Fortran::decimal { |
34 | |
35 | static constexpr std::uint64_t TenToThe(int power) { |
36 | return power <= 0 ? 1 : 10 * TenToThe(power: power - 1); |
37 | } |
38 | |
39 | // 10**(LOG10RADIX + 3) must be < 2**wordbits, and LOG10RADIX must be |
40 | // even, so that pairs of decimal digits do not straddle Digits. |
41 | // So LOG10RADIX must be 16 or 6. |
42 | template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber { |
43 | public: |
44 | using Real = BinaryFloatingPointNumber<PREC>; |
45 | static constexpr int log10Radix{LOG10RADIX}; |
46 | |
47 | private: |
48 | static constexpr std::uint64_t uint64Radix{TenToThe(power: log10Radix)}; |
49 | static constexpr int minDigitBits{ |
50 | 64 - common::LeadingZeroBitCount(uint64Radix)}; |
51 | using Digit = common::HostUnsignedIntType<minDigitBits>; |
52 | static constexpr Digit radix{uint64Radix}; |
53 | static_assert(radix < std::numeric_limits<Digit>::max() / 1000, |
54 | "radix is somehow too big" ); |
55 | static_assert(radix > std::numeric_limits<Digit>::max() / 10000, |
56 | "radix is somehow too small" ); |
57 | |
58 | // The base-2 logarithm of the least significant bit that can arise |
59 | // in a subnormal IEEE floating-point number. |
60 | static constexpr int minLog2AnyBit{ |
61 | -Real::exponentBias - Real::binaryPrecision}; |
62 | |
63 | // The number of Digits needed to represent the smallest subnormal. |
64 | static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix}; |
65 | |
66 | public: |
67 | explicit BigRadixFloatingPointNumber( |
68 | enum FortranRounding rounding = RoundNearest) |
69 | : rounding_{rounding} {} |
70 | |
71 | // Converts a binary floating point value. |
72 | explicit BigRadixFloatingPointNumber( |
73 | Real, enum FortranRounding = RoundNearest); |
74 | |
75 | BigRadixFloatingPointNumber &SetToZero() { |
76 | isNegative_ = false; |
77 | digits_ = 0; |
78 | exponent_ = 0; |
79 | return *this; |
80 | } |
81 | |
82 | // Converts decimal floating-point to binary. |
83 | ConversionToBinaryResult<PREC> ConvertToBinary(); |
84 | |
85 | // Parses and converts to binary. Handles leading spaces, |
86 | // "NaN", & optionally-signed "Inf". Does not skip internal |
87 | // spaces. |
88 | // The argument is a reference to a pointer that is left |
89 | // pointing to the first character that wasn't parsed. |
90 | ConversionToBinaryResult<PREC> ConvertToBinary( |
91 | const char *&, const char *end = nullptr); |
92 | |
93 | // Formats a decimal floating-point number to a user buffer. |
94 | // May emit "NaN" or "Inf", or an possibly-signed integer. |
95 | // No decimal point is written, but if it were, it would be |
96 | // after the last digit; the effective decimal exponent is |
97 | // returned as part of the result structure so that it can be |
98 | // formatted by the client. |
99 | ConversionToDecimalResult ConvertToDecimal( |
100 | char *, std::size_t, enum DecimalConversionFlags, int digits) const; |
101 | |
102 | // Discard decimal digits not needed to distinguish this value |
103 | // from the decimal encodings of two others (viz., the nearest binary |
104 | // floating-point numbers immediately below and above this one). |
105 | // The last decimal digit may not be uniquely determined in all |
106 | // cases, and will be the mean value when that is so (e.g., if |
107 | // last decimal digit values 6-8 would all work, it'll be a 7). |
108 | // This minimization necessarily assumes that the value will be |
109 | // emitted and read back into the same (or less precise) format |
110 | // with default rounding to the nearest value. |
111 | void Minimize( |
112 | BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more); |
113 | |
114 | template <typename STREAM> STREAM &Dump(STREAM &) const; |
115 | |
116 | private: |
117 | BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber &that) |
118 | : digits_{that.digits_}, exponent_{that.exponent_}, |
119 | isNegative_{that.isNegative_}, rounding_{that.rounding_} { |
120 | for (int j{0}; j < digits_; ++j) { |
121 | digit_[j] = that.digit_[j]; |
122 | } |
123 | } |
124 | |
125 | bool IsZero() const { |
126 | // Don't assume normalization. |
127 | for (int j{0}; j < digits_; ++j) { |
128 | if (digit_[j] != 0) { |
129 | return false; |
130 | } |
131 | } |
132 | return true; |
133 | } |
134 | |
135 | // Predicate: true when 10*value would cause a carry. |
136 | // (When this happens during decimal-to-binary conversion, |
137 | // there are more digits in the input string than can be |
138 | // represented precisely.) |
139 | bool IsFull() const { |
140 | return digits_ == digitLimit_ && digit_[digits_ - 1] >= radix / 10; |
141 | } |
142 | |
143 | // Sets *this to an unsigned integer value. |
144 | // Returns any remainder. |
145 | template <typename UINT> UINT SetTo(UINT n) { |
146 | static_assert( |
147 | std::is_same_v<UINT, common::uint128_t> || std::is_unsigned_v<UINT>); |
148 | SetToZero(); |
149 | while (n != 0) { |
150 | auto q{n / 10u}; |
151 | if (n != q * 10) { |
152 | break; |
153 | } |
154 | ++exponent_; |
155 | n = q; |
156 | } |
157 | if constexpr (sizeof n < sizeof(Digit)) { |
158 | if (n != 0) { |
159 | digit_[digits_++] = n; |
160 | } |
161 | return 0; |
162 | } else { |
163 | while (n != 0 && digits_ < digitLimit_) { |
164 | auto q{n / radix}; |
165 | digit_[digits_++] = static_cast<Digit>(n - q * radix); |
166 | n = q; |
167 | } |
168 | return n; |
169 | } |
170 | } |
171 | |
172 | int RemoveLeastOrderZeroDigits() { |
173 | int remove{0}; |
174 | if (digits_ > 0 && digit_[0] == 0) { |
175 | while (remove < digits_ && digit_[remove] == 0) { |
176 | ++remove; |
177 | } |
178 | if (remove >= digits_) { |
179 | digits_ = 0; |
180 | } else if (remove > 0) { |
181 | #if defined __GNUC__ && __GNUC__ < 8 |
182 | // (&& j + remove < maxDigits) was added to avoid GCC < 8 build failure |
183 | // on -Werror=array-bounds. This can be removed if -Werror is disable. |
184 | for (int j{0}; j + remove < digits_ && (j + remove < maxDigits); ++j) { |
185 | #else |
186 | for (int j{0}; j + remove < digits_; ++j) { |
187 | #endif |
188 | digit_[j] = digit_[j + remove]; |
189 | } |
190 | digits_ -= remove; |
191 | } |
192 | } |
193 | return remove; |
194 | } |
195 | |
196 | void RemoveLeadingZeroDigits() { |
197 | while (digits_ > 0 && digit_[digits_ - 1] == 0) { |
198 | --digits_; |
199 | } |
200 | } |
201 | |
202 | void Normalize() { |
203 | RemoveLeadingZeroDigits(); |
204 | exponent_ += RemoveLeastOrderZeroDigits() * log10Radix; |
205 | } |
206 | |
207 | // This limited divisibility test only works for even divisors of the radix, |
208 | // which is fine since it's only ever used with 2 and 5. |
209 | template <int N> bool IsDivisibleBy() const { |
210 | static_assert(N > 1 && radix % N == 0, "bad modulus" ); |
211 | return digits_ == 0 || (digit_[0] % N) == 0; |
212 | } |
213 | |
214 | template <unsigned DIVISOR> int DivideBy() { |
215 | Digit remainder{0}; |
216 | for (int j{digits_ - 1}; j >= 0; --j) { |
217 | Digit q{digit_[j] / DIVISOR}; |
218 | Digit nrem{digit_[j] - DIVISOR * q}; |
219 | digit_[j] = q + (radix / DIVISOR) * remainder; |
220 | remainder = nrem; |
221 | } |
222 | return remainder; |
223 | } |
224 | |
225 | void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix |
226 | Digit remainder{0}; |
227 | auto mask{(Digit{1} << twoPow) - 1}; |
228 | auto coeff{radix >> twoPow}; |
229 | for (int j{digits_ - 1}; j >= 0; --j) { |
230 | auto nrem{digit_[j] & mask}; |
231 | digit_[j] = (digit_[j] >> twoPow) + coeff * remainder; |
232 | remainder = nrem; |
233 | } |
234 | } |
235 | |
236 | // Returns true on overflow |
237 | bool DivideByPowerOfTwoInPlace(int twoPow) { |
238 | if (digits_ > 0) { |
239 | while (twoPow > 0) { |
240 | int chunk{twoPow > log10Radix ? log10Radix : twoPow}; |
241 | if ((digit_[0] & ((Digit{1} << chunk) - 1)) == 0) { |
242 | DivideByPowerOfTwo(twoPow: chunk); |
243 | twoPow -= chunk; |
244 | continue; |
245 | } |
246 | twoPow -= chunk; |
247 | if (digit_[digits_ - 1] >> chunk != 0) { |
248 | if (digits_ == digitLimit_) { |
249 | return true; // overflow |
250 | } |
251 | digit_[digits_++] = 0; |
252 | } |
253 | auto remainder{digit_[digits_ - 1]}; |
254 | exponent_ -= log10Radix; |
255 | auto coeff{radix >> chunk}; // precise; radix is (5*2)**log10Radix |
256 | auto mask{(Digit{1} << chunk) - 1}; |
257 | for (int j{digits_ - 1}; j >= 1; --j) { |
258 | digit_[j] = (digit_[j - 1] >> chunk) + coeff * remainder; |
259 | remainder = digit_[j - 1] & mask; |
260 | } |
261 | digit_[0] = coeff * remainder; |
262 | } |
263 | } |
264 | return false; // no overflow |
265 | } |
266 | |
267 | int AddCarry(int position = 0, int carry = 1) { |
268 | for (; position < digits_; ++position) { |
269 | Digit v{digit_[position] + carry}; |
270 | if (v < radix) { |
271 | digit_[position] = v; |
272 | return 0; |
273 | } |
274 | digit_[position] = v - radix; |
275 | carry = 1; |
276 | } |
277 | if (digits_ < digitLimit_) { |
278 | digit_[digits_++] = carry; |
279 | return 0; |
280 | } |
281 | Normalize(); |
282 | if (digits_ < digitLimit_) { |
283 | digit_[digits_++] = carry; |
284 | return 0; |
285 | } |
286 | return carry; |
287 | } |
288 | |
289 | void Decrement() { |
290 | for (int j{0}; digit_[j]-- == 0; ++j) { |
291 | digit_[j] = radix - 1; |
292 | } |
293 | } |
294 | |
295 | template <int N> int MultiplyByHelper(int carry = 0) { |
296 | for (int j{0}; j < digits_; ++j) { |
297 | auto v{N * digit_[j] + carry}; |
298 | carry = v / radix; |
299 | digit_[j] = v - carry * radix; // i.e., v % radix |
300 | } |
301 | return carry; |
302 | } |
303 | |
304 | template <int N> int MultiplyBy(int carry = 0) { |
305 | if (int newCarry{MultiplyByHelper<N>(carry)}) { |
306 | return AddCarry(position: digits_, carry: newCarry); |
307 | } else { |
308 | return 0; |
309 | } |
310 | } |
311 | |
312 | template <int N> int MultiplyWithoutNormalization() { |
313 | if (int carry{MultiplyByHelper<N>(0)}) { |
314 | if (digits_ < digitLimit_) { |
315 | digit_[digits_++] = carry; |
316 | return 0; |
317 | } else { |
318 | return carry; |
319 | } |
320 | } else { |
321 | return 0; |
322 | } |
323 | } |
324 | |
325 | void LoseLeastSignificantDigit(); // with rounding |
326 | |
327 | void PushCarry(int carry) { |
328 | if (digits_ == maxDigits && RemoveLeastOrderZeroDigits() == 0) { |
329 | LoseLeastSignificantDigit(); |
330 | digit_[digits_ - 1] += carry; |
331 | } else { |
332 | digit_[digits_++] = carry; |
333 | } |
334 | } |
335 | |
336 | // Adds another number and then divides by two. |
337 | // Assumes same exponent and sign. |
338 | // Returns true when the result has effectively been rounded down. |
339 | bool Mean(const BigRadixFloatingPointNumber &); |
340 | |
341 | // Parses a floating-point number; leaves the pointer reference |
342 | // argument pointing at the next character after what was recognized. |
343 | // The "end" argument can be left null if the caller is sure that the |
344 | // string is properly terminated with an addressable character that |
345 | // can't be in a valid floating-point character. |
346 | bool ParseNumber(const char *&, bool &inexact, const char *end); |
347 | |
348 | using Raw = typename Real::RawType; |
349 | constexpr Raw SignBit() const { return Raw{isNegative_} << (Real::bits - 1); } |
350 | constexpr Raw Infinity() const { |
351 | Raw result{static_cast<Raw>(Real::maxExponent)}; |
352 | result <<= Real::significandBits; |
353 | result |= SignBit(); |
354 | if constexpr (Real::bits == 80) { // x87 |
355 | result |= Raw{1} << 63; |
356 | } |
357 | return result; |
358 | } |
359 | constexpr Raw NaN(bool isQuiet = true) { |
360 | Raw result{Real::maxExponent}; |
361 | result <<= Real::significandBits; |
362 | result |= SignBit(); |
363 | if constexpr (Real::bits == 80) { // x87 |
364 | result |= Raw{isQuiet ? 3u : 2u} << 62; |
365 | } else { |
366 | Raw quiet{isQuiet ? Raw{2} : Raw{1}}; |
367 | quiet <<= Real::significandBits - 2; |
368 | result |= quiet; |
369 | } |
370 | return result; |
371 | } |
372 | constexpr Raw HUGE() const { |
373 | Raw result{static_cast<Raw>(Real::maxExponent)}; |
374 | result <<= Real::significandBits; |
375 | result |= SignBit(); |
376 | return result - 1; // decrement exponent, set all significand bits |
377 | } |
378 | |
379 | Digit digit_[maxDigits]; // in little-endian order: digit_[0] is LSD |
380 | int digits_{0}; // # of elements in digit_[] array; zero when zero |
381 | int digitLimit_{maxDigits}; // precision clamp |
382 | int exponent_{0}; // signed power of ten |
383 | bool isNegative_{false}; |
384 | enum FortranRounding rounding_ { RoundNearest }; |
385 | }; |
386 | } // namespace Fortran::decimal |
387 | #endif |
388 | |