1 | //===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "big-radix-floating-point.h" |
10 | #include "flang/Common/bit-population-count.h" |
11 | #include "flang/Common/leading-zero-bit-count.h" |
12 | #include "flang/Decimal/binary-floating-point.h" |
13 | #include "flang/Decimal/decimal.h" |
14 | #include <cinttypes> |
15 | #include <cstring> |
16 | #include <ctype.h> |
17 | |
18 | namespace Fortran::decimal { |
19 | |
20 | template <int PREC, int LOG10RADIX> |
21 | bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber( |
22 | const char *&p, bool &inexact, const char *end) { |
23 | SetToZero(); |
24 | if (end && p >= end) { |
25 | return false; |
26 | } |
27 | // Skip leading spaces |
28 | for (; p != end && *p == ' '; ++p) { |
29 | } |
30 | if (p == end) { |
31 | return false; |
32 | } |
33 | const char *q{p}; |
34 | isNegative_ = *q == '-'; |
35 | if (*q == '-' || *q == '+') { |
36 | ++q; |
37 | } |
38 | const char *start{q}; |
39 | for (; q != end && *q == '0'; ++q) { |
40 | } |
41 | const char *firstDigit{q}; |
42 | for (; q != end && *q >= '0' && *q <= '9'; ++q) { |
43 | } |
44 | const char *point{nullptr}; |
45 | if (q != end && *q == '.') { |
46 | point = q; |
47 | for (++q; q != end && *q >= '0' && *q <= '9'; ++q) { |
48 | } |
49 | } |
50 | if (q == start || (q == start + 1 && start == point)) { |
51 | return false; // require at least one digit |
52 | } |
53 | // There's a valid number here; set the reference argument to point to |
54 | // the first character afterward, which might be an exponent part. |
55 | p = q; |
56 | // Strip off trailing zeroes |
57 | if (point) { |
58 | while (q[-1] == '0') { |
59 | --q; |
60 | } |
61 | if (q[-1] == '.') { |
62 | point = nullptr; |
63 | --q; |
64 | } |
65 | } |
66 | if (!point) { |
67 | while (q > firstDigit && q[-1] == '0') { |
68 | --q; |
69 | ++exponent_; |
70 | } |
71 | } |
72 | // Trim any excess digits |
73 | const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)}; |
74 | if (q > limit) { |
75 | inexact = true; |
76 | if (point >= limit) { |
77 | q = point; |
78 | point = nullptr; |
79 | } |
80 | if (!point) { |
81 | exponent_ += q - limit; |
82 | } |
83 | q = limit; |
84 | } |
85 | if (point) { |
86 | exponent_ -= static_cast<int>(q - point - 1); |
87 | } |
88 | if (q == firstDigit) { |
89 | exponent_ = 0; // all zeros |
90 | } |
91 | // Rack the decimal digits up into big Digits. |
92 | for (auto times{radix}; q-- > firstDigit;) { |
93 | if (*q != '.') { |
94 | if (times == radix) { |
95 | digit_[digits_++] = *q - '0'; |
96 | times = 10; |
97 | } else { |
98 | digit_[digits_ - 1] += times * (*q - '0'); |
99 | times *= 10; |
100 | } |
101 | } |
102 | } |
103 | // Look for an optional exponent field. |
104 | if (p == end) { |
105 | return true; |
106 | } |
107 | q = p; |
108 | switch (*q) { |
109 | case 'e': |
110 | case 'E': |
111 | case 'd': |
112 | case 'D': |
113 | case 'q': |
114 | case 'Q': { |
115 | if (++q == end) { |
116 | break; |
117 | } |
118 | bool negExpo{*q == '-'}; |
119 | if (*q == '-' || *q == '+') { |
120 | ++q; |
121 | } |
122 | if (q != end && *q >= '0' && *q <= '9') { |
123 | int expo{0}; |
124 | for (; q != end && *q == '0'; ++q) { |
125 | } |
126 | const char *expDig{q}; |
127 | for (; q != end && *q >= '0' && *q <= '9'; ++q) { |
128 | expo = 10 * expo + *q - '0'; |
129 | } |
130 | if (q >= expDig + 8) { |
131 | // There's a ridiculous number of nonzero exponent digits. |
132 | // The decimal->binary conversion routine will cope with |
133 | // returning 0 or Inf, but we must ensure that "expo" didn't |
134 | // overflow back around to something legal. |
135 | expo = 10 * Real::decimalRange; |
136 | exponent_ = 0; |
137 | } |
138 | p = q; // exponent is valid; advance the termination pointer |
139 | if (negExpo) { |
140 | exponent_ -= expo; |
141 | } else { |
142 | exponent_ += expo; |
143 | } |
144 | } |
145 | } break; |
146 | default: |
147 | break; |
148 | } |
149 | return true; |
150 | } |
151 | |
152 | template <int PREC, int LOG10RADIX> |
153 | void BigRadixFloatingPointNumber<PREC, |
154 | LOG10RADIX>::LoseLeastSignificantDigit() { |
155 | Digit LSD{digit_[0]}; |
156 | for (int j{0}; j < digits_ - 1; ++j) { |
157 | digit_[j] = digit_[j + 1]; |
158 | } |
159 | digit_[digits_ - 1] = 0; |
160 | bool incr{false}; |
161 | switch (rounding_) { |
162 | case RoundNearest: |
163 | incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0); |
164 | break; |
165 | case RoundUp: |
166 | incr = LSD > 0 && !isNegative_; |
167 | break; |
168 | case RoundDown: |
169 | incr = LSD > 0 && isNegative_; |
170 | break; |
171 | case RoundToZero: |
172 | break; |
173 | case RoundCompatible: |
174 | incr = LSD >= radix / 2; |
175 | break; |
176 | } |
177 | for (int j{0}; (digit_[j] += incr) == radix; ++j) { |
178 | digit_[j] = 0; |
179 | } |
180 | } |
181 | |
182 | // This local utility class represents an unrounded nonnegative |
183 | // binary floating-point value with an unbiased (i.e., signed) |
184 | // binary exponent, an integer value (not a fraction) with an implied |
185 | // binary point to its *right*, and some guard bits for rounding. |
186 | template <int PREC> class IntermediateFloat { |
187 | public: |
188 | static constexpr int precision{PREC}; |
189 | using IntType = common::HostUnsignedIntType<precision>; |
190 | static constexpr IntType topBit{IntType{1} << (precision - 1)}; |
191 | static constexpr IntType mask{topBit + (topBit - 1)}; |
192 | |
193 | IntermediateFloat() {} |
194 | IntermediateFloat(const IntermediateFloat &) = default; |
195 | |
196 | // Assumes that exponent_ is valid on entry, and may increment it. |
197 | // Returns the number of guard_ bits that have been determined. |
198 | template <typename UINT> bool SetTo(UINT n) { |
199 | static constexpr int nBits{CHAR_BIT * sizeof n}; |
200 | if constexpr (precision >= nBits) { |
201 | value_ = n; |
202 | guard_ = 0; |
203 | return 0; |
204 | } else { |
205 | int shift{common::BitsNeededFor(n) - precision}; |
206 | if (shift <= 0) { |
207 | value_ = n; |
208 | guard_ = 0; |
209 | return 0; |
210 | } else { |
211 | value_ = n >> shift; |
212 | exponent_ += shift; |
213 | n <<= nBits - shift; |
214 | guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0); |
215 | return shift; |
216 | } |
217 | } |
218 | } |
219 | |
220 | void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; } |
221 | bool IsFull() const { return value_ >= topBit; } |
222 | void AdjustExponent(int by) { exponent_ += by; } |
223 | void SetGuard(int g) { |
224 | guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1); |
225 | } |
226 | |
227 | ConversionToBinaryResult<PREC> ToBinary( |
228 | bool isNegative, FortranRounding) const; |
229 | |
230 | private: |
231 | static constexpr int guardBits{3}; // guard, round, sticky |
232 | using GuardType = int; |
233 | static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)}; |
234 | |
235 | IntType value_{0}; |
236 | GuardType guard_{0}; |
237 | int exponent_{0}; |
238 | }; |
239 | |
240 | // The standard says that these overflow cases round to "representable" |
241 | // numbers, and some popular compilers interpret that to mean +/-HUGE() |
242 | // rather than +/-Inf. |
243 | static inline constexpr bool RoundOverflowToHuge( |
244 | enum FortranRounding rounding, bool isNegative) { |
245 | return rounding == RoundToZero || (!isNegative && rounding == RoundDown) || |
246 | (isNegative && rounding == RoundUp); |
247 | } |
248 | |
249 | template <int PREC> |
250 | ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary( |
251 | bool isNegative, FortranRounding rounding) const { |
252 | using Binary = BinaryFloatingPointNumber<PREC>; |
253 | // Create a fraction with a binary point to the left of the integer |
254 | // value_, and bias the exponent. |
255 | IntType fraction{value_}; |
256 | GuardType guard{guard_}; |
257 | int expo{exponent_ + Binary::exponentBias + (precision - 1)}; |
258 | while (expo < 1 && (fraction > 0 || guard > oneHalf)) { |
259 | guard = (guard & 1) | (guard >> 1) | |
260 | ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1)); |
261 | fraction >>= 1; |
262 | ++expo; |
263 | } |
264 | int flags{Exact}; |
265 | if (guard != 0) { |
266 | flags |= Inexact; |
267 | } |
268 | if (fraction == 0) { |
269 | if (guard <= oneHalf) { |
270 | if ((!isNegative && rounding == RoundUp) || |
271 | (isNegative && rounding == RoundDown)) { |
272 | // round to least nonzero value |
273 | expo = 0; |
274 | } else { // round to zero |
275 | if (guard != 0) { |
276 | flags |= Underflow; |
277 | } |
278 | return {Binary{}, static_cast<enum ConversionResultFlags>(flags)}; |
279 | } |
280 | } |
281 | } else { |
282 | // The value is nonzero; normalize it. |
283 | while (fraction < topBit && expo > 1) { |
284 | --expo; |
285 | fraction = fraction * 2 + (guard >> (guardBits - 2)); |
286 | guard = |
287 | (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1); |
288 | } |
289 | } |
290 | // Apply rounding |
291 | bool incr{false}; |
292 | switch (rounding) { |
293 | case RoundNearest: |
294 | incr = guard > oneHalf || (guard == oneHalf && (fraction & 1)); |
295 | break; |
296 | case RoundUp: |
297 | incr = guard != 0 && !isNegative; |
298 | break; |
299 | case RoundDown: |
300 | incr = guard != 0 && isNegative; |
301 | break; |
302 | case RoundToZero: |
303 | break; |
304 | case RoundCompatible: |
305 | incr = guard >= oneHalf; |
306 | break; |
307 | } |
308 | if (incr) { |
309 | if (fraction == mask) { |
310 | // rounding causes a carry |
311 | ++expo; |
312 | fraction = topBit; |
313 | } else { |
314 | ++fraction; |
315 | } |
316 | } |
317 | if (expo == 1 && fraction < topBit) { |
318 | expo = 0; // subnormal |
319 | flags |= Underflow; |
320 | } else if (expo == 0) { |
321 | flags |= Underflow; |
322 | } else if (expo >= Binary::maxExponent) { |
323 | if (RoundOverflowToHuge(rounding, isNegative)) { |
324 | expo = Binary::maxExponent - 1; |
325 | fraction = mask; |
326 | } else { // Inf |
327 | expo = Binary::maxExponent; |
328 | flags |= Overflow; |
329 | if constexpr (Binary::bits == 80) { // x87 |
330 | fraction = IntType{1} << 63; |
331 | } else { |
332 | fraction = 0; |
333 | } |
334 | } |
335 | } |
336 | using Raw = typename Binary::RawType; |
337 | Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1); |
338 | raw |= static_cast<Raw>(expo) << Binary::significandBits; |
339 | if constexpr (Binary::isImplicitMSB) { |
340 | fraction &= ~topBit; |
341 | } |
342 | raw |= fraction; |
343 | return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)}; |
344 | } |
345 | |
346 | template <int PREC, int LOG10RADIX> |
347 | ConversionToBinaryResult<PREC> |
348 | BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() { |
349 | // On entry, *this holds a multi-precision integer value in a radix of a |
350 | // large power of ten. Its radix point is defined to be to the right of its |
351 | // digits, and "exponent_" is the power of ten by which it is to be scaled. |
352 | Normalize(); |
353 | if (digits_ == 0) { // zero value |
354 | return {Real{SignBit()}}; |
355 | } |
356 | // The value is not zero: x = D. * 10.**E |
357 | // Shift our perspective on the radix (& decimal) point so that |
358 | // it sits to the *left* of the digits: i.e., x = .D * 10.**E |
359 | exponent_ += digits_ * log10Radix; |
360 | // Sanity checks for ridiculous exponents |
361 | static constexpr int crazy{2 * Real::decimalRange + log10Radix}; |
362 | if (exponent_ < -crazy) { |
363 | enum ConversionResultFlags flags { |
364 | static_cast<enum ConversionResultFlags>(Inexact | Underflow) |
365 | }; |
366 | if ((!isNegative_ && rounding_ == RoundUp) || |
367 | (isNegative_ && rounding_ == RoundDown)) { |
368 | // return least nonzero value |
369 | return {Real{Raw{1} | SignBit()}, flags}; |
370 | } else { // underflow to +/-0. |
371 | return {Real{SignBit()}, flags}; |
372 | } |
373 | } else if (exponent_ > crazy) { // overflow to +/-HUGE() or +/-Inf |
374 | if (RoundOverflowToHuge(rounding_, isNegative_)) { |
375 | return {Real{HUGE()}}; |
376 | } else { |
377 | return {Real{Infinity()}, Overflow}; |
378 | } |
379 | } |
380 | // Apply any negative decimal exponent by multiplication |
381 | // by a power of two, adjusting the binary exponent to compensate. |
382 | IntermediateFloat<PREC> f; |
383 | while (exponent_ < log10Radix) { |
384 | // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9) |
385 | f.AdjustExponent(-9); |
386 | digitLimit_ = digits_; |
387 | if (int carry{MultiplyWithoutNormalization<512>()}) { |
388 | // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) |
389 | PushCarry(carry); |
390 | exponent_ += log10Radix; |
391 | } |
392 | } |
393 | // Apply any positive decimal exponent greater than |
394 | // is needed to treat the topmost digit as an integer |
395 | // part by multiplying by 10 or 10000 repeatedly. |
396 | while (exponent_ > log10Radix) { |
397 | digitLimit_ = digits_; |
398 | int carry; |
399 | if (exponent_ >= log10Radix + 4) { |
400 | // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4) |
401 | exponent_ -= 4; |
402 | carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>(); |
403 | f.AdjustExponent(4); |
404 | } else { |
405 | // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1) |
406 | --exponent_; |
407 | carry = MultiplyWithoutNormalization<5>(); |
408 | f.AdjustExponent(1); |
409 | } |
410 | if (carry != 0) { |
411 | // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex) |
412 | PushCarry(carry); |
413 | exponent_ += log10Radix; |
414 | } |
415 | } |
416 | // So exponent_ is now log10Radix, meaning that the |
417 | // MSD can be taken as an integer part and transferred |
418 | // to the binary result. |
419 | // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex) |
420 | int guardShift{f.SetTo(digit_[--digits_])}; |
421 | // Transfer additional bits until the result is normal. |
422 | digitLimit_ = digits_; |
423 | while (!f.IsFull()) { |
424 | // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1) |
425 | f.AdjustExponent(-1); |
426 | std::uint32_t carry = MultiplyWithoutNormalization<2>(); |
427 | f.ShiftIn(carry); |
428 | } |
429 | // Get the next few bits for rounding. Allow for some guard bits |
430 | // that may have already been set in f.SetTo() above. |
431 | int guard{0}; |
432 | if (guardShift == 0) { |
433 | guard = MultiplyWithoutNormalization<4>(); |
434 | } else if (guardShift == 1) { |
435 | guard = MultiplyWithoutNormalization<2>(); |
436 | } |
437 | guard = guard + guard + !IsZero(); |
438 | f.SetGuard(guard); |
439 | return f.ToBinary(isNegative_, rounding_); |
440 | } |
441 | |
442 | template <int PREC, int LOG10RADIX> |
443 | ConversionToBinaryResult<PREC> |
444 | BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary( |
445 | const char *&p, const char *limit) { |
446 | bool inexact{false}; |
447 | if (ParseNumber(p, inexact, end: limit)) { |
448 | auto result{ConvertToBinary()}; |
449 | if (inexact) { |
450 | result.flags = |
451 | static_cast<enum ConversionResultFlags>(result.flags | Inexact); |
452 | } |
453 | return result; |
454 | } else { |
455 | // Could not parse a decimal floating-point number. p has been |
456 | // advanced over any leading spaces. Most Fortran compilers set |
457 | // the sign bit for -NaN. |
458 | const char *q{p}; |
459 | if (!limit || q < limit) { |
460 | isNegative_ = *q == '-'; |
461 | if (isNegative_ || *q == '+') { |
462 | ++q; |
463 | } |
464 | } |
465 | if ((!limit || limit >= q + 3) && toupper(c: q[0]) == 'N' && |
466 | toupper(c: q[1]) == 'A' && toupper(c: q[2]) == 'N') { |
467 | // NaN |
468 | p = q + 3; |
469 | bool isQuiet{true}; |
470 | if ((!limit || p < limit) && *p == '(') { |
471 | int depth{1}; |
472 | do { |
473 | ++p; |
474 | if (limit && p >= limit) { |
475 | // Invalid input |
476 | return {Real{NaN(false)}, Invalid}; |
477 | } else if (*p == '(') { |
478 | ++depth; |
479 | } else if (*p == ')') { |
480 | --depth; |
481 | } else if (*p != ' ') { |
482 | // Implementation dependent, but other compilers |
483 | // all return quiet NaNs. |
484 | } |
485 | } while (depth > 0); |
486 | ++p; |
487 | } |
488 | return {Real{NaN(isQuiet)}}; |
489 | } else { // Inf? |
490 | if ((!limit || limit >= q + 3) && toupper(c: q[0]) == 'I' && |
491 | toupper(c: q[1]) == 'N' && toupper(c: q[2]) == 'F') { |
492 | if ((!limit || limit >= q + 8) && toupper(c: q[3]) == 'I' && |
493 | toupper(c: q[4]) == 'N' && toupper(c: q[5]) == 'I' && |
494 | toupper(c: q[6]) == 'T' && toupper(c: q[7]) == 'Y') { |
495 | p = q + 8; |
496 | } else { |
497 | p = q + 3; |
498 | } |
499 | return {Real{Infinity()}}; |
500 | } else { |
501 | // Invalid input |
502 | return {Real{NaN()}, Invalid}; |
503 | } |
504 | } |
505 | } |
506 | } |
507 | |
508 | template <int PREC> |
509 | ConversionToBinaryResult<PREC> ConvertToBinary( |
510 | const char *&p, enum FortranRounding rounding, const char *end) { |
511 | return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p, end); |
512 | } |
513 | |
514 | template ConversionToBinaryResult<8> ConvertToBinary<8>( |
515 | const char *&, enum FortranRounding, const char *end); |
516 | template ConversionToBinaryResult<11> ConvertToBinary<11>( |
517 | const char *&, enum FortranRounding, const char *end); |
518 | template ConversionToBinaryResult<24> ConvertToBinary<24>( |
519 | const char *&, enum FortranRounding, const char *end); |
520 | template ConversionToBinaryResult<53> ConvertToBinary<53>( |
521 | const char *&, enum FortranRounding, const char *end); |
522 | template ConversionToBinaryResult<64> ConvertToBinary<64>( |
523 | const char *&, enum FortranRounding, const char *end); |
524 | template ConversionToBinaryResult<113> ConvertToBinary<113>( |
525 | const char *&, enum FortranRounding, const char *end); |
526 | |
527 | extern "C" { |
528 | enum ConversionResultFlags ConvertDecimalToFloat( |
529 | const char **p, float *f, enum FortranRounding rounding) { |
530 | auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)}; |
531 | std::memcpy(dest: reinterpret_cast<void *>(f), |
532 | src: reinterpret_cast<const void *>(&result.binary), n: sizeof *f); |
533 | return result.flags; |
534 | } |
535 | enum ConversionResultFlags ConvertDecimalToDouble( |
536 | const char **p, double *d, enum FortranRounding rounding) { |
537 | auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)}; |
538 | std::memcpy(dest: reinterpret_cast<void *>(d), |
539 | src: reinterpret_cast<const void *>(&result.binary), n: sizeof *d); |
540 | return result.flags; |
541 | } |
542 | enum ConversionResultFlags ConvertDecimalToLongDouble( |
543 | const char **p, long double *ld, enum FortranRounding rounding) { |
544 | auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)}; |
545 | std::memcpy(dest: reinterpret_cast<void *>(ld), |
546 | src: reinterpret_cast<const void *>(&result.binary), n: sizeof *ld); |
547 | return result.flags; |
548 | } |
549 | } |
550 | } // namespace Fortran::decimal |
551 | |