| 1 | //===-- lib/Decimal/big-radix-floating-point.h ------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #ifndef FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_ |
| 10 | #define FORTRAN_DECIMAL_BIG_RADIX_FLOATING_POINT_H_ |
| 11 | |
| 12 | // This is a helper class for use in floating-point conversions between |
| 13 | // binary and decimal representations. It holds a multiple-precision |
| 14 | // integer value using digits of a radix that is a large even power of ten |
| 15 | // (10,000,000,000,000,000 by default, 10**16). These digits are accompanied |
| 16 | // by a signed exponent that denotes multiplication by a power of ten. |
| 17 | // The effective radix point is to the right of the digits (i.e., they do |
| 18 | // not represent a fraction). |
| 19 | // |
| 20 | // The operations supported by this class are limited to those required |
| 21 | // for conversions between binary and decimal representations; it is not |
| 22 | // a general-purpose facility. |
| 23 | |
| 24 | #include "flang/Common/bit-population-count.h" |
| 25 | #include "flang/Common/leading-zero-bit-count.h" |
| 26 | #include "flang/Common/uint128.h" |
| 27 | #include "flang/Decimal/binary-floating-point.h" |
| 28 | #include "flang/Decimal/decimal.h" |
| 29 | #include <cinttypes> |
| 30 | #include <limits> |
| 31 | #include <type_traits> |
| 32 | |
| 33 | // Some environments, viz. glibc 2.17, allow the macro HUGE |
| 34 | // to leak out of <math.h>. |
| 35 | #undef HUGE |
| 36 | |
| 37 | namespace Fortran::decimal { |
| 38 | |
| 39 | static constexpr std::uint64_t TenToThe(int power) { |
| 40 | return power <= 0 ? 1 : 10 * TenToThe(power: power - 1); |
| 41 | } |
| 42 | |
| 43 | // 10**(LOG10RADIX + 3) must be < 2**wordbits, and LOG10RADIX must be |
| 44 | // even, so that pairs of decimal digits do not straddle Digits. |
| 45 | // So LOG10RADIX must be 16 or 6. |
| 46 | template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber { |
| 47 | public: |
| 48 | using Real = BinaryFloatingPointNumber<PREC>; |
| 49 | static constexpr int log10Radix{LOG10RADIX}; |
| 50 | |
| 51 | private: |
| 52 | static constexpr std::uint64_t uint64Radix{TenToThe(power: log10Radix)}; |
| 53 | static constexpr int minDigitBits{ |
| 54 | 64 - common::LeadingZeroBitCount(uint64Radix)}; |
| 55 | using Digit = common::HostUnsignedIntType<minDigitBits>; |
| 56 | static constexpr Digit radix{uint64Radix}; |
| 57 | static_assert(radix < std::numeric_limits<Digit>::max() / 1000, |
| 58 | "radix is somehow too big" ); |
| 59 | static_assert(radix > std::numeric_limits<Digit>::max() / 10000, |
| 60 | "radix is somehow too small" ); |
| 61 | |
| 62 | // The base-2 logarithm of the least significant bit that can arise |
| 63 | // in a subnormal IEEE floating-point number. |
| 64 | static constexpr int minLog2AnyBit{ |
| 65 | -Real::exponentBias - Real::binaryPrecision}; |
| 66 | |
| 67 | // The number of Digits needed to represent the smallest subnormal. |
| 68 | static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix}; |
| 69 | |
| 70 | public: |
| 71 | explicit RT_API_ATTRS BigRadixFloatingPointNumber( |
| 72 | enum FortranRounding rounding = RoundNearest) |
| 73 | : rounding_{rounding} {} |
| 74 | |
| 75 | // Converts a binary floating point value. |
| 76 | explicit RT_API_ATTRS BigRadixFloatingPointNumber( |
| 77 | Real, enum FortranRounding = RoundNearest); |
| 78 | |
| 79 | RT_API_ATTRS BigRadixFloatingPointNumber &SetToZero() { |
| 80 | isNegative_ = false; |
| 81 | digits_ = 0; |
| 82 | exponent_ = 0; |
| 83 | return *this; |
| 84 | } |
| 85 | |
| 86 | RT_API_ATTRS bool IsInteger() const { return exponent_ >= 0; } |
| 87 | |
| 88 | // Converts decimal floating-point to binary. |
| 89 | RT_API_ATTRS ConversionToBinaryResult<PREC> ConvertToBinary(); |
| 90 | |
| 91 | // Parses and converts to binary. Handles leading spaces, |
| 92 | // "NaN", & optionally-signed "Inf". Does not skip internal |
| 93 | // spaces. |
| 94 | // The argument is a reference to a pointer that is left |
| 95 | // pointing to the first character that wasn't parsed. |
| 96 | RT_API_ATTRS ConversionToBinaryResult<PREC> ConvertToBinary( |
| 97 | const char *&, const char *end = nullptr); |
| 98 | |
| 99 | // Formats a decimal floating-point number to a user buffer. |
| 100 | // May emit "NaN" or "Inf", or an possibly-signed integer. |
| 101 | // No decimal point is written, but if it were, it would be |
| 102 | // after the last digit; the effective decimal exponent is |
| 103 | // returned as part of the result structure so that it can be |
| 104 | // formatted by the client. |
| 105 | RT_API_ATTRS ConversionToDecimalResult ConvertToDecimal( |
| 106 | char *, std::size_t, enum DecimalConversionFlags, int digits) const; |
| 107 | |
| 108 | // Discard decimal digits not needed to distinguish this value |
| 109 | // from the decimal encodings of two others (viz., the nearest binary |
| 110 | // floating-point numbers immediately below and above this one). |
| 111 | // The last decimal digit may not be uniquely determined in all |
| 112 | // cases, and will be the mean value when that is so (e.g., if |
| 113 | // last decimal digit values 6-8 would all work, it'll be a 7). |
| 114 | // This minimization necessarily assumes that the value will be |
| 115 | // emitted and read back into the same (or less precise) format |
| 116 | // with default rounding to the nearest value. |
| 117 | RT_API_ATTRS void Minimize( |
| 118 | BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more); |
| 119 | |
| 120 | template <typename STREAM> STREAM &Dump(STREAM &) const; |
| 121 | |
| 122 | private: |
| 123 | RT_API_ATTRS BigRadixFloatingPointNumber( |
| 124 | const BigRadixFloatingPointNumber &that) |
| 125 | : digits_{that.digits_}, exponent_{that.exponent_}, |
| 126 | isNegative_{that.isNegative_}, rounding_{that.rounding_} { |
| 127 | for (int j{0}; j < digits_; ++j) { |
| 128 | digit_[j] = that.digit_[j]; |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | RT_API_ATTRS bool IsZero() const { |
| 133 | // Don't assume normalization. |
| 134 | for (int j{0}; j < digits_; ++j) { |
| 135 | if (digit_[j] != 0) { |
| 136 | return false; |
| 137 | } |
| 138 | } |
| 139 | return true; |
| 140 | } |
| 141 | |
| 142 | // Predicate: true when 10*value would cause a carry. |
| 143 | // (When this happens during decimal-to-binary conversion, |
| 144 | // there are more digits in the input string than can be |
| 145 | // represented precisely.) |
| 146 | RT_API_ATTRS bool IsFull() const { |
| 147 | return digits_ == digitLimit_ && digit_[digits_ - 1] >= radix / 10; |
| 148 | } |
| 149 | |
| 150 | // Sets *this to an unsigned integer value. |
| 151 | // Returns any remainder. |
| 152 | template <typename UINT> RT_API_ATTRS UINT SetTo(UINT n) { |
| 153 | static_assert( |
| 154 | std::is_same_v<UINT, common::uint128_t> || std::is_unsigned_v<UINT>); |
| 155 | SetToZero(); |
| 156 | while (n != 0) { |
| 157 | auto q{n / 10u}; |
| 158 | if (n != q * 10) { |
| 159 | break; |
| 160 | } |
| 161 | ++exponent_; |
| 162 | n = q; |
| 163 | } |
| 164 | if constexpr (sizeof n < sizeof(Digit)) { |
| 165 | if (n != 0) { |
| 166 | digit_[digits_++] = n; |
| 167 | } |
| 168 | return 0; |
| 169 | } else { |
| 170 | while (n != 0 && digits_ < digitLimit_) { |
| 171 | auto q{n / radix}; |
| 172 | digit_[digits_++] = static_cast<Digit>(n - q * radix); |
| 173 | n = q; |
| 174 | } |
| 175 | return n; |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | RT_API_ATTRS int RemoveLeastOrderZeroDigits() { |
| 180 | int remove{0}; |
| 181 | if (digits_ > 0 && digit_[0] == 0) { |
| 182 | while (remove < digits_ && digit_[remove] == 0) { |
| 183 | ++remove; |
| 184 | } |
| 185 | if (remove >= digits_) { |
| 186 | digits_ = 0; |
| 187 | } else if (remove > 0) { |
| 188 | #if defined __GNUC__ && __GNUC__ < 8 |
| 189 | // (&& j + remove < maxDigits) was added to avoid GCC < 8 build failure |
| 190 | // on -Werror=array-bounds. This can be removed if -Werror is disable. |
| 191 | for (int j{0}; j + remove < digits_ && (j + remove < maxDigits); ++j) { |
| 192 | #else |
| 193 | for (int j{0}; j + remove < digits_; ++j) { |
| 194 | #endif |
| 195 | digit_[j] = digit_[j + remove]; |
| 196 | } |
| 197 | digits_ -= remove; |
| 198 | } |
| 199 | } |
| 200 | return remove; |
| 201 | } |
| 202 | |
| 203 | RT_API_ATTRS void RemoveLeadingZeroDigits() { |
| 204 | while (digits_ > 0 && digit_[digits_ - 1] == 0) { |
| 205 | --digits_; |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | RT_API_ATTRS void Normalize() { |
| 210 | RemoveLeadingZeroDigits(); |
| 211 | exponent_ += RemoveLeastOrderZeroDigits() * log10Radix; |
| 212 | } |
| 213 | |
| 214 | // This limited divisibility test only works for even divisors of the radix, |
| 215 | // which is fine since it's only ever used with 2 and 5. |
| 216 | template <int N> RT_API_ATTRS bool IsDivisibleBy() const { |
| 217 | static_assert(N > 1 && radix % N == 0, "bad modulus" ); |
| 218 | return digits_ == 0 || (digit_[0] % N) == 0; |
| 219 | } |
| 220 | |
| 221 | template <unsigned DIVISOR> RT_API_ATTRS int DivideBy() { |
| 222 | Digit remainder{0}; |
| 223 | for (int j{digits_ - 1}; j >= 0; --j) { |
| 224 | Digit q{digit_[j] / DIVISOR}; |
| 225 | Digit nrem{digit_[j] - DIVISOR * q}; |
| 226 | digit_[j] = q + (radix / DIVISOR) * remainder; |
| 227 | remainder = nrem; |
| 228 | } |
| 229 | return remainder; |
| 230 | } |
| 231 | |
| 232 | RT_API_ATTRS void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix |
| 233 | Digit remainder{0}; |
| 234 | auto mask{(Digit{1} << twoPow) - 1}; |
| 235 | auto coeff{radix >> twoPow}; |
| 236 | for (int j{digits_ - 1}; j >= 0; --j) { |
| 237 | auto nrem{digit_[j] & mask}; |
| 238 | digit_[j] = (digit_[j] >> twoPow) + coeff * remainder; |
| 239 | remainder = nrem; |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | // Returns true on overflow |
| 244 | RT_API_ATTRS bool DivideByPowerOfTwoInPlace(int twoPow) { |
| 245 | if (digits_ > 0) { |
| 246 | while (twoPow > 0) { |
| 247 | int chunk{twoPow > log10Radix ? log10Radix : twoPow}; |
| 248 | if ((digit_[0] & ((Digit{1} << chunk) - 1)) == 0) { |
| 249 | DivideByPowerOfTwo(chunk); |
| 250 | twoPow -= chunk; |
| 251 | continue; |
| 252 | } |
| 253 | twoPow -= chunk; |
| 254 | if (digit_[digits_ - 1] >> chunk != 0) { |
| 255 | if (digits_ == digitLimit_) { |
| 256 | return true; // overflow |
| 257 | } |
| 258 | digit_[digits_++] = 0; |
| 259 | } |
| 260 | auto remainder{digit_[digits_ - 1]}; |
| 261 | exponent_ -= log10Radix; |
| 262 | auto coeff{radix >> chunk}; // precise; radix is (5*2)**log10Radix |
| 263 | auto mask{(Digit{1} << chunk) - 1}; |
| 264 | for (int j{digits_ - 1}; j >= 1; --j) { |
| 265 | digit_[j] = (digit_[j - 1] >> chunk) + coeff * remainder; |
| 266 | remainder = digit_[j - 1] & mask; |
| 267 | } |
| 268 | digit_[0] = coeff * remainder; |
| 269 | } |
| 270 | } |
| 271 | return false; // no overflow |
| 272 | } |
| 273 | |
| 274 | RT_API_ATTRS int AddCarry(int position = 0, int carry = 1) { |
| 275 | for (; position < digits_; ++position) { |
| 276 | Digit v{digit_[position] + carry}; |
| 277 | if (v < radix) { |
| 278 | digit_[position] = v; |
| 279 | return 0; |
| 280 | } |
| 281 | digit_[position] = v - radix; |
| 282 | carry = 1; |
| 283 | } |
| 284 | if (digits_ < digitLimit_) { |
| 285 | digit_[digits_++] = carry; |
| 286 | return 0; |
| 287 | } |
| 288 | Normalize(); |
| 289 | if (digits_ < digitLimit_) { |
| 290 | digit_[digits_++] = carry; |
| 291 | return 0; |
| 292 | } |
| 293 | return carry; |
| 294 | } |
| 295 | |
| 296 | RT_API_ATTRS void Decrement() { |
| 297 | for (int j{0}; digit_[j]-- == 0; ++j) { |
| 298 | digit_[j] = radix - 1; |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | template <int N> RT_API_ATTRS int MultiplyByHelper(int carry = 0) { |
| 303 | for (int j{0}; j < digits_; ++j) { |
| 304 | auto v{N * digit_[j] + carry}; |
| 305 | carry = v / radix; |
| 306 | digit_[j] = v - carry * radix; // i.e., v % radix |
| 307 | } |
| 308 | return carry; |
| 309 | } |
| 310 | |
| 311 | template <int N> RT_API_ATTRS int MultiplyBy(int carry = 0) { |
| 312 | if (int newCarry{MultiplyByHelper<N>(carry)}) { |
| 313 | return AddCarry(digits_, newCarry); |
| 314 | } else { |
| 315 | return 0; |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | template <int N> RT_API_ATTRS int MultiplyWithoutNormalization() { |
| 320 | if (int carry{MultiplyByHelper<N>(0)}) { |
| 321 | if (digits_ < digitLimit_) { |
| 322 | digit_[digits_++] = carry; |
| 323 | return 0; |
| 324 | } else { |
| 325 | return carry; |
| 326 | } |
| 327 | } else { |
| 328 | return 0; |
| 329 | } |
| 330 | } |
| 331 | |
| 332 | RT_API_ATTRS void LoseLeastSignificantDigit(); // with rounding |
| 333 | |
| 334 | RT_API_ATTRS void PushCarry(int carry) { |
| 335 | if (digits_ == maxDigits && RemoveLeastOrderZeroDigits() == 0) { |
| 336 | LoseLeastSignificantDigit(); |
| 337 | digit_[digits_ - 1] += carry; |
| 338 | } else { |
| 339 | digit_[digits_++] = carry; |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | // Adds another number and then divides by two. |
| 344 | // Assumes same exponent and sign. |
| 345 | // Returns true when the result has effectively been rounded down. |
| 346 | RT_API_ATTRS bool Mean(const BigRadixFloatingPointNumber &); |
| 347 | |
| 348 | // Parses a floating-point number; leaves the pointer reference |
| 349 | // argument pointing at the next character after what was recognized. |
| 350 | // The "end" argument can be left null if the caller is sure that the |
| 351 | // string is properly terminated with an addressable character that |
| 352 | // can't be in a valid floating-point character. |
| 353 | RT_API_ATTRS bool ParseNumber(const char *&, bool &inexact, const char *end); |
| 354 | |
| 355 | using Raw = typename Real::RawType; |
| 356 | constexpr RT_API_ATTRS Raw SignBit() const { |
| 357 | return Raw{isNegative_} << (Real::bits - 1); |
| 358 | } |
| 359 | constexpr RT_API_ATTRS Raw Infinity() const { |
| 360 | Raw result{static_cast<Raw>(Real::maxExponent)}; |
| 361 | result <<= Real::significandBits; |
| 362 | result |= SignBit(); |
| 363 | if constexpr (Real::bits == 80) { // x87 |
| 364 | result |= Raw{1} << 63; |
| 365 | } |
| 366 | return result; |
| 367 | } |
| 368 | constexpr RT_API_ATTRS Raw NaN(bool isQuiet = true) { |
| 369 | Raw result{Real::maxExponent}; |
| 370 | result <<= Real::significandBits; |
| 371 | result |= SignBit(); |
| 372 | if constexpr (Real::bits == 80) { // x87 |
| 373 | result |= Raw{isQuiet ? 3u : 2u} << 62; |
| 374 | } else { |
| 375 | Raw quiet{isQuiet ? Raw{2} : Raw{1}}; |
| 376 | quiet <<= Real::significandBits - 2; |
| 377 | result |= quiet; |
| 378 | } |
| 379 | return result; |
| 380 | } |
| 381 | constexpr RT_API_ATTRS Raw HUGE() const { |
| 382 | Raw result{static_cast<Raw>(Real::maxExponent)}; |
| 383 | result <<= Real::significandBits; |
| 384 | result |= SignBit(); |
| 385 | return result - 1; // decrement exponent, set all significand bits |
| 386 | } |
| 387 | |
| 388 | Digit digit_[maxDigits]; // in little-endian order: digit_[0] is LSD |
| 389 | int digits_{0}; // # of elements in digit_[] array; zero when zero |
| 390 | int digitLimit_{maxDigits}; // precision clamp |
| 391 | int exponent_{0}; // signed power of ten |
| 392 | bool isNegative_{false}; |
| 393 | enum FortranRounding rounding_ { RoundNearest }; |
| 394 | }; |
| 395 | } // namespace Fortran::decimal |
| 396 | #endif |
| 397 | |