| 1 | // Copyright 2020-2023 Daniel Lemire |
| 2 | // Copyright 2023 Matt Borland |
| 3 | // Distributed under the Boost Software License, Version 1.0. |
| 4 | // https://www.boost.org/LICENSE_1_0.txt |
| 5 | // |
| 6 | // Derivative of: https://github.com/fastfloat/fast_float |
| 7 | |
| 8 | #ifndef BOOST_CHARCONV_DETAIL_FASTFLOAT_BIGINT_HPP |
| 9 | #define BOOST_CHARCONV_DETAIL_FASTFLOAT_BIGINT_HPP |
| 10 | |
| 11 | #include <boost/charconv/detail/fast_float/float_common.hpp> |
| 12 | #include <algorithm> |
| 13 | #include <cstdint> |
| 14 | #include <climits> |
| 15 | #include <cstring> |
| 16 | |
| 17 | namespace boost { namespace charconv { namespace detail { namespace fast_float { |
| 18 | |
| 19 | // the limb width: we want efficient multiplication of double the bits in |
| 20 | // limb, or for 64-bit limbs, at least 64-bit multiplication where we can |
| 21 | // extract the high and low parts efficiently. this is every 64-bit |
| 22 | // architecture except for sparc, which emulates 128-bit multiplication. |
| 23 | // we might have platforms where `CHAR_BIT` is not 8, so let's avoid |
| 24 | // doing `8 * sizeof(limb)`. |
| 25 | #if defined(BOOST_CHARCONV_FASTFLOAT_64BIT) && !defined(__sparc) |
| 26 | #define BOOST_CHARCONV_FASTFLOAT_64BIT_LIMB 1 |
| 27 | typedef uint64_t limb; |
| 28 | constexpr size_t limb_bits = 64; |
| 29 | #else |
| 30 | #define BOOST_CHARCONV_FASTFLOAT_32BIT_LIMB |
| 31 | typedef uint32_t limb; |
| 32 | constexpr size_t limb_bits = 32; |
| 33 | #endif |
| 34 | |
| 35 | typedef span<limb> limb_span; |
| 36 | |
| 37 | // number of bits in a bigint. this needs to be at least the number |
| 38 | // of bits required to store the largest bigint, which is |
| 39 | // `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or |
| 40 | // ~3600 bits, so we round to 4000. |
| 41 | constexpr size_t bigint_bits = 4000; |
| 42 | constexpr size_t bigint_limbs = bigint_bits / limb_bits; |
| 43 | |
| 44 | // vector-like type that is allocated on the stack. the entire |
| 45 | // buffer is pre-allocated, and only the length changes. |
| 46 | template <uint16_t size> |
| 47 | struct stackvec { |
| 48 | limb data[size]; |
| 49 | // we never need more than 150 limbs |
| 50 | uint16_t length{0}; |
| 51 | |
| 52 | stackvec() = default; |
| 53 | stackvec(const stackvec &) = delete; |
| 54 | stackvec &operator=(const stackvec &) = delete; |
| 55 | stackvec(stackvec &&) = delete; |
| 56 | stackvec &operator=(stackvec &&other) = delete; |
| 57 | |
| 58 | // create stack vector from existing limb span. |
| 59 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) { |
| 60 | BOOST_CHARCONV_FASTFLOAT_ASSERT(try_extend(s)); |
| 61 | } |
| 62 | |
| 63 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14 limb& operator[](size_t index) noexcept { |
| 64 | BOOST_CHARCONV_FASTFLOAT_DEBUG_ASSERT(index < length); |
| 65 | return data[index]; |
| 66 | } |
| 67 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14 const limb& operator[](size_t index) const noexcept { |
| 68 | BOOST_CHARCONV_FASTFLOAT_DEBUG_ASSERT(index < length); |
| 69 | return data[index]; |
| 70 | } |
| 71 | // index from the end of the container |
| 72 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14 const limb& rindex(size_t index) const noexcept { |
| 73 | BOOST_CHARCONV_FASTFLOAT_DEBUG_ASSERT(index < length); |
| 74 | size_t rindex = length - index - 1; |
| 75 | return data[rindex]; |
| 76 | } |
| 77 | |
| 78 | // set the length, without bounds checking. |
| 79 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept { |
| 80 | length = uint16_t(len); |
| 81 | } |
| 82 | constexpr size_t len() const noexcept { |
| 83 | return length; |
| 84 | } |
| 85 | constexpr bool is_empty() const noexcept { |
| 86 | return length == 0; |
| 87 | } |
| 88 | constexpr size_t capacity() const noexcept { |
| 89 | return size; |
| 90 | } |
| 91 | // append item to vector, without bounds checking |
| 92 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept { |
| 93 | data[length] = value; |
| 94 | length++; |
| 95 | } |
| 96 | // append item to vector, returning if item was added |
| 97 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept { |
| 98 | if (len() < capacity()) { |
| 99 | push_unchecked(value); |
| 100 | return true; |
| 101 | } else { |
| 102 | return false; |
| 103 | } |
| 104 | } |
| 105 | // add items to the vector, from a span, without bounds checking |
| 106 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept { |
| 107 | limb* ptr = data + length; |
| 108 | std::copy_n(first: s.ptr, n: s.len(), result: ptr); |
| 109 | set_len(len() + s.len()); |
| 110 | } |
| 111 | // try to add items to the vector, returning if items were added |
| 112 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bool try_extend(limb_span s) noexcept { |
| 113 | if (len() + s.len() <= capacity()) { |
| 114 | extend_unchecked(s); |
| 115 | return true; |
| 116 | } else { |
| 117 | return false; |
| 118 | } |
| 119 | } |
| 120 | // resize the vector, without bounds checking |
| 121 | // if the new size is longer than the vector, assign value to each |
| 122 | // appended item. |
| 123 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 124 | void resize_unchecked(size_t new_len, limb value) noexcept { |
| 125 | if (new_len > len()) { |
| 126 | size_t count = new_len - len(); |
| 127 | limb* first = data + len(); |
| 128 | limb* last = first + count; |
| 129 | ::std::fill(first: first, last: last, value: value); |
| 130 | set_len(new_len); |
| 131 | } else { |
| 132 | set_len(new_len); |
| 133 | } |
| 134 | } |
| 135 | // try to resize the vector, returning if the vector was resized. |
| 136 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept { |
| 137 | if (new_len > capacity()) { |
| 138 | return false; |
| 139 | } else { |
| 140 | resize_unchecked(new_len, value); |
| 141 | return true; |
| 142 | } |
| 143 | } |
| 144 | // check if any limbs are non-zero after the given index. |
| 145 | // this needs to be done in reverse order, since the index |
| 146 | // is relative to the most significant limbs. |
| 147 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept { |
| 148 | while (index < len()) { |
| 149 | if (rindex(index) != 0) { |
| 150 | return true; |
| 151 | } |
| 152 | index++; |
| 153 | } |
| 154 | return false; |
| 155 | } |
| 156 | // normalize the big integer, so most-significant zero limbs are removed. |
| 157 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14 void normalize() noexcept { |
| 158 | while (len() > 0 && rindex(index: 0) == 0) { |
| 159 | length--; |
| 160 | } |
| 161 | } |
| 162 | }; |
| 163 | |
| 164 | BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14 |
| 165 | uint64_t empty_hi64(bool& truncated) noexcept { |
| 166 | truncated = false; |
| 167 | return 0; |
| 168 | } |
| 169 | |
| 170 | BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 171 | uint64_t uint64_hi64(uint64_t r0, bool& truncated) noexcept { |
| 172 | truncated = false; |
| 173 | int shl = leading_zeroes(input_num: r0); |
| 174 | return r0 << shl; |
| 175 | } |
| 176 | |
| 177 | BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 178 | uint64_t uint64_hi64(uint64_t r0, uint64_t r1, bool& truncated) noexcept { |
| 179 | int shl = leading_zeroes(input_num: r0); |
| 180 | if (shl == 0) { |
| 181 | truncated = r1 != 0; |
| 182 | return r0; |
| 183 | } else { |
| 184 | int shr = 64 - shl; |
| 185 | truncated = (r1 << shl) != 0; |
| 186 | return (r0 << shl) | (r1 >> shr); |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 191 | uint64_t uint32_hi64(uint32_t r0, bool& truncated) noexcept { |
| 192 | return uint64_hi64(r0, truncated); |
| 193 | } |
| 194 | |
| 195 | BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 196 | uint64_t uint32_hi64(uint32_t r0, uint32_t r1, bool& truncated) noexcept { |
| 197 | uint64_t x0 = r0; |
| 198 | uint64_t x1 = r1; |
| 199 | return uint64_hi64(r0: (x0 << 32) | x1, truncated); |
| 200 | } |
| 201 | |
| 202 | BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 203 | uint64_t uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool& truncated) noexcept { |
| 204 | uint64_t x0 = r0; |
| 205 | uint64_t x1 = r1; |
| 206 | uint64_t x2 = r2; |
| 207 | return uint64_hi64(r0: x0, r1: (x1 << 32) | x2, truncated); |
| 208 | } |
| 209 | |
| 210 | // add two small integers, checking for overflow. |
| 211 | // we want an efficient operation. for msvc, where |
| 212 | // we don't have built-in intrinsics, this is still |
| 213 | // pretty fast. |
| 214 | BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 215 | limb scalar_add(limb x, limb y, bool& overflow) noexcept { |
| 216 | limb z; |
| 217 | // gcc and clang |
| 218 | #if defined(__has_builtin) |
| 219 | #if __has_builtin(__builtin_add_overflow) |
| 220 | if (!cpp20_and_in_constexpr()) { |
| 221 | overflow = __builtin_add_overflow(x, y, &z); |
| 222 | return z; |
| 223 | } |
| 224 | #endif |
| 225 | #endif |
| 226 | |
| 227 | // generic, this still optimizes correctly on MSVC. |
| 228 | z = x + y; |
| 229 | overflow = z < x; |
| 230 | return z; |
| 231 | } |
| 232 | |
| 233 | // multiply two small integers, getting both the high and low bits. |
| 234 | BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 235 | limb scalar_mul(limb x, limb y, limb& carry) noexcept { |
| 236 | #ifdef BOOST_CHARCONV_FASTFLOAT_64BIT_LIMB |
| 237 | #if defined(__SIZEOF_INT128__) |
| 238 | // GCC and clang both define it as an extension. |
| 239 | __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry); |
| 240 | carry = limb(z >> limb_bits); |
| 241 | return limb(z); |
| 242 | #else |
| 243 | // fallback, no native 128-bit integer multiplication with carry. |
| 244 | // on msvc, this optimizes identically, somehow. |
| 245 | value128 z = full_multiplication(x, y); |
| 246 | bool overflow; |
| 247 | z.low = scalar_add(z.low, carry, overflow); |
| 248 | z.high += uint64_t(overflow); // cannot overflow |
| 249 | carry = z.high; |
| 250 | return z.low; |
| 251 | #endif |
| 252 | #else |
| 253 | uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry); |
| 254 | carry = limb(z >> limb_bits); |
| 255 | return limb(z); |
| 256 | #endif |
| 257 | } |
| 258 | |
| 259 | // add scalar value to bigint starting from offset. |
| 260 | // used in grade school multiplication |
| 261 | template <uint16_t size> |
| 262 | inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 263 | bool small_add_from(stackvec<size>& vec, limb y, size_t start) noexcept { |
| 264 | size_t index = start; |
| 265 | limb carry = y; |
| 266 | bool overflow; |
| 267 | while (carry != 0 && index < vec.len()) { |
| 268 | vec[index] = scalar_add(vec[index], carry, overflow); |
| 269 | carry = limb(overflow); |
| 270 | index += 1; |
| 271 | } |
| 272 | if (carry != 0) { |
| 273 | BOOST_CHARCONV_FASTFLOAT_TRY(vec.try_push(carry)); |
| 274 | } |
| 275 | return true; |
| 276 | } |
| 277 | |
| 278 | // add scalar value to bigint. |
| 279 | template <uint16_t size> |
| 280 | BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 281 | bool small_add(stackvec<size>& vec, limb y) noexcept { |
| 282 | return small_add_from(vec, y, 0); |
| 283 | } |
| 284 | |
| 285 | // multiply bigint by scalar value. |
| 286 | template <uint16_t size> |
| 287 | inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 288 | bool small_mul(stackvec<size>& vec, limb y) noexcept { |
| 289 | limb carry = 0; |
| 290 | for (size_t index = 0; index < vec.len(); index++) { |
| 291 | vec[index] = scalar_mul(vec[index], y, carry); |
| 292 | } |
| 293 | if (carry != 0) { |
| 294 | BOOST_CHARCONV_FASTFLOAT_TRY(vec.try_push(carry)); |
| 295 | } |
| 296 | return true; |
| 297 | } |
| 298 | |
| 299 | // add bigint to bigint starting from index. |
| 300 | // used in grade school multiplication |
| 301 | template <uint16_t size> |
| 302 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 303 | bool large_add_from(stackvec<size>& x, limb_span y, size_t start) noexcept { |
| 304 | // the effective x buffer is from `xstart..x.len()`, so exit early |
| 305 | // if we can't get that current range. |
| 306 | if (x.len() < start || y.len() > x.len() - start) { |
| 307 | BOOST_CHARCONV_FASTFLOAT_TRY(x.try_resize(y.len() + start, 0)); |
| 308 | } |
| 309 | |
| 310 | bool carry = false; |
| 311 | for (size_t index = 0; index < y.len(); index++) { |
| 312 | limb xi = x[index + start]; |
| 313 | limb yi = y[index]; |
| 314 | bool c1 = false; |
| 315 | bool c2 = false; |
| 316 | xi = scalar_add(x: xi, y: yi, overflow&: c1); |
| 317 | if (carry) { |
| 318 | xi = scalar_add(x: xi, y: 1, overflow&: c2); |
| 319 | } |
| 320 | x[index + start] = xi; |
| 321 | carry = c1 | c2; |
| 322 | } |
| 323 | |
| 324 | // handle overflow |
| 325 | if (carry) { |
| 326 | BOOST_CHARCONV_FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start)); |
| 327 | } |
| 328 | return true; |
| 329 | } |
| 330 | |
| 331 | // add bigint to bigint. |
| 332 | template <uint16_t size> |
| 333 | BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 334 | bool large_add_from(stackvec<size>& x, limb_span y) noexcept { |
| 335 | return large_add_from(x, y, 0); |
| 336 | } |
| 337 | |
| 338 | // grade-school multiplication algorithm |
| 339 | template <uint16_t size> |
| 340 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 341 | bool long_mul(stackvec<size>& x, limb_span y) noexcept { |
| 342 | limb_span xs = limb_span(x.data, x.len()); |
| 343 | stackvec<size> z(xs); |
| 344 | limb_span zs = limb_span(z.data, z.len()); |
| 345 | |
| 346 | if (y.len() != 0) { |
| 347 | limb y0 = y[0]; |
| 348 | BOOST_CHARCONV_FASTFLOAT_TRY(small_mul(x, y0)); |
| 349 | for (size_t index = 1; index < y.len(); index++) { |
| 350 | limb yi = y[index]; |
| 351 | stackvec<size> zi; |
| 352 | if (yi != 0) { |
| 353 | // re-use the same buffer throughout |
| 354 | zi.set_len(0); |
| 355 | BOOST_CHARCONV_FASTFLOAT_TRY(zi.try_extend(zs)); |
| 356 | BOOST_CHARCONV_FASTFLOAT_TRY(small_mul(zi, yi)); |
| 357 | limb_span zis = limb_span(zi.data, zi.len()); |
| 358 | BOOST_CHARCONV_FASTFLOAT_TRY(large_add_from(x, zis, index)); |
| 359 | } |
| 360 | } |
| 361 | } |
| 362 | |
| 363 | x.normalize(); |
| 364 | return true; |
| 365 | } |
| 366 | |
| 367 | // grade-school multiplication algorithm |
| 368 | template <uint16_t size> |
| 369 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 |
| 370 | bool large_mul(stackvec<size>& x, limb_span y) noexcept { |
| 371 | if (y.len() == 1) { |
| 372 | BOOST_CHARCONV_FASTFLOAT_TRY(small_mul(x, y[0])); |
| 373 | } else { |
| 374 | BOOST_CHARCONV_FASTFLOAT_TRY(long_mul(x, y)); |
| 375 | } |
| 376 | return true; |
| 377 | } |
| 378 | |
| 379 | template <typename = void> |
| 380 | struct pow5_tables { |
| 381 | static constexpr uint32_t large_step = 135; |
| 382 | static constexpr uint64_t small_power_of_5[] = { |
| 383 | 1UL, 5UL, 25UL, 125UL, 625UL, 3125UL, 15625UL, 78125UL, 390625UL, |
| 384 | 1953125UL, 9765625UL, 48828125UL, 244140625UL, 1220703125UL, |
| 385 | 6103515625UL, 30517578125UL, 152587890625UL, 762939453125UL, |
| 386 | 3814697265625UL, 19073486328125UL, 95367431640625UL, 476837158203125UL, |
| 387 | 2384185791015625UL, 11920928955078125UL, 59604644775390625UL, |
| 388 | 298023223876953125UL, 1490116119384765625UL, 7450580596923828125UL, |
| 389 | }; |
| 390 | #ifdef BOOST_CHARCONV_FASTFLOAT_64BIT_LIMB |
| 391 | constexpr static limb large_power_of_5[] = { |
| 392 | 1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL, |
| 393 | 10482974169319127550UL, 198276706040285095UL}; |
| 394 | #else |
| 395 | constexpr static limb large_power_of_5[] = { |
| 396 | 4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U, |
| 397 | 1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U}; |
| 398 | #endif |
| 399 | }; |
| 400 | |
| 401 | template <typename T> |
| 402 | constexpr uint32_t pow5_tables<T>::large_step; |
| 403 | |
| 404 | template <typename T> |
| 405 | constexpr uint64_t pow5_tables<T>::small_power_of_5[]; |
| 406 | |
| 407 | template <typename T> |
| 408 | constexpr limb pow5_tables<T>::large_power_of_5[]; |
| 409 | |
| 410 | // big integer type. implements a small subset of big integer |
| 411 | // arithmetic, using simple algorithms since asymptotically |
| 412 | // faster algorithms are slower for a small number of limbs. |
| 413 | // all operations assume the big-integer is normalized. |
| 414 | struct bigint : pow5_tables<> { |
| 415 | // storage of the limbs, in little-endian order. |
| 416 | stackvec<bigint_limbs> vec; |
| 417 | |
| 418 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bigint(): vec() {} |
| 419 | bigint(const bigint &) = delete; |
| 420 | bigint &operator=(const bigint &) = delete; |
| 421 | bigint(bigint &&) = delete; |
| 422 | bigint &operator=(bigint &&other) = delete; |
| 423 | |
| 424 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bigint(uint64_t value): vec() { |
| 425 | #ifdef BOOST_CHARCONV_FASTFLOAT_64BIT_LIMB |
| 426 | vec.push_unchecked(value); |
| 427 | #else |
| 428 | vec.push_unchecked(uint32_t(value)); |
| 429 | vec.push_unchecked(uint32_t(value >> 32)); |
| 430 | #endif |
| 431 | vec.normalize(); |
| 432 | } |
| 433 | |
| 434 | // get the high 64 bits from the vector, and if bits were truncated. |
| 435 | // this is to get the significant digits for the float. |
| 436 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool& truncated) const noexcept { |
| 437 | #ifdef BOOST_CHARCONV_FASTFLOAT_64BIT_LIMB |
| 438 | if (vec.len() == 0) { |
| 439 | return empty_hi64(truncated); |
| 440 | } else if (vec.len() == 1) { |
| 441 | return uint64_hi64(r0: vec.rindex(index: 0), truncated); |
| 442 | } else { |
| 443 | uint64_t result = uint64_hi64(r0: vec.rindex(index: 0), r1: vec.rindex(index: 1), truncated); |
| 444 | truncated |= vec.nonzero(index: 2); |
| 445 | return result; |
| 446 | } |
| 447 | #else |
| 448 | if (vec.len() == 0) { |
| 449 | return empty_hi64(truncated); |
| 450 | } else if (vec.len() == 1) { |
| 451 | return uint32_hi64(vec.rindex(0), truncated); |
| 452 | } else if (vec.len() == 2) { |
| 453 | return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated); |
| 454 | } else { |
| 455 | uint64_t result = uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated); |
| 456 | truncated |= vec.nonzero(3); |
| 457 | return result; |
| 458 | } |
| 459 | #endif |
| 460 | } |
| 461 | |
| 462 | // compare two big integers, returning the large value. |
| 463 | // assumes both are normalized. if the return value is |
| 464 | // negative, other is larger, if the return value is |
| 465 | // positive, this is larger, otherwise they are equal. |
| 466 | // the limbs are stored in little-endian order, so we |
| 467 | // must compare the limbs in ever order. |
| 468 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 int compare(const bigint& other) const noexcept { |
| 469 | if (vec.len() > other.vec.len()) { |
| 470 | return 1; |
| 471 | } else if (vec.len() < other.vec.len()) { |
| 472 | return -1; |
| 473 | } else { |
| 474 | for (size_t index = vec.len(); index > 0; index--) { |
| 475 | limb xi = vec[index - 1]; |
| 476 | limb yi = other.vec[index - 1]; |
| 477 | if (xi > yi) { |
| 478 | return 1; |
| 479 | } else if (xi < yi) { |
| 480 | return -1; |
| 481 | } |
| 482 | } |
| 483 | return 0; |
| 484 | } |
| 485 | } |
| 486 | |
| 487 | // shift left each limb n bits, carrying over to the new limb |
| 488 | // returns true if we were able to shift all the digits. |
| 489 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept { |
| 490 | // Internally, for each item, we shift left by n, and add the previous |
| 491 | // right shifted limb-bits. |
| 492 | // For example, we transform (for u8) shifted left 2, to: |
| 493 | // b10100100 b01000010 |
| 494 | // b10 b10010001 b00001000 |
| 495 | BOOST_CHARCONV_FASTFLOAT_DEBUG_ASSERT(n != 0); |
| 496 | BOOST_CHARCONV_FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8); |
| 497 | |
| 498 | size_t shl = n; |
| 499 | size_t shr = limb_bits - shl; |
| 500 | limb prev = 0; |
| 501 | for (size_t index = 0; index < vec.len(); index++) { |
| 502 | limb xi = vec[index]; |
| 503 | vec[index] = (xi << shl) | (prev >> shr); |
| 504 | prev = xi; |
| 505 | } |
| 506 | |
| 507 | limb carry = prev >> shr; |
| 508 | if (carry != 0) { |
| 509 | return vec.try_push(value: carry); |
| 510 | } |
| 511 | return true; |
| 512 | } |
| 513 | |
| 514 | // move the limbs left by `n` limbs. |
| 515 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept { |
| 516 | BOOST_CHARCONV_FASTFLOAT_DEBUG_ASSERT(n != 0); |
| 517 | if (n + vec.len() > vec.capacity()) { |
| 518 | return false; |
| 519 | } else if (!vec.is_empty()) { |
| 520 | // move limbs |
| 521 | limb* dst = vec.data + n; |
| 522 | const limb* src = vec.data; |
| 523 | std::copy_backward(first: src, last: src + vec.len(), result: dst + vec.len()); |
| 524 | // fill in empty limbs |
| 525 | limb* first = vec.data; |
| 526 | limb* last = first + n; |
| 527 | ::std::fill(first: first, last: last, value: 0); |
| 528 | vec.set_len(n + vec.len()); |
| 529 | return true; |
| 530 | } else { |
| 531 | return true; |
| 532 | } |
| 533 | } |
| 534 | |
| 535 | // move the limbs left by `n` bits. |
| 536 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept { |
| 537 | size_t rem = n % limb_bits; |
| 538 | size_t div = n / limb_bits; |
| 539 | if (rem != 0) { |
| 540 | BOOST_CHARCONV_FASTFLOAT_TRY(shl_bits(rem)); |
| 541 | } |
| 542 | if (div != 0) { |
| 543 | BOOST_CHARCONV_FASTFLOAT_TRY(shl_limbs(div)); |
| 544 | } |
| 545 | return true; |
| 546 | } |
| 547 | |
| 548 | // get the number of leading zeros in the bigint. |
| 549 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept { |
| 550 | if (vec.is_empty()) { |
| 551 | return 0; |
| 552 | } else { |
| 553 | #ifdef BOOST_CHARCONV_FASTFLOAT_64BIT_LIMB |
| 554 | return leading_zeroes(input_num: vec.rindex(index: 0)); |
| 555 | #else |
| 556 | // no use defining a specialized leading_zeroes for a 32-bit type. |
| 557 | uint64_t r0 = vec.rindex(0); |
| 558 | return leading_zeroes(r0 << 32); |
| 559 | #endif |
| 560 | } |
| 561 | } |
| 562 | |
| 563 | // get the number of bits in the bigint. |
| 564 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept { |
| 565 | int lz = ctlz(); |
| 566 | return int(limb_bits * vec.len()) - lz; |
| 567 | } |
| 568 | |
| 569 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { |
| 570 | return small_mul(vec, y); |
| 571 | } |
| 572 | |
| 573 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { |
| 574 | return small_add(vec, y); |
| 575 | } |
| 576 | |
| 577 | // multiply as if by 2 raised to a power. |
| 578 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { |
| 579 | return shl(n: exp); |
| 580 | } |
| 581 | |
| 582 | // multiply as if by 5 raised to a power. |
| 583 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept { |
| 584 | // multiply by a power of 5 |
| 585 | constexpr size_t large_length = sizeof(large_power_of_5) / sizeof(limb); |
| 586 | limb_span large = limb_span(large_power_of_5, large_length); |
| 587 | while (exp >= large_step) { |
| 588 | BOOST_CHARCONV_FASTFLOAT_TRY(large_mul(vec, large)); |
| 589 | exp -= large_step; |
| 590 | } |
| 591 | #ifdef BOOST_CHARCONV_FASTFLOAT_64BIT_LIMB |
| 592 | constexpr uint32_t small_step = 27; |
| 593 | constexpr limb max_native = 7450580596923828125UL; |
| 594 | #else |
| 595 | constexpr uint32_t small_step = 13; |
| 596 | constexpr limb max_native = 1220703125U; |
| 597 | #endif |
| 598 | while (exp >= small_step) { |
| 599 | BOOST_CHARCONV_FASTFLOAT_TRY(small_mul(vec, max_native)); |
| 600 | exp -= small_step; |
| 601 | } |
| 602 | if (exp != 0) { |
| 603 | // Work around clang bug https://godbolt.org/z/zedh7rrhc |
| 604 | // This is similar to https://github.com/llvm/llvm-project/issues/47746, |
| 605 | // except the workaround described there don't work here |
| 606 | BOOST_CHARCONV_FASTFLOAT_TRY( |
| 607 | small_mul(vec, limb(((void)small_power_of_5[0], small_power_of_5[exp]))) |
| 608 | ); |
| 609 | } |
| 610 | |
| 611 | return true; |
| 612 | } |
| 613 | |
| 614 | // multiply as if by 10 raised to a power. |
| 615 | BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept { |
| 616 | BOOST_CHARCONV_FASTFLOAT_TRY(pow5(exp)); |
| 617 | return pow2(exp); |
| 618 | } |
| 619 | }; |
| 620 | |
| 621 | }}}} // namespace fast_float |
| 622 | |
| 623 | #endif |
| 624 | |