1 | //===-- Utilities to convert floating point values to string ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H |
10 | #define LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H |
11 | |
12 | #include <stdint.h> |
13 | |
14 | #include "src/__support/CPP/limits.h" |
15 | #include "src/__support/CPP/type_traits.h" |
16 | #include "src/__support/FPUtil/FPBits.h" |
17 | #include "src/__support/FPUtil/dyadic_float.h" |
18 | #include "src/__support/big_int.h" |
19 | #include "src/__support/common.h" |
20 | #include "src/__support/libc_assert.h" |
21 | #include "src/__support/macros/attributes.h" |
22 | |
23 | // This file has 5 compile-time flags to allow the user to configure the float |
24 | // to string behavior. These were used to explore tradeoffs during the design |
25 | // phase, and can still be used to gain specific properties. Unless you |
26 | // specifically know what you're doing, you should leave all these flags off. |
27 | |
28 | // LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD |
29 | // This flag disables the separate long double conversion implementation. It is |
30 | // not based on the Ryu algorithm, instead generating the digits by |
31 | // multiplying/dividing the written-out number by 10^9 to get blocks. It's |
32 | // significantly faster than INT_CALC, only about 10x slower than MEGA_TABLE, |
33 | // and is small in binary size. Its downside is that it always calculates all |
34 | // of the digits above the decimal point, making it inefficient for %e calls |
35 | // with large exponents. This specialization overrides other flags, so this |
36 | // flag must be set for other flags to effect the long double behavior. |
37 | |
38 | // LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE |
39 | // The Mega Table is ~5 megabytes when compiled. It lists the constants needed |
40 | // to perform the Ryu Printf algorithm (described below) for all long double |
41 | // values. This makes it extremely fast for both doubles and long doubles, in |
42 | // exchange for large binary size. |
43 | |
44 | // LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT |
45 | // Dyadic floats are software floating point numbers, and their accuracy can be |
46 | // as high as necessary. This option uses 256 bit dyadic floats to calculate |
47 | // the table values that Ryu Printf needs. This is reasonably fast and very |
48 | // small compared to the Mega Table, but the 256 bit floats only give accurate |
49 | // results for the first ~50 digits of the output. In practice this shouldn't |
50 | // be a problem since long doubles are only accurate for ~35 digits, but the |
51 | // trailing values all being 0s may cause brittle tests to fail. |
52 | |
53 | // LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC |
54 | // Integer Calculation uses wide integers to do the calculations for the Ryu |
55 | // Printf table, which is just as accurate as the Mega Table without requiring |
56 | // as much code size. These integers can be very large (~32KB at max, though |
57 | // always on the stack) to handle the edges of the long double range. They are |
58 | // also very slow, taking multiple seconds on a powerful CPU to calculate the |
59 | // values at the end of the range. If no flag is set, this is used for long |
60 | // doubles, the flag only changes the double behavior. |
61 | |
62 | // LIBC_COPT_FLOAT_TO_STR_NO_TABLE |
63 | // This flag doesn't change the actual calculation method, instead it is used |
64 | // to disable the normal Ryu Printf table for configurations that don't use any |
65 | // table at all. |
66 | |
67 | // Default Config: |
68 | // If no flags are set, doubles use the normal (and much more reasonably sized) |
69 | // Ryu Printf table and long doubles use their specialized implementation. This |
70 | // provides good performance and binary size. |
71 | |
72 | #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE |
73 | #include "src/__support/ryu_long_double_constants.h" |
74 | #elif !defined(LIBC_COPT_FLOAT_TO_STR_NO_TABLE) |
75 | #include "src/__support/ryu_constants.h" |
76 | #else |
77 | constexpr size_t IDX_SIZE = 1; |
78 | constexpr size_t MID_INT_SIZE = 192; |
79 | #endif |
80 | |
81 | // This implementation is based on the Ryu Printf algorithm by Ulf Adams: |
82 | // Ulf Adams. 2019. Ryƫ revisited: printf floating point conversion. |
83 | // Proc. ACM Program. Lang. 3, OOPSLA, Article 169 (October 2019), 23 pages. |
84 | // https://doi.org/10.1145/3360595 |
85 | |
86 | // This version is modified to require significantly less memory (it doesn't use |
87 | // a large buffer to store the result). |
88 | |
89 | // The general concept of this algorithm is as follows: |
90 | // We want to calculate a 9 digit segment of a floating point number using this |
91 | // formula: floor((mantissa * 2^exponent)/10^i) % 10^9. |
92 | // To do so normally would involve large integers (~1000 bits for doubles), so |
93 | // we use a shortcut. We can avoid calculating 2^exponent / 10^i by using a |
94 | // lookup table. The resulting intermediate value needs to be about 192 bits to |
95 | // store the result with enough precision. Since this is all being done with |
96 | // integers for appropriate precision, we would run into a problem if |
97 | // i > exponent since then 2^exponent / 10^i would be less than 1. To correct |
98 | // for this, the actual calculation done is 2^(exponent + c) / 10^i, and then |
99 | // when multiplying by the mantissa we reverse this by dividing by 2^c, like so: |
100 | // floor((mantissa * table[exponent][i])/(2^c)) % 10^9. |
101 | // This gives a 9 digit value, which is small enough to fit in a 32 bit integer, |
102 | // and that integer is converted into a string as normal, and called a block. In |
103 | // this implementation, the most recent block is buffered, so that if rounding |
104 | // is necessary the block can be adjusted before being written to the output. |
105 | // Any block that is all 9s adds one to the max block counter and doesn't clear |
106 | // the buffer because they can cause the block above them to be rounded up. |
107 | |
108 | namespace LIBC_NAMESPACE { |
109 | |
110 | using BlockInt = uint32_t; |
111 | constexpr uint32_t BLOCK_SIZE = 9; |
112 | constexpr uint64_t EXP5_9 = 1953125; |
113 | constexpr uint64_t EXP10_9 = 1000000000; |
114 | |
115 | using FPBits = fputil::FPBits<long double>; |
116 | |
117 | // Larger numbers prefer a slightly larger constant than is used for the smaller |
118 | // numbers. |
119 | constexpr size_t CALC_SHIFT_CONST = 128; |
120 | |
121 | namespace internal { |
122 | |
123 | // Returns floor(log_10(2^e)); requires 0 <= e <= 42039. |
124 | LIBC_INLINE constexpr uint32_t log10_pow2(uint64_t e) { |
125 | LIBC_ASSERT(e <= 42039 && |
126 | "Incorrect exponent to perform log10_pow2 approximation." ); |
127 | // This approximation is based on the float value for log_10(2). It first |
128 | // gives an incorrect result for our purposes at 42039 (well beyond the 16383 |
129 | // maximum for long doubles). |
130 | |
131 | // To get these constants I first evaluated log_10(2) to get an approximation |
132 | // of 0.301029996. Next I passed that value through a string to double |
133 | // conversion to get an explicit mantissa of 0x13441350fbd738 and an exponent |
134 | // of -2 (which becomes -54 when we shift the mantissa to be a non-fractional |
135 | // number). Next I shifted the mantissa right 12 bits to create more space for |
136 | // the multiplication result, adding 12 to the exponent to compensate. To |
137 | // check that this approximation works for our purposes I used the following |
138 | // python code: |
139 | // for i in range(16384): |
140 | // if(len(str(2**i)) != (((i*0x13441350fbd)>>42)+1)): |
141 | // print(i) |
142 | // The reason we add 1 is because this evaluation truncates the result, giving |
143 | // us the floor, whereas counting the digits of the power of 2 gives us the |
144 | // ceiling. With a similar loop I checked the maximum valid value and found |
145 | // 42039. |
146 | return static_cast<uint32_t>((e * 0x13441350fbdll) >> 42); |
147 | } |
148 | |
149 | // Same as above, but with different constants. |
150 | LIBC_INLINE constexpr uint32_t log2_pow5(uint64_t e) { |
151 | return static_cast<uint32_t>((e * 0x12934f0979bll) >> 39); |
152 | } |
153 | |
154 | // Returns 1 + floor(log_10(2^e). This could technically be off by 1 if any |
155 | // power of 2 was also a power of 10, but since that doesn't exist this is |
156 | // always accurate. This is used to calculate the maximum number of base-10 |
157 | // digits a given e-bit number could have. |
158 | LIBC_INLINE constexpr uint32_t ceil_log10_pow2(uint32_t e) { |
159 | return log10_pow2(e) + 1; |
160 | } |
161 | |
162 | LIBC_INLINE constexpr uint32_t div_ceil(uint32_t num, uint32_t denom) { |
163 | return (num + (denom - 1)) / denom; |
164 | } |
165 | |
166 | // Returns the maximum number of 9 digit blocks a number described by the given |
167 | // index (which is ceil(exponent/16)) and mantissa width could need. |
168 | LIBC_INLINE constexpr uint32_t length_for_num(uint32_t idx, |
169 | uint32_t mantissa_width) { |
170 | return div_ceil(num: ceil_log10_pow2(e: idx) + ceil_log10_pow2(e: mantissa_width + 1), |
171 | denom: BLOCK_SIZE); |
172 | } |
173 | |
174 | // The formula for the table when i is positive (or zero) is as follows: |
175 | // floor(10^(-9i) * 2^(e + c_1) + 1) % (10^9 * 2^c_1) |
176 | // Rewritten slightly we get: |
177 | // floor(5^(-9i) * 2^(e + c_1 - 9i) + 1) % (10^9 * 2^c_1) |
178 | |
179 | // TODO: Fix long doubles (needs bigger table or alternate algorithm.) |
180 | // Currently the table values are generated, which is very slow. |
181 | template <size_t INT_SIZE> |
182 | LIBC_INLINE constexpr UInt<MID_INT_SIZE> get_table_positive(int exponent, |
183 | size_t i) { |
184 | // INT_SIZE is the size of int that is used for the internal calculations of |
185 | // this function. It should be large enough to hold 2^(exponent+constant), so |
186 | // ~1000 for double and ~16000 for long double. Be warned that the time |
187 | // complexity of exponentiation is O(n^2 * log_2(m)) where n is the number of |
188 | // bits in the number being exponentiated and m is the exponent. |
189 | const int shift_amount = |
190 | static_cast<int>(exponent + CALC_SHIFT_CONST - (BLOCK_SIZE * i)); |
191 | if (shift_amount < 0) { |
192 | return 1; |
193 | } |
194 | UInt<INT_SIZE> num(0); |
195 | // MOD_SIZE is one of the limiting factors for how big the constant argument |
196 | // can get, since it needs to be small enough to fit in the result UInt, |
197 | // otherwise we'll get truncation on return. |
198 | constexpr UInt<INT_SIZE> MOD_SIZE = |
199 | (UInt<INT_SIZE>(EXP10_9) |
200 | << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); |
201 | |
202 | num = UInt<INT_SIZE>(1) << (shift_amount); |
203 | if (i > 0) { |
204 | UInt<INT_SIZE> fives(EXP5_9); |
205 | fives.pow_n(i); |
206 | num = num / fives; |
207 | } |
208 | |
209 | num = num + 1; |
210 | if (num > MOD_SIZE) { |
211 | auto rem = num.div_uint_half_times_pow_2( |
212 | EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) |
213 | .value(); |
214 | num = rem; |
215 | } |
216 | return num; |
217 | } |
218 | |
219 | template <size_t INT_SIZE> |
220 | LIBC_INLINE UInt<MID_INT_SIZE> get_table_positive_df(int exponent, size_t i) { |
221 | static_assert(INT_SIZE == 256, |
222 | "Only 256 is supported as an int size right now." ); |
223 | // This version uses dyadic floats with 256 bit mantissas to perform the same |
224 | // calculation as above. Due to floating point imprecision it is only accurate |
225 | // for the first 50 digits, but it's much faster. Since even 128 bit long |
226 | // doubles are only accurate to ~35 digits, the 50 digits of accuracy are |
227 | // enough for these floats to be converted back and forth safely. This is |
228 | // ideal for avoiding the size of the long double table. |
229 | const int shift_amount = |
230 | static_cast<int>(exponent + CALC_SHIFT_CONST - (9 * i)); |
231 | if (shift_amount < 0) { |
232 | return 1; |
233 | } |
234 | fputil::DyadicFloat<INT_SIZE> num(false, 0, 1); |
235 | constexpr UInt<INT_SIZE> MOD_SIZE = |
236 | (UInt<INT_SIZE>(EXP10_9) |
237 | << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); |
238 | |
239 | constexpr UInt<INT_SIZE> FIVE_EXP_MINUS_NINE_MANT{ |
240 | {0xf387295d242602a7, 0xfdd7645e011abac9, 0x31680a88f8953030, |
241 | 0x89705f4136b4a597}}; |
242 | |
243 | static const fputil::DyadicFloat<INT_SIZE> FIVE_EXP_MINUS_NINE( |
244 | false, -276, FIVE_EXP_MINUS_NINE_MANT); |
245 | |
246 | if (i > 0) { |
247 | fputil::DyadicFloat<INT_SIZE> fives = fputil::pow_n(FIVE_EXP_MINUS_NINE, i); |
248 | num = fives; |
249 | } |
250 | num = mul_pow_2(num, shift_amount); |
251 | |
252 | // Adding one is part of the formula. |
253 | UInt<INT_SIZE> int_num = static_cast<UInt<INT_SIZE>>(num) + 1; |
254 | if (int_num > MOD_SIZE) { |
255 | auto rem = |
256 | int_num |
257 | .div_uint_half_times_pow_2( |
258 | EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) |
259 | .value(); |
260 | int_num = rem; |
261 | } |
262 | |
263 | UInt<MID_INT_SIZE> result = int_num; |
264 | |
265 | return result; |
266 | } |
267 | |
268 | // The formula for the table when i is negative (or zero) is as follows: |
269 | // floor(10^(-9i) * 2^(c_0 - e)) % (10^9 * 2^c_0) |
270 | // Since we know i is always negative, we just take it as unsigned and treat it |
271 | // as negative. We do the same with exponent, while they're both always negative |
272 | // in theory, in practice they're converted to positive for simpler |
273 | // calculations. |
274 | // The formula being used looks more like this: |
275 | // floor(10^(9*(-i)) * 2^(c_0 + (-e))) % (10^9 * 2^c_0) |
276 | template <size_t INT_SIZE> |
277 | LIBC_INLINE UInt<MID_INT_SIZE> get_table_negative(int exponent, size_t i) { |
278 | int shift_amount = CALC_SHIFT_CONST - exponent; |
279 | UInt<INT_SIZE> num(1); |
280 | constexpr UInt<INT_SIZE> MOD_SIZE = |
281 | (UInt<INT_SIZE>(EXP10_9) |
282 | << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); |
283 | |
284 | size_t ten_blocks = i; |
285 | size_t five_blocks = 0; |
286 | if (shift_amount < 0) { |
287 | int block_shifts = (-shift_amount) / BLOCK_SIZE; |
288 | if (block_shifts < static_cast<int>(ten_blocks)) { |
289 | ten_blocks = ten_blocks - block_shifts; |
290 | five_blocks = block_shifts; |
291 | shift_amount = shift_amount + (block_shifts * BLOCK_SIZE); |
292 | } else { |
293 | ten_blocks = 0; |
294 | five_blocks = i; |
295 | shift_amount = shift_amount + (static_cast<int>(i) * BLOCK_SIZE); |
296 | } |
297 | } |
298 | |
299 | if (five_blocks > 0) { |
300 | UInt<INT_SIZE> fives(EXP5_9); |
301 | fives.pow_n(five_blocks); |
302 | num = fives; |
303 | } |
304 | if (ten_blocks > 0) { |
305 | UInt<INT_SIZE> tens(EXP10_9); |
306 | tens.pow_n(ten_blocks); |
307 | if (five_blocks <= 0) { |
308 | num = tens; |
309 | } else { |
310 | num *= tens; |
311 | } |
312 | } |
313 | |
314 | if (shift_amount > 0) { |
315 | num = num << shift_amount; |
316 | } else { |
317 | num = num >> (-shift_amount); |
318 | } |
319 | if (num > MOD_SIZE) { |
320 | auto rem = num.div_uint_half_times_pow_2( |
321 | EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) |
322 | .value(); |
323 | num = rem; |
324 | } |
325 | return num; |
326 | } |
327 | |
328 | template <size_t INT_SIZE> |
329 | LIBC_INLINE UInt<MID_INT_SIZE> get_table_negative_df(int exponent, size_t i) { |
330 | static_assert(INT_SIZE == 256, |
331 | "Only 256 is supported as an int size right now." ); |
332 | // This version uses dyadic floats with 256 bit mantissas to perform the same |
333 | // calculation as above. Due to floating point imprecision it is only accurate |
334 | // for the first 50 digits, but it's much faster. Since even 128 bit long |
335 | // doubles are only accurate to ~35 digits, the 50 digits of accuracy are |
336 | // enough for these floats to be converted back and forth safely. This is |
337 | // ideal for avoiding the size of the long double table. |
338 | |
339 | int shift_amount = CALC_SHIFT_CONST - exponent; |
340 | |
341 | fputil::DyadicFloat<INT_SIZE> num(false, 0, 1); |
342 | constexpr UInt<INT_SIZE> MOD_SIZE = |
343 | (UInt<INT_SIZE>(EXP10_9) |
344 | << (CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0))); |
345 | |
346 | constexpr UInt<INT_SIZE> TEN_EXP_NINE_MANT(EXP10_9); |
347 | |
348 | static const fputil::DyadicFloat<INT_SIZE> TEN_EXP_NINE(false, 0, |
349 | TEN_EXP_NINE_MANT); |
350 | |
351 | if (i > 0) { |
352 | fputil::DyadicFloat<INT_SIZE> tens = fputil::pow_n(TEN_EXP_NINE, i); |
353 | num = tens; |
354 | } |
355 | num = mul_pow_2(num, shift_amount); |
356 | |
357 | UInt<INT_SIZE> int_num = static_cast<UInt<INT_SIZE>>(num); |
358 | if (int_num > MOD_SIZE) { |
359 | auto rem = |
360 | int_num |
361 | .div_uint_half_times_pow_2( |
362 | EXP10_9, CALC_SHIFT_CONST + (IDX_SIZE > 1 ? IDX_SIZE : 0)) |
363 | .value(); |
364 | int_num = rem; |
365 | } |
366 | |
367 | UInt<MID_INT_SIZE> result = int_num; |
368 | |
369 | return result; |
370 | } |
371 | |
372 | LIBC_INLINE uint32_t fast_uint_mod_1e9(const UInt<MID_INT_SIZE> &val) { |
373 | // The formula for mult_const is: |
374 | // 1 + floor((2^(bits in target integer size + log_2(divider))) / divider) |
375 | // Where divider is 10^9 and target integer size is 128. |
376 | const UInt<MID_INT_SIZE> mult_const( |
377 | {0x31680A88F8953031u, 0x89705F4136B4A597u, 0}); |
378 | const auto middle = (mult_const * val); |
379 | const uint64_t result = static_cast<uint64_t>(middle[2]); |
380 | const uint64_t shifted = result >> 29; |
381 | return static_cast<uint32_t>(static_cast<uint32_t>(val) - |
382 | (EXP10_9 * shifted)); |
383 | } |
384 | |
385 | LIBC_INLINE uint32_t mul_shift_mod_1e9(const FPBits::StorageType mantissa, |
386 | const UInt<MID_INT_SIZE> &large, |
387 | const int32_t shift_amount) { |
388 | UInt<MID_INT_SIZE + FPBits::STORAGE_LEN> val(large); |
389 | val = (val * mantissa) >> shift_amount; |
390 | return static_cast<uint32_t>( |
391 | val.div_uint_half_times_pow_2(x: static_cast<uint32_t>(EXP10_9), e: 0).value()); |
392 | } |
393 | |
394 | } // namespace internal |
395 | |
396 | // Convert floating point values to their string representation. |
397 | // Because the result may not fit in a reasonably sized array, the caller must |
398 | // request blocks of digits and convert them from integers to strings themself. |
399 | // Blocks contain the most digits that can be stored in an BlockInt. This is 9 |
400 | // digits for a 32 bit int and 18 digits for a 64 bit int. |
401 | // The intended use pattern is to create a FloatToString object of the |
402 | // appropriate type, then call get_positive_blocks to get an approximate number |
403 | // of blocks there are before the decimal point. Now the client code can start |
404 | // calling get_positive_block in a loop from the number of positive blocks to |
405 | // zero. This will give all digits before the decimal point. Then the user can |
406 | // start calling get_negative_block in a loop from 0 until the number of digits |
407 | // they need is reached. As an optimization, the client can use |
408 | // zero_blocks_after_point to find the number of blocks that are guaranteed to |
409 | // be zero after the decimal point and before the non-zero digits. Additionally, |
410 | // is_lowest_block will return if the current block is the lowest non-zero |
411 | // block. |
412 | template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0> |
413 | class FloatToString { |
414 | fputil::FPBits<T> float_bits; |
415 | int exponent; |
416 | FPBits::StorageType mantissa; |
417 | |
418 | static constexpr int FRACTION_LEN = fputil::FPBits<T>::FRACTION_LEN; |
419 | static constexpr int EXP_BIAS = fputil::FPBits<T>::EXP_BIAS; |
420 | |
421 | public: |
422 | LIBC_INLINE constexpr FloatToString(T init_float) : float_bits(init_float) { |
423 | exponent = float_bits.get_explicit_exponent(); |
424 | mantissa = float_bits.get_explicit_mantissa(); |
425 | |
426 | // Adjust for the width of the mantissa. |
427 | exponent -= FRACTION_LEN; |
428 | } |
429 | |
430 | LIBC_INLINE constexpr bool is_nan() { return float_bits.is_nan(); } |
431 | LIBC_INLINE constexpr bool is_inf() { return float_bits.is_inf(); } |
432 | LIBC_INLINE constexpr bool is_inf_or_nan() { |
433 | return float_bits.is_inf_or_nan(); |
434 | } |
435 | |
436 | // get_block returns an integer that represents the digits in the requested |
437 | // block. |
438 | LIBC_INLINE constexpr BlockInt get_positive_block(int block_index) { |
439 | if (exponent >= -FRACTION_LEN) { |
440 | // idx is ceil(exponent/16) or 0 if exponent is negative. This is used to |
441 | // find the coarse section of the POW10_SPLIT table that will be used to |
442 | // calculate the 9 digit window, as well as some other related values. |
443 | const uint32_t idx = |
444 | exponent < 0 |
445 | ? 0 |
446 | : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE; |
447 | |
448 | // shift_amount = -(c0 - exponent) = c_0 + 16 * ceil(exponent/16) - |
449 | // exponent |
450 | |
451 | const uint32_t pos_exp = idx * IDX_SIZE; |
452 | |
453 | UInt<MID_INT_SIZE> val; |
454 | |
455 | #if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT) |
456 | // ----------------------- DYADIC FLOAT CALC MODE ------------------------ |
457 | const int32_t SHIFT_CONST = CALC_SHIFT_CONST; |
458 | val = internal::get_table_positive_df<256>(IDX_SIZE * idx, block_index); |
459 | #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC) |
460 | |
461 | // ---------------------------- INT CALC MODE ---------------------------- |
462 | const int32_t SHIFT_CONST = CALC_SHIFT_CONST; |
463 | const uint64_t MAX_POW_2_SIZE = |
464 | pos_exp + CALC_SHIFT_CONST - (BLOCK_SIZE * block_index); |
465 | const uint64_t MAX_POW_5_SIZE = |
466 | internal::log2_pow5(BLOCK_SIZE * block_index); |
467 | const uint64_t MAX_INT_SIZE = |
468 | (MAX_POW_2_SIZE > MAX_POW_5_SIZE) ? MAX_POW_2_SIZE : MAX_POW_5_SIZE; |
469 | |
470 | if (MAX_INT_SIZE < 1024) { |
471 | val = internal::get_table_positive<1024>(pos_exp, block_index); |
472 | } else if (MAX_INT_SIZE < 2048) { |
473 | val = internal::get_table_positive<2048>(pos_exp, block_index); |
474 | } else if (MAX_INT_SIZE < 4096) { |
475 | val = internal::get_table_positive<4096>(pos_exp, block_index); |
476 | } else if (MAX_INT_SIZE < 8192) { |
477 | val = internal::get_table_positive<8192>(pos_exp, block_index); |
478 | } else if (MAX_INT_SIZE < 16384) { |
479 | val = internal::get_table_positive<16384>(pos_exp, block_index); |
480 | } else { |
481 | val = internal::get_table_positive<16384 + 128>(pos_exp, block_index); |
482 | } |
483 | #else |
484 | // ----------------------------- TABLE MODE ------------------------------ |
485 | const int32_t SHIFT_CONST = TABLE_SHIFT_CONST; |
486 | |
487 | val = POW10_SPLIT[POW10_OFFSET[idx] + block_index]; |
488 | #endif |
489 | const uint32_t shift_amount = SHIFT_CONST + pos_exp - exponent; |
490 | |
491 | const BlockInt digits = |
492 | internal::mul_shift_mod_1e9(mantissa, large: val, shift_amount: (int32_t)(shift_amount)); |
493 | return digits; |
494 | } else { |
495 | return 0; |
496 | } |
497 | } |
498 | |
499 | LIBC_INLINE constexpr BlockInt get_negative_block(int block_index) { |
500 | if (exponent < 0) { |
501 | const int32_t idx = -exponent / IDX_SIZE; |
502 | |
503 | UInt<MID_INT_SIZE> val; |
504 | |
505 | const uint32_t pos_exp = static_cast<uint32_t>(idx * IDX_SIZE); |
506 | |
507 | #if defined(LIBC_COPT_FLOAT_TO_STR_USE_DYADIC_FLOAT) |
508 | // ----------------------- DYADIC FLOAT CALC MODE ------------------------ |
509 | const int32_t SHIFT_CONST = CALC_SHIFT_CONST; |
510 | val = internal::get_table_negative_df<256>(pos_exp, block_index + 1); |
511 | #elif defined(LIBC_COPT_FLOAT_TO_STR_USE_INT_CALC) |
512 | // ---------------------------- INT CALC MODE ---------------------------- |
513 | const int32_t SHIFT_CONST = CALC_SHIFT_CONST; |
514 | |
515 | const uint64_t NUM_FIVES = (block_index + 1) * BLOCK_SIZE; |
516 | // Round MAX_INT_SIZE up to the nearest 64 (adding 1 because log2_pow5 |
517 | // implicitly rounds down). |
518 | const uint64_t MAX_INT_SIZE = |
519 | ((internal::log2_pow5(NUM_FIVES) / 64) + 1) * 64; |
520 | |
521 | if (MAX_INT_SIZE < 1024) { |
522 | val = internal::get_table_negative<1024>(pos_exp, block_index + 1); |
523 | } else if (MAX_INT_SIZE < 2048) { |
524 | val = internal::get_table_negative<2048>(pos_exp, block_index + 1); |
525 | } else if (MAX_INT_SIZE < 4096) { |
526 | val = internal::get_table_negative<4096>(pos_exp, block_index + 1); |
527 | } else if (MAX_INT_SIZE < 8192) { |
528 | val = internal::get_table_negative<8192>(pos_exp, block_index + 1); |
529 | } else if (MAX_INT_SIZE < 16384) { |
530 | val = internal::get_table_negative<16384>(pos_exp, block_index + 1); |
531 | } else { |
532 | val = internal::get_table_negative<16384 + 8192>(pos_exp, |
533 | block_index + 1); |
534 | } |
535 | #else |
536 | // ----------------------------- TABLE MODE ------------------------------ |
537 | // if the requested block is zero |
538 | const int32_t SHIFT_CONST = TABLE_SHIFT_CONST; |
539 | if (block_index < MIN_BLOCK_2[idx]) { |
540 | return 0; |
541 | } |
542 | const uint32_t p = POW10_OFFSET_2[idx] + block_index - MIN_BLOCK_2[idx]; |
543 | // If every digit after the requested block is zero. |
544 | if (p >= POW10_OFFSET_2[idx + 1]) { |
545 | return 0; |
546 | } |
547 | |
548 | val = POW10_SPLIT_2[p]; |
549 | #endif |
550 | const int32_t shift_amount = |
551 | SHIFT_CONST + (-exponent - static_cast<int32_t>(pos_exp)); |
552 | BlockInt digits = |
553 | internal::mul_shift_mod_1e9(mantissa, large: val, shift_amount); |
554 | return digits; |
555 | } else { |
556 | return 0; |
557 | } |
558 | } |
559 | |
560 | LIBC_INLINE constexpr BlockInt get_block(int block_index) { |
561 | if (block_index >= 0) { |
562 | return get_positive_block(block_index); |
563 | } else { |
564 | return get_negative_block(block_index: -1 - block_index); |
565 | } |
566 | } |
567 | |
568 | LIBC_INLINE constexpr size_t get_positive_blocks() { |
569 | if (exponent < -FRACTION_LEN) |
570 | return 0; |
571 | const uint32_t idx = |
572 | exponent < 0 |
573 | ? 0 |
574 | : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE; |
575 | return internal::length_for_num(idx: idx * IDX_SIZE, mantissa_width: FRACTION_LEN); |
576 | } |
577 | |
578 | // This takes the index of a block after the decimal point (a negative block) |
579 | // and return if it's sure that all of the digits after it are zero. |
580 | LIBC_INLINE constexpr bool is_lowest_block(size_t negative_block_index) { |
581 | #ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE |
582 | // The decimal representation of 2**(-i) will have exactly i digits after |
583 | // the decimal point. |
584 | int num_requested_digits = |
585 | static_cast<int>((negative_block_index + 1) * BLOCK_SIZE); |
586 | |
587 | return num_requested_digits > -exponent; |
588 | #else |
589 | const int32_t idx = -exponent / IDX_SIZE; |
590 | const size_t p = |
591 | POW10_OFFSET_2[idx] + negative_block_index - MIN_BLOCK_2[idx]; |
592 | // If the remaining digits are all 0, then this is the lowest block. |
593 | return p >= POW10_OFFSET_2[idx + 1]; |
594 | #endif |
595 | } |
596 | |
597 | LIBC_INLINE constexpr size_t zero_blocks_after_point() { |
598 | #ifdef LIBC_COPT_FLOAT_TO_STR_NO_TABLE |
599 | if (exponent < -FRACTION_LEN) { |
600 | const int pos_exp = -exponent - 1; |
601 | const uint32_t pos_idx = |
602 | static_cast<uint32_t>(pos_exp + (IDX_SIZE - 1)) / IDX_SIZE; |
603 | const int32_t pos_len = ((internal::ceil_log10_pow2(pos_idx * IDX_SIZE) - |
604 | internal::ceil_log10_pow2(FRACTION_LEN + 1)) / |
605 | BLOCK_SIZE) - |
606 | 1; |
607 | return static_cast<uint32_t>(pos_len > 0 ? pos_len : 0); |
608 | } |
609 | return 0; |
610 | #else |
611 | return MIN_BLOCK_2[-exponent / IDX_SIZE]; |
612 | #endif |
613 | } |
614 | }; |
615 | |
616 | #if !defined(LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64) && \ |
617 | !defined(LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD) |
618 | // --------------------------- LONG DOUBLE FUNCTIONS --------------------------- |
619 | |
620 | // this algorithm will work exactly the same for 80 bit and 128 bit long |
621 | // doubles. They have the same max exponent, but even if they didn't the |
622 | // constants should be calculated to be correct for any provided floating point |
623 | // type. |
624 | |
625 | template <> class FloatToString<long double> { |
626 | fputil::FPBits<long double> float_bits; |
627 | bool is_negative = 0; |
628 | int exponent = 0; |
629 | FPBits::StorageType mantissa = 0; |
630 | |
631 | static constexpr int FRACTION_LEN = fputil::FPBits<long double>::FRACTION_LEN; |
632 | static constexpr int EXP_BIAS = fputil::FPBits<long double>::EXP_BIAS; |
633 | static constexpr size_t UINT_WORD_SIZE = 64; |
634 | |
635 | static constexpr size_t FLOAT_AS_INT_WIDTH = |
636 | internal::div_ceil(num: fputil::FPBits<long double>::MAX_BIASED_EXPONENT - |
637 | FPBits::EXP_BIAS, |
638 | denom: UINT_WORD_SIZE) * |
639 | UINT_WORD_SIZE; |
640 | static constexpr size_t = |
641 | internal::div_ceil(num: sizeof(long double) * CHAR_BIT, denom: UINT_WORD_SIZE) * |
642 | UINT_WORD_SIZE; |
643 | |
644 | using wide_int = UInt<FLOAT_AS_INT_WIDTH + EXTRA_INT_WIDTH>; |
645 | |
646 | // float_as_fixed represents the floating point number as a fixed point number |
647 | // with the point EXTRA_INT_WIDTH bits from the left of the number. This can |
648 | // store any number with a negative exponent. |
649 | wide_int float_as_fixed = 0; |
650 | int int_block_index = 0; |
651 | |
652 | static constexpr size_t BLOCK_BUFFER_LEN = |
653 | internal::div_ceil(num: internal::log10_pow2(e: FLOAT_AS_INT_WIDTH), denom: BLOCK_SIZE) + |
654 | 1; |
655 | BlockInt block_buffer[BLOCK_BUFFER_LEN] = {0}; |
656 | size_t block_buffer_valid = 0; |
657 | |
658 | template <size_t Bits> |
659 | LIBC_INLINE static constexpr BlockInt grab_digits(UInt<Bits> &int_num) { |
660 | auto wide_result = int_num.div_uint_half_times_pow_2(EXP5_9, 9); |
661 | // the optional only comes into effect when dividing by 0, which will |
662 | // never happen here. Thus, we just assert that it has value. |
663 | LIBC_ASSERT(wide_result.has_value()); |
664 | return static_cast<BlockInt>(wide_result.value()); |
665 | } |
666 | |
667 | LIBC_INLINE static constexpr void zero_leading_digits(wide_int &int_num) { |
668 | // WORD_SIZE is the width of the numbers used to internally represent the |
669 | // UInt |
670 | for (size_t i = 0; i < EXTRA_INT_WIDTH / wide_int::WORD_SIZE; ++i) |
671 | int_num[i + (FLOAT_AS_INT_WIDTH / wide_int::WORD_SIZE)] = 0; |
672 | } |
673 | |
674 | // init_convert initializes float_as_int, cur_block, and block_buffer based on |
675 | // the mantissa and exponent of the initial number. Calling it will always |
676 | // return the class to the starting state. |
677 | LIBC_INLINE constexpr void init_convert() { |
678 | // No calculation necessary for the 0 case. |
679 | if (mantissa == 0 && exponent == 0) |
680 | return; |
681 | |
682 | if (exponent > 0) { |
683 | // if the exponent is positive, then the number is fully above the decimal |
684 | // point. In this case we represent the float as an integer, then divide |
685 | // by 10^BLOCK_SIZE and take the remainder as our next block. This |
686 | // generates the digits from right to left, but the digits will be written |
687 | // from left to right, so it caches the results so they can be read in |
688 | // reverse order. |
689 | |
690 | wide_int float_as_int = mantissa; |
691 | |
692 | float_as_int <<= exponent; |
693 | int_block_index = 0; |
694 | |
695 | while (float_as_int > 0) { |
696 | LIBC_ASSERT(int_block_index < static_cast<int>(BLOCK_BUFFER_LEN)); |
697 | block_buffer[int_block_index] = |
698 | grab_digits<FLOAT_AS_INT_WIDTH + EXTRA_INT_WIDTH>(int_num&: float_as_int); |
699 | ++int_block_index; |
700 | } |
701 | block_buffer_valid = int_block_index; |
702 | |
703 | } else { |
704 | // if the exponent is not positive, then the number is at least partially |
705 | // below the decimal point. In this case we represent the float as a fixed |
706 | // point number with the decimal point after the top EXTRA_INT_WIDTH bits. |
707 | float_as_fixed = mantissa; |
708 | |
709 | const int SHIFT_AMOUNT = FLOAT_AS_INT_WIDTH + exponent; |
710 | static_assert(EXTRA_INT_WIDTH >= sizeof(long double) * 8); |
711 | float_as_fixed <<= SHIFT_AMOUNT; |
712 | |
713 | // If there are still digits above the decimal point, handle those. |
714 | if (cpp::countl_zero(value: float_as_fixed) < |
715 | static_cast<int>(EXTRA_INT_WIDTH)) { |
716 | UInt<EXTRA_INT_WIDTH> above_decimal_point = |
717 | float_as_fixed >> FLOAT_AS_INT_WIDTH; |
718 | |
719 | size_t positive_int_block_index = 0; |
720 | while (above_decimal_point > 0) { |
721 | block_buffer[positive_int_block_index] = |
722 | grab_digits<EXTRA_INT_WIDTH>(int_num&: above_decimal_point); |
723 | ++positive_int_block_index; |
724 | } |
725 | block_buffer_valid = positive_int_block_index; |
726 | |
727 | // Zero all digits above the decimal point. |
728 | zero_leading_digits(int_num&: float_as_fixed); |
729 | int_block_index = 0; |
730 | } |
731 | } |
732 | } |
733 | |
734 | public: |
735 | LIBC_INLINE constexpr FloatToString(long double init_float) |
736 | : float_bits(init_float) { |
737 | is_negative = float_bits.is_neg(); |
738 | exponent = float_bits.get_explicit_exponent(); |
739 | mantissa = float_bits.get_explicit_mantissa(); |
740 | |
741 | // Adjust for the width of the mantissa. |
742 | exponent -= FRACTION_LEN; |
743 | |
744 | this->init_convert(); |
745 | } |
746 | |
747 | LIBC_INLINE constexpr size_t get_positive_blocks() { |
748 | if (exponent < -FRACTION_LEN) |
749 | return 0; |
750 | |
751 | const uint32_t idx = |
752 | exponent < 0 |
753 | ? 0 |
754 | : static_cast<uint32_t>(exponent + (IDX_SIZE - 1)) / IDX_SIZE; |
755 | return internal::length_for_num(idx: idx * IDX_SIZE, mantissa_width: FRACTION_LEN); |
756 | } |
757 | |
758 | LIBC_INLINE constexpr size_t zero_blocks_after_point() { |
759 | #ifdef LIBC_COPT_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE |
760 | return MIN_BLOCK_2[-exponent / IDX_SIZE]; |
761 | #else |
762 | if (exponent >= -FRACTION_LEN) |
763 | return 0; |
764 | |
765 | const int pos_exp = -exponent - 1; |
766 | const uint32_t pos_idx = |
767 | static_cast<uint32_t>(pos_exp + (IDX_SIZE - 1)) / IDX_SIZE; |
768 | const int32_t pos_len = ((internal::ceil_log10_pow2(e: pos_idx * IDX_SIZE) - |
769 | internal::ceil_log10_pow2(e: FRACTION_LEN + 1)) / |
770 | BLOCK_SIZE) - |
771 | 1; |
772 | return static_cast<uint32_t>(pos_len > 0 ? pos_len : 0); |
773 | #endif |
774 | } |
775 | |
776 | LIBC_INLINE constexpr bool is_lowest_block(size_t negative_block_index) { |
777 | // The decimal representation of 2**(-i) will have exactly i digits after |
778 | // the decimal point. |
779 | const int num_requested_digits = |
780 | static_cast<int>((negative_block_index + 1) * BLOCK_SIZE); |
781 | |
782 | return num_requested_digits > -exponent; |
783 | } |
784 | |
785 | LIBC_INLINE constexpr BlockInt get_positive_block(int block_index) { |
786 | if (exponent < -FRACTION_LEN) |
787 | return 0; |
788 | if (block_index > static_cast<int>(block_buffer_valid) || block_index < 0) |
789 | return 0; |
790 | |
791 | LIBC_ASSERT(block_index < static_cast<int>(BLOCK_BUFFER_LEN)); |
792 | |
793 | return block_buffer[block_index]; |
794 | } |
795 | |
796 | LIBC_INLINE constexpr BlockInt get_negative_block(int negative_block_index) { |
797 | if (exponent >= 0) |
798 | return 0; |
799 | |
800 | // negative_block_index starts at 0 with the first block after the decimal |
801 | // point, and 1 with the second and so on. This converts to the same |
802 | // block_index used everywhere else. |
803 | |
804 | const int block_index = -1 - negative_block_index; |
805 | |
806 | // If we're currently after the requested block (remember these are |
807 | // negative indices) we reset the number to the start. This is only |
808 | // likely to happen in %g calls. This will also reset int_block_index. |
809 | // if (block_index > int_block_index) { |
810 | // init_convert(); |
811 | // } |
812 | |
813 | // Printf is the only existing user of this code and it will only ever move |
814 | // downwards, except for %g but that currently creates a second |
815 | // float_to_string object so this assertion still holds. If a new user needs |
816 | // the ability to step backwards, uncomment the code above. |
817 | LIBC_ASSERT(block_index <= int_block_index); |
818 | |
819 | // If we are currently before the requested block. Step until we reach the |
820 | // requested block. This is likely to only be one step. |
821 | while (block_index < int_block_index) { |
822 | zero_leading_digits(int_num&: float_as_fixed); |
823 | float_as_fixed.mul(x: EXP10_9); |
824 | --int_block_index; |
825 | } |
826 | |
827 | // We're now on the requested block, return the current block. |
828 | return static_cast<BlockInt>(float_as_fixed >> FLOAT_AS_INT_WIDTH); |
829 | } |
830 | |
831 | LIBC_INLINE constexpr BlockInt get_block(int block_index) { |
832 | if (block_index >= 0) |
833 | return get_positive_block(block_index); |
834 | |
835 | return get_negative_block(negative_block_index: -1 - block_index); |
836 | } |
837 | }; |
838 | |
839 | #endif // !LIBC_TYPES_LONG_DOUBLE_IS_FLOAT64 && |
840 | // !LIBC_COPT_FLOAT_TO_STR_NO_SPECIALIZE_LD |
841 | |
842 | } // namespace LIBC_NAMESPACE |
843 | |
844 | #endif // LLVM_LIBC_SRC___SUPPORT_FLOAT_TO_STRING_H |
845 | |