| 1 | //===-- strtofloat_fuzz.cpp -----------------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// Fuzzing test for llvm-libc atof, strtof, strtod, and strtold. |
| 10 | /// |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | #include "src/stdlib/atof.h" |
| 13 | #include "src/stdlib/strtod.h" |
| 14 | #include "src/stdlib/strtof.h" |
| 15 | #include "src/stdlib/strtold.h" |
| 16 | |
| 17 | #include "src/__support/FPUtil/FPBits.h" |
| 18 | |
| 19 | #include "hdr/math_macros.h" |
| 20 | #include <stddef.h> |
| 21 | #include <stdint.h> |
| 22 | |
| 23 | #include "utils/MPFRWrapper/mpfr_inc.h" |
| 24 | |
| 25 | using LIBC_NAMESPACE::fputil::FPBits; |
| 26 | |
| 27 | // This function calculates the effective precision for a given float type and |
| 28 | // exponent. Subnormals have a lower effective precision since they don't |
| 29 | // necessarily use all of the bits of the mantissa. |
| 30 | template <typename F> inline constexpr int effective_precision(int exponent) { |
| 31 | const int full_precision = FPBits<F>::FRACTION_LEN + 1; |
| 32 | |
| 33 | // This is intended to be 0 when the exponent is the lowest normal and |
| 34 | // increase as the exponent's magnitude increases. |
| 35 | const int bits_below_normal = (-exponent) - (FPBits<F>::EXP_BIAS - 1); |
| 36 | |
| 37 | // The precision should be the normal, full precision, minus the bits lost |
| 38 | // by this being a subnormal, minus one for the implicit leading one. |
| 39 | const int bits_if_subnormal = full_precision - bits_below_normal - 1; |
| 40 | |
| 41 | if (bits_below_normal >= 0) { |
| 42 | return bits_if_subnormal; |
| 43 | } |
| 44 | return full_precision; |
| 45 | } |
| 46 | |
| 47 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { |
| 48 | // const char newstr[] = "123"; |
| 49 | // data = reinterpret_cast<const uint8_t *>(newstr); |
| 50 | // size = sizeof(newstr); |
| 51 | uint8_t *container = new uint8_t[size + 1]; |
| 52 | if (!container) |
| 53 | __builtin_trap(); |
| 54 | size_t i; |
| 55 | |
| 56 | for (i = 0; i < size; ++i) { |
| 57 | // MPFR's strtofr uses "@" as a base-independent exponent symbol |
| 58 | if (data[i] != '@') |
| 59 | container[i] = data[i]; |
| 60 | else { |
| 61 | container[i] = '#'; |
| 62 | } |
| 63 | } |
| 64 | container[size] = '\0'; // Add null terminator to container. |
| 65 | |
| 66 | const char *str_ptr = reinterpret_cast<const char *>(container); |
| 67 | |
| 68 | char *out_ptr = nullptr; |
| 69 | |
| 70 | size_t base = 0; |
| 71 | |
| 72 | // This is just used to determine the base and precision. |
| 73 | mpfr_t result; |
| 74 | mpfr_init2(result, 256); |
| 75 | mpfr_t bin_result; |
| 76 | mpfr_init2(bin_result, 256); |
| 77 | mpfr_strtofr(result, str_ptr, &out_ptr, 0 /* base */, MPFR_RNDN); |
| 78 | ptrdiff_t result_strlen = out_ptr - str_ptr; |
| 79 | mpfr_strtofr(bin_result, str_ptr, &out_ptr, 2 /* base */, MPFR_RNDN); |
| 80 | ptrdiff_t bin_result_strlen = out_ptr - str_ptr; |
| 81 | |
| 82 | long double bin_result_ld = mpfr_get_ld(bin_result, MPFR_RNDN); |
| 83 | long double result_ld = mpfr_get_ld(result, MPFR_RNDN); |
| 84 | |
| 85 | // This detects if mpfr's strtofr selected a base of 2, which libc does not |
| 86 | // support. If a base 2 decoding is detected, it is replaced by a base 10 |
| 87 | // decoding. |
| 88 | if ((bin_result_ld != 0.0 || bin_result_strlen == result_strlen) && |
| 89 | bin_result_ld == result_ld) { |
| 90 | mpfr_strtofr(result, str_ptr, &out_ptr, 10 /* base */, MPFR_RNDN); |
| 91 | result_strlen = out_ptr - str_ptr; |
| 92 | base = 10; |
| 93 | } |
| 94 | |
| 95 | auto result_exp = mpfr_get_exp(result); |
| 96 | |
| 97 | mpfr_clear(result); |
| 98 | mpfr_clear(bin_result); |
| 99 | |
| 100 | // These must be calculated with the correct precision, and not any more, to |
| 101 | // prevent numbers like 66336650.00...01 (many zeroes) from causing an issue. |
| 102 | // 66336650 is exactly between two float values (66336652 and 66336648) so the |
| 103 | // correct float result for 66336650.00...01 is rounding up to 66336652. The |
| 104 | // correct double is instead 66336650, which when converted to float is |
| 105 | // rounded down to 66336648. This means we have to compare against the correct |
| 106 | // precision to get the correct result. |
| 107 | |
| 108 | // TODO: Add support for other rounding modes. |
| 109 | int float_precision = effective_precision<float>(result_exp); |
| 110 | if (float_precision >= 2) { |
| 111 | mpfr_t mpfr_float; |
| 112 | mpfr_init2(mpfr_float, float_precision); |
| 113 | mpfr_strtofr(mpfr_float, str_ptr, &out_ptr, base, MPFR_RNDN); |
| 114 | float volatile float_result = mpfr_get_flt(mpfr_float, MPFR_RNDN); |
| 115 | auto volatile strtof_result = LIBC_NAMESPACE::strtof(str_ptr, &out_ptr); |
| 116 | ptrdiff_t strtof_strlen = out_ptr - str_ptr; |
| 117 | if (result_strlen != strtof_strlen) |
| 118 | __builtin_trap(); |
| 119 | // If any result is NaN, all of them should be NaN. We can't use the usual |
| 120 | // comparisons because NaN != NaN. |
| 121 | if (FPBits<float>(float_result).is_nan() != |
| 122 | FPBits<float>(strtof_result).is_nan()) |
| 123 | __builtin_trap(); |
| 124 | if (!FPBits<float>(float_result).is_nan() && float_result != strtof_result) |
| 125 | __builtin_trap(); |
| 126 | mpfr_clear(mpfr_float); |
| 127 | } |
| 128 | |
| 129 | int double_precision = effective_precision<double>(result_exp); |
| 130 | if (double_precision >= 2) { |
| 131 | mpfr_t mpfr_double; |
| 132 | mpfr_init2(mpfr_double, double_precision); |
| 133 | mpfr_strtofr(mpfr_double, str_ptr, &out_ptr, base, MPFR_RNDN); |
| 134 | double volatile double_result = mpfr_get_d(mpfr_double, MPFR_RNDN); |
| 135 | auto volatile strtod_result = LIBC_NAMESPACE::strtod(str_ptr, &out_ptr); |
| 136 | auto volatile atof_result = LIBC_NAMESPACE::atof(str_ptr); |
| 137 | ptrdiff_t strtod_strlen = out_ptr - str_ptr; |
| 138 | if (result_strlen != strtod_strlen) |
| 139 | __builtin_trap(); |
| 140 | if (FPBits<double>(double_result).is_nan() != |
| 141 | FPBits<double>(strtod_result).is_nan() || |
| 142 | FPBits<double>(double_result).is_nan() != |
| 143 | FPBits<double>(atof_result).is_nan()) |
| 144 | __builtin_trap(); |
| 145 | if (!FPBits<double>(double_result).is_nan() && |
| 146 | (double_result != strtod_result || double_result != atof_result)) |
| 147 | __builtin_trap(); |
| 148 | mpfr_clear(mpfr_double); |
| 149 | } |
| 150 | |
| 151 | int long_double_precision = effective_precision<long double>(result_exp); |
| 152 | if (long_double_precision >= 2) { |
| 153 | mpfr_t mpfr_long_double; |
| 154 | mpfr_init2(mpfr_long_double, long_double_precision); |
| 155 | mpfr_strtofr(mpfr_long_double, str_ptr, &out_ptr, base, MPFR_RNDN); |
| 156 | long double volatile long_double_result = |
| 157 | mpfr_get_ld(mpfr_long_double, MPFR_RNDN); |
| 158 | auto volatile strtold_result = LIBC_NAMESPACE::strtold(str_ptr, &out_ptr); |
| 159 | ptrdiff_t strtold_strlen = out_ptr - str_ptr; |
| 160 | if (result_strlen != strtold_strlen) |
| 161 | __builtin_trap(); |
| 162 | if (FPBits<long double>(long_double_result).is_nan() ^ |
| 163 | FPBits<long double>(strtold_result).is_nan()) |
| 164 | __builtin_trap(); |
| 165 | if (!FPBits<long double>(long_double_result).is_nan() && |
| 166 | long_double_result != strtold_result) |
| 167 | __builtin_trap(); |
| 168 | mpfr_clear(mpfr_long_double); |
| 169 | } |
| 170 | |
| 171 | delete[] container; |
| 172 | return 0; |
| 173 | } |
| 174 | |