1 | //===-- Float type specifier converters for scanf ---------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "src/stdio/scanf_core/float_converter.h" |
10 | |
11 | #include "src/__support/CPP/limits.h" |
12 | #include "src/__support/char_vector.h" |
13 | #include "src/__support/ctype_utils.h" |
14 | #include "src/stdio/scanf_core/converter_utils.h" |
15 | #include "src/stdio/scanf_core/core_structs.h" |
16 | #include "src/stdio/scanf_core/reader.h" |
17 | |
18 | #include <stddef.h> |
19 | |
20 | namespace LIBC_NAMESPACE { |
21 | namespace scanf_core { |
22 | |
23 | // All of the floating point conversions are the same for scanf, every name will |
24 | // accept every style. |
25 | int convert_float(Reader *reader, const FormatSection &to_conv) { |
26 | // %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number, |
27 | // infinity, or NaN, whose format is the same as expected for the subject |
28 | // sequence of the strtod function. The corresponding argument shall be a |
29 | // pointer to floating." |
30 | |
31 | CharVector out_str = CharVector(); |
32 | bool is_number = false; |
33 | |
34 | size_t max_width = cpp::numeric_limits<size_t>::max(); |
35 | if (to_conv.max_width > 0) { |
36 | max_width = to_conv.max_width; |
37 | } |
38 | |
39 | char cur_char = reader->getc(); |
40 | // Handle the sign. |
41 | if (cur_char == '+' || cur_char == '-') { |
42 | if (!out_str.append(new_char: cur_char)) { |
43 | return ALLOCATION_FAILURE; |
44 | } |
45 | if (out_str.length() == max_width) { |
46 | return MATCHING_FAILURE; |
47 | } else { |
48 | cur_char = reader->getc(); |
49 | } |
50 | } |
51 | |
52 | static constexpr char DECIMAL_POINT = '.'; |
53 | static const char inf_string[] = "infinity" ; |
54 | |
55 | // Handle inf |
56 | |
57 | if (to_lower(a: cur_char) == inf_string[0]) { |
58 | size_t inf_index = 0; |
59 | |
60 | for (; inf_index < sizeof(inf_string) && out_str.length() < max_width && |
61 | to_lower(a: cur_char) == inf_string[inf_index]; |
62 | ++inf_index) { |
63 | if (!out_str.append(new_char: cur_char)) { |
64 | return ALLOCATION_FAILURE; |
65 | } |
66 | cur_char = reader->getc(); |
67 | } |
68 | |
69 | if (inf_index == 3 || inf_index == sizeof(inf_string) - 1) { |
70 | write_float_with_length(str: out_str.c_str(), to_conv); |
71 | return READ_OK; |
72 | } else { |
73 | return MATCHING_FAILURE; |
74 | } |
75 | } |
76 | |
77 | static const char nan_string[] = "nan" ; |
78 | |
79 | // Handle nan |
80 | if (to_lower(a: cur_char) == nan_string[0]) { |
81 | size_t nan_index = 0; |
82 | |
83 | for (; nan_index < sizeof(nan_string) && out_str.length() < max_width && |
84 | to_lower(a: cur_char) == nan_string[nan_index]; |
85 | ++nan_index) { |
86 | if (!out_str.append(new_char: cur_char)) { |
87 | return ALLOCATION_FAILURE; |
88 | } |
89 | cur_char = reader->getc(); |
90 | } |
91 | |
92 | if (nan_index == sizeof(nan_string) - 1) { |
93 | write_float_with_length(str: out_str.c_str(), to_conv); |
94 | return READ_OK; |
95 | } else { |
96 | return MATCHING_FAILURE; |
97 | } |
98 | } |
99 | |
100 | // Assume base of 10 by default but check if it is actually base 16. |
101 | int base = 10; |
102 | |
103 | // If the string starts with 0 it might be in hex. |
104 | if (cur_char == '0') { |
105 | is_number = true; |
106 | // Read the next character to check. |
107 | if (!out_str.append(new_char: cur_char)) { |
108 | return ALLOCATION_FAILURE; |
109 | } |
110 | // If we've hit the end, then this is "0", which is valid. |
111 | if (out_str.length() == max_width) { |
112 | write_float_with_length(str: out_str.c_str(), to_conv); |
113 | return READ_OK; |
114 | } else { |
115 | cur_char = reader->getc(); |
116 | } |
117 | |
118 | // If that next character is an 'x' then this is a hexadecimal number. |
119 | if (to_lower(a: cur_char) == 'x') { |
120 | base = 16; |
121 | |
122 | if (!out_str.append(new_char: cur_char)) { |
123 | return ALLOCATION_FAILURE; |
124 | } |
125 | // If we've hit the end here, we have "0x" which is a valid prefix to a |
126 | // floating point number, and will be evaluated to 0. |
127 | if (out_str.length() == max_width) { |
128 | write_float_with_length(str: out_str.c_str(), to_conv); |
129 | return READ_OK; |
130 | } else { |
131 | cur_char = reader->getc(); |
132 | } |
133 | } |
134 | } |
135 | |
136 | const char exponent_mark = ((base == 10) ? 'e' : 'p'); |
137 | bool after_decimal = false; |
138 | |
139 | // The format for the remaining characters at this point is DD.DDe+/-DD for |
140 | // base 10 and XX.XXp+/-DD for base 16 |
141 | |
142 | // This handles the digits before and after the decimal point, but not the |
143 | // exponent. |
144 | while (out_str.length() < max_width) { |
145 | if (internal::isalnum(ch: cur_char) && |
146 | internal::b36_char_to_int(input: cur_char) < base) { |
147 | is_number = true; |
148 | if (!out_str.append(new_char: cur_char)) { |
149 | return ALLOCATION_FAILURE; |
150 | } |
151 | cur_char = reader->getc(); |
152 | } else if (cur_char == DECIMAL_POINT && !after_decimal) { |
153 | after_decimal = true; |
154 | if (!out_str.append(new_char: cur_char)) { |
155 | return ALLOCATION_FAILURE; |
156 | } |
157 | cur_char = reader->getc(); |
158 | } else { |
159 | break; |
160 | } |
161 | } |
162 | |
163 | // Handle the exponent, which has an exponent mark, an optional sign, and |
164 | // decimal digits. |
165 | if (to_lower(a: cur_char) == exponent_mark) { |
166 | if (!out_str.append(new_char: cur_char)) { |
167 | return ALLOCATION_FAILURE; |
168 | } |
169 | if (out_str.length() == max_width) { |
170 | // This is laid out in the standard as being a matching error (100e is not |
171 | // a valid float) but may conflict with existing implementations. |
172 | return MATCHING_FAILURE; |
173 | } else { |
174 | cur_char = reader->getc(); |
175 | } |
176 | |
177 | if (cur_char == '+' || cur_char == '-') { |
178 | if (!out_str.append(new_char: cur_char)) { |
179 | return ALLOCATION_FAILURE; |
180 | } |
181 | if (out_str.length() == max_width) { |
182 | return MATCHING_FAILURE; |
183 | } else { |
184 | cur_char = reader->getc(); |
185 | } |
186 | } |
187 | |
188 | // It is specified by the standard that "100er" is a matching failure since |
189 | // the longest prefix of a possibly valid floating-point number (which is |
190 | // "100e") is not a valid floating-point number. If there is an exponent |
191 | // mark then there must be a digit after it else the number is not valid. |
192 | // Some implementations will roll back two characters (to just "100") and |
193 | // accept that since the prefix is not valid, and some will interpret an |
194 | // exponent mark followed by no digits as an additional exponent of 0 |
195 | // (accepting "100e" and returning 100.0). Both of these behaviors are wrong |
196 | // by the standard, but they may be used in real code, see Hyrum's law. This |
197 | // code follows the standard, but may be incompatible due to code expecting |
198 | // these bugs. |
199 | if (!internal::isdigit(ch: cur_char)) { |
200 | return MATCHING_FAILURE; |
201 | } |
202 | |
203 | while (internal::isdigit(ch: cur_char) && out_str.length() < max_width) { |
204 | if (!out_str.append(new_char: cur_char)) { |
205 | return ALLOCATION_FAILURE; |
206 | } |
207 | cur_char = reader->getc(); |
208 | } |
209 | } |
210 | |
211 | // We always read one more character than will be used, so we have to put the |
212 | // last one back. |
213 | reader->ungetc(c: cur_char); |
214 | |
215 | // If we haven't actually found any digits, this is a matching failure (this |
216 | // catches cases like "+.") |
217 | if (!is_number) { |
218 | return MATCHING_FAILURE; |
219 | } |
220 | write_float_with_length(str: out_str.c_str(), to_conv); |
221 | |
222 | return READ_OK; |
223 | } |
224 | |
225 | } // namespace scanf_core |
226 | } // namespace LIBC_NAMESPACE |
227 | |