Warning: This file is not a C or C++ file. It does not have highlighting.
1 | //===-- Int type specifier converter for scanf ------------------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H |
10 | #define LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H |
11 | |
12 | #include "src/__support/CPP/limits.h" |
13 | #include "src/__support/ctype_utils.h" |
14 | #include "src/__support/macros/config.h" |
15 | #include "src/stdio/scanf_core/converter_utils.h" |
16 | #include "src/stdio/scanf_core/core_structs.h" |
17 | #include "src/stdio/scanf_core/reader.h" |
18 | |
19 | #include <stddef.h> |
20 | |
21 | namespace LIBC_NAMESPACE_DECL { |
22 | namespace scanf_core { |
23 | |
24 | // This code is very similar to the code in __support/str_to_integer.h but is |
25 | // not quite the same. Here is the list of differences and why they exist: |
26 | // 1) This takes a reader and a format section instead of a char* and the base. |
27 | // This should be fairly self explanatory. While the char* could be adapted |
28 | // to a reader and the base could be calculated ahead of time, the |
29 | // semantics are slightly different, specifically a char* can be indexed |
30 | // freely (I can read str[2] and then str[0]) whereas a File (which the |
31 | // reader may contain) cannot. |
32 | // 2) Because this uses a Reader, this function can only unget once. |
33 | // This is relevant because scanf specifies it reads the "longest sequence |
34 | // of input characters which does not exceed any specified field width and |
35 | // which is, or is a prefix of, a matching input sequence." Whereas the |
36 | // strtol function accepts "the longest initial subsequence of the input |
37 | // string (...) that is of the expected form." This is demonstrated by the |
38 | // differences in how they deal with the string "0xZZZ" when parsing as |
39 | // hexadecimal. Scanf will read the "0x" as a valid prefix and return 0, |
40 | // since it reads the first 'Z', sees that it's not a valid hex digit, and |
41 | // reverses one character. The strtol function on the other hand only |
42 | // accepts the "0" since that's the longest valid hexadecimal sequence. It |
43 | // sees the 'Z' after the "0x" and determines that this is not the prefix |
44 | // to a valid hex string. |
45 | // 3) This conversion may have a maximum width. |
46 | // If a maximum width is specified, this conversion is only allowed to |
47 | // accept a certain number of characters. Strtol doesn't have any such |
48 | // limitation. |
49 | template <typename T> |
50 | int convert_int(Reader<T> *reader, const FormatSection &to_conv) { |
51 | // %d "Matches an optionally signed decimal integer [...] with the value 10 |
52 | // for the base argument. The corresponding argument shall be a pointer to |
53 | // signed integer." |
54 | |
55 | // %i "Matches an optionally signed integer [...] with the value 0 for the |
56 | // base argument. The corresponding argument shall be a pointer to signed |
57 | // integer." |
58 | |
59 | // %u "Matches an optionally signed decimal integer [...] with the value 10 |
60 | // for the base argument. The corresponding argument shall be a pointer to |
61 | // unsigned integer" |
62 | |
63 | // %o "Matches an optionally signed octal integer [...] with the value 8 for |
64 | // the base argument. The corresponding argument shall be a pointer to |
65 | // unsigned integer" |
66 | |
67 | // %x/X "Matches an optionally signed hexadecimal integer [...] with the value |
68 | // 16 for the base argument. The corresponding argument shall be a pointer to |
69 | // unsigned integer" |
70 | |
71 | size_t max_width = cpp::numeric_limits<size_t>::max(); |
72 | if (to_conv.max_width > 0) { |
73 | max_width = to_conv.max_width; |
74 | } |
75 | |
76 | uintmax_t result = 0; |
77 | bool is_number = false; |
78 | bool is_signed = false; |
79 | int base = 0; |
80 | if (to_conv.conv_name == 'i') { |
81 | base = 0; |
82 | is_signed = true; |
83 | } else if (to_conv.conv_name == 'o') { |
84 | base = 8; |
85 | } else if (internal::tolower(to_conv.conv_name) == 'x' || |
86 | to_conv.conv_name == 'p') { |
87 | base = 16; |
88 | } else if (to_conv.conv_name == 'd') { |
89 | base = 10; |
90 | is_signed = true; |
91 | } else { // conv_name must be 'u' |
92 | base = 10; |
93 | } |
94 | |
95 | char cur_char = reader->getc(); |
96 | |
97 | char result_sign = '+'; |
98 | if (cur_char == '+' || cur_char == '-') { |
99 | result_sign = cur_char; |
100 | if (max_width > 1) { |
101 | --max_width; |
102 | cur_char = reader->getc(); |
103 | } else { |
104 | // If the max width has been hit already, then the return value must be 0 |
105 | // since no actual digits of the number have been parsed yet. |
106 | write_int_with_length(0, to_conv); |
107 | return MATCHING_FAILURE; |
108 | } |
109 | } |
110 | const bool is_negative = result_sign == '-'; |
111 | |
112 | // Base of 0 means automatically determine the base. Base of 16 may have a |
113 | // prefix of "0x" |
114 | if (base == 0 || base == 16) { |
115 | // If the first character is 0, then it could be octal or hex. |
116 | if (cur_char == '0') { |
117 | is_number = true; |
118 | |
119 | // Read the next character to check. |
120 | if (max_width > 1) { |
121 | --max_width; |
122 | cur_char = reader->getc(); |
123 | } else { |
124 | write_int_with_length(0, to_conv); |
125 | return READ_OK; |
126 | } |
127 | |
128 | if (internal::tolower(cur_char) == 'x') { |
129 | // This is a valid hex prefix. |
130 | |
131 | is_number = false; |
132 | // A valid hex prefix is not necessarily a valid number. For the |
133 | // conversion to be valid it needs to use all of the characters it |
134 | // consumes. From the standard: |
135 | // 7.23.6.2 paragraph 9: "An input item is defined as the longest |
136 | // sequence of input characters which does not exceed any specified |
137 | // field width and which is, or is a prefix of, a matching input |
138 | // sequence." |
139 | // 7.23.6.2 paragraph 10: "If the input item is not a matching sequence, |
140 | // the execution of the directive fails: this condition is a matching |
141 | // failure" |
142 | base = 16; |
143 | if (max_width > 1) { |
144 | --max_width; |
145 | cur_char = reader->getc(); |
146 | } else { |
147 | return MATCHING_FAILURE; |
148 | } |
149 | |
150 | } else { |
151 | if (base == 0) { |
152 | base = 8; |
153 | } |
154 | } |
155 | } else if (base == 0) { |
156 | if (internal::isdigit(cur_char)) { |
157 | // If the first character is a different number, then it's 10. |
158 | base = 10; |
159 | } else { |
160 | // If the first character isn't a valid digit, then there are no valid |
161 | // digits at all. The number is 0. |
162 | reader->ungetc(cur_char); |
163 | write_int_with_length(0, to_conv); |
164 | return MATCHING_FAILURE; |
165 | } |
166 | } |
167 | } |
168 | |
169 | constexpr uintmax_t UNSIGNED_MAX = cpp::numeric_limits<uintmax_t>::max(); |
170 | constexpr uintmax_t SIGNED_MAX = |
171 | static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max()); |
172 | constexpr uintmax_t NEGATIVE_SIGNED_MAX = |
173 | static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max()) + 1; |
174 | |
175 | const uintmax_t MAX = |
176 | (is_signed ? (is_negative ? NEGATIVE_SIGNED_MAX : SIGNED_MAX) |
177 | : UNSIGNED_MAX); |
178 | |
179 | const uintmax_t max_div_by_base = MAX / base; |
180 | |
181 | if (internal::isalnum(cur_char) && |
182 | internal::b36_char_to_int(cur_char) < base) { |
183 | is_number = true; |
184 | } |
185 | |
186 | bool has_overflow = false; |
187 | size_t i = 0; |
188 | for (; i < max_width && internal::isalnum(cur_char) && |
189 | internal::b36_char_to_int(cur_char) < base; |
190 | ++i, cur_char = reader->getc()) { |
191 | |
192 | uintmax_t cur_digit = internal::b36_char_to_int(cur_char); |
193 | |
194 | if (result == MAX) { |
195 | has_overflow = true; |
196 | continue; |
197 | } else if (result > max_div_by_base) { |
198 | result = MAX; |
199 | has_overflow = true; |
200 | } else { |
201 | result = result * base; |
202 | } |
203 | |
204 | if (result > MAX - cur_digit) { |
205 | result = MAX; |
206 | has_overflow = true; |
207 | } else { |
208 | result = result + cur_digit; |
209 | } |
210 | } |
211 | |
212 | // We always read one more character than will be used, so we have to put the |
213 | // last one back. |
214 | reader->ungetc(cur_char); |
215 | |
216 | if (!is_number) |
217 | return MATCHING_FAILURE; |
218 | |
219 | if (has_overflow) { |
220 | write_int_with_length(MAX, to_conv); |
221 | } else { |
222 | if (is_negative) |
223 | result = -result; |
224 | |
225 | write_int_with_length(result, to_conv); |
226 | } |
227 | |
228 | return READ_OK; |
229 | } |
230 | |
231 | } // namespace scanf_core |
232 | } // namespace LIBC_NAMESPACE_DECL |
233 | |
234 | #endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H |
235 |
Warning: This file is not a C or C++ file. It does not have highlighting.