Warning: This file is not a C or C++ file. It does not have highlighting.

1//===-- Int type specifier converter for scanf ------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H
10#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H
11
12#include "src/__support/CPP/limits.h"
13#include "src/__support/ctype_utils.h"
14#include "src/__support/macros/config.h"
15#include "src/stdio/scanf_core/converter_utils.h"
16#include "src/stdio/scanf_core/core_structs.h"
17#include "src/stdio/scanf_core/reader.h"
18
19#include <stddef.h>
20
21namespace LIBC_NAMESPACE_DECL {
22namespace scanf_core {
23
24// This code is very similar to the code in __support/str_to_integer.h but is
25// not quite the same. Here is the list of differences and why they exist:
26// 1) This takes a reader and a format section instead of a char* and the base.
27// This should be fairly self explanatory. While the char* could be adapted
28// to a reader and the base could be calculated ahead of time, the
29// semantics are slightly different, specifically a char* can be indexed
30// freely (I can read str[2] and then str[0]) whereas a File (which the
31// reader may contain) cannot.
32// 2) Because this uses a Reader, this function can only unget once.
33// This is relevant because scanf specifies it reads the "longest sequence
34// of input characters which does not exceed any specified field width and
35// which is, or is a prefix of, a matching input sequence." Whereas the
36// strtol function accepts "the longest initial subsequence of the input
37// string (...) that is of the expected form." This is demonstrated by the
38// differences in how they deal with the string "0xZZZ" when parsing as
39// hexadecimal. Scanf will read the "0x" as a valid prefix and return 0,
40// since it reads the first 'Z', sees that it's not a valid hex digit, and
41// reverses one character. The strtol function on the other hand only
42// accepts the "0" since that's the longest valid hexadecimal sequence. It
43// sees the 'Z' after the "0x" and determines that this is not the prefix
44// to a valid hex string.
45// 3) This conversion may have a maximum width.
46// If a maximum width is specified, this conversion is only allowed to
47// accept a certain number of characters. Strtol doesn't have any such
48// limitation.
49template <typename T>
50int convert_int(Reader<T> *reader, const FormatSection &to_conv) {
51 // %d "Matches an optionally signed decimal integer [...] with the value 10
52 // for the base argument. The corresponding argument shall be a pointer to
53 // signed integer."
54
55 // %i "Matches an optionally signed integer [...] with the value 0 for the
56 // base argument. The corresponding argument shall be a pointer to signed
57 // integer."
58
59 // %u "Matches an optionally signed decimal integer [...] with the value 10
60 // for the base argument. The corresponding argument shall be a pointer to
61 // unsigned integer"
62
63 // %o "Matches an optionally signed octal integer [...] with the value 8 for
64 // the base argument. The corresponding argument shall be a pointer to
65 // unsigned integer"
66
67 // %x/X "Matches an optionally signed hexadecimal integer [...] with the value
68 // 16 for the base argument. The corresponding argument shall be a pointer to
69 // unsigned integer"
70
71 size_t max_width = cpp::numeric_limits<size_t>::max();
72 if (to_conv.max_width > 0) {
73 max_width = to_conv.max_width;
74 }
75
76 uintmax_t result = 0;
77 bool is_number = false;
78 bool is_signed = false;
79 int base = 0;
80 if (to_conv.conv_name == 'i') {
81 base = 0;
82 is_signed = true;
83 } else if (to_conv.conv_name == 'o') {
84 base = 8;
85 } else if (internal::tolower(to_conv.conv_name) == 'x' ||
86 to_conv.conv_name == 'p') {
87 base = 16;
88 } else if (to_conv.conv_name == 'd') {
89 base = 10;
90 is_signed = true;
91 } else { // conv_name must be 'u'
92 base = 10;
93 }
94
95 char cur_char = reader->getc();
96
97 char result_sign = '+';
98 if (cur_char == '+' || cur_char == '-') {
99 result_sign = cur_char;
100 if (max_width > 1) {
101 --max_width;
102 cur_char = reader->getc();
103 } else {
104 // If the max width has been hit already, then the return value must be 0
105 // since no actual digits of the number have been parsed yet.
106 write_int_with_length(0, to_conv);
107 return MATCHING_FAILURE;
108 }
109 }
110 const bool is_negative = result_sign == '-';
111
112 // Base of 0 means automatically determine the base. Base of 16 may have a
113 // prefix of "0x"
114 if (base == 0 || base == 16) {
115 // If the first character is 0, then it could be octal or hex.
116 if (cur_char == '0') {
117 is_number = true;
118
119 // Read the next character to check.
120 if (max_width > 1) {
121 --max_width;
122 cur_char = reader->getc();
123 } else {
124 write_int_with_length(0, to_conv);
125 return READ_OK;
126 }
127
128 if (internal::tolower(cur_char) == 'x') {
129 // This is a valid hex prefix.
130
131 is_number = false;
132 // A valid hex prefix is not necessarily a valid number. For the
133 // conversion to be valid it needs to use all of the characters it
134 // consumes. From the standard:
135 // 7.23.6.2 paragraph 9: "An input item is defined as the longest
136 // sequence of input characters which does not exceed any specified
137 // field width and which is, or is a prefix of, a matching input
138 // sequence."
139 // 7.23.6.2 paragraph 10: "If the input item is not a matching sequence,
140 // the execution of the directive fails: this condition is a matching
141 // failure"
142 base = 16;
143 if (max_width > 1) {
144 --max_width;
145 cur_char = reader->getc();
146 } else {
147 return MATCHING_FAILURE;
148 }
149
150 } else {
151 if (base == 0) {
152 base = 8;
153 }
154 }
155 } else if (base == 0) {
156 if (internal::isdigit(cur_char)) {
157 // If the first character is a different number, then it's 10.
158 base = 10;
159 } else {
160 // If the first character isn't a valid digit, then there are no valid
161 // digits at all. The number is 0.
162 reader->ungetc(cur_char);
163 write_int_with_length(0, to_conv);
164 return MATCHING_FAILURE;
165 }
166 }
167 }
168
169 constexpr uintmax_t UNSIGNED_MAX = cpp::numeric_limits<uintmax_t>::max();
170 constexpr uintmax_t SIGNED_MAX =
171 static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max());
172 constexpr uintmax_t NEGATIVE_SIGNED_MAX =
173 static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max()) + 1;
174
175 const uintmax_t MAX =
176 (is_signed ? (is_negative ? NEGATIVE_SIGNED_MAX : SIGNED_MAX)
177 : UNSIGNED_MAX);
178
179 const uintmax_t max_div_by_base = MAX / base;
180
181 if (internal::isalnum(cur_char) &&
182 internal::b36_char_to_int(cur_char) < base) {
183 is_number = true;
184 }
185
186 bool has_overflow = false;
187 size_t i = 0;
188 for (; i < max_width && internal::isalnum(cur_char) &&
189 internal::b36_char_to_int(cur_char) < base;
190 ++i, cur_char = reader->getc()) {
191
192 uintmax_t cur_digit = internal::b36_char_to_int(cur_char);
193
194 if (result == MAX) {
195 has_overflow = true;
196 continue;
197 } else if (result > max_div_by_base) {
198 result = MAX;
199 has_overflow = true;
200 } else {
201 result = result * base;
202 }
203
204 if (result > MAX - cur_digit) {
205 result = MAX;
206 has_overflow = true;
207 } else {
208 result = result + cur_digit;
209 }
210 }
211
212 // We always read one more character than will be used, so we have to put the
213 // last one back.
214 reader->ungetc(cur_char);
215
216 if (!is_number)
217 return MATCHING_FAILURE;
218
219 if (has_overflow) {
220 write_int_with_length(MAX, to_conv);
221 } else {
222 if (is_negative)
223 result = -result;
224
225 write_int_with_length(result, to_conv);
226 }
227
228 return READ_OK;
229}
230
231} // namespace scanf_core
232} // namespace LIBC_NAMESPACE_DECL
233
234#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H
235

Warning: This file is not a C or C++ file. It does not have highlighting.

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of libc/src/stdio/scanf_core/int_converter.h