1//===-- Int type specifier converters for scanf -----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "src/stdio/scanf_core/int_converter.h"
10
11#include "src/__support/CPP/limits.h"
12#include "src/__support/ctype_utils.h"
13#include "src/stdio/scanf_core/converter_utils.h"
14#include "src/stdio/scanf_core/core_structs.h"
15#include "src/stdio/scanf_core/reader.h"
16
17#include <stddef.h>
18
19namespace LIBC_NAMESPACE {
20namespace scanf_core {
21
22// This code is very similar to the code in __support/str_to_integer.h but is
23// not quite the same. Here is the list of differences and why they exist:
24// 1) This takes a reader and a format section instead of a char* and the base.
//    This should be fairly self-explanatory. While the char* could be adapted
26// to a reader and the base could be calculated ahead of time, the
27// semantics are slightly different, specifically a char* can be indexed
28// freely (I can read str[2] and then str[0]) whereas a File (which the
29// reader may contain) cannot.
30// 2) Because this uses a Reader, this function can only unget once.
31// This is relevant because scanf specifies it reads the "longest sequence
32// of input characters which does not exceed any specified field width and
33// which is, or is a prefix of, a matching input sequence." Whereas the
34// strtol function accepts "the longest initial subsequence of the input
35// string (...) that is of the expected form." This is demonstrated by the
36// differences in how they deal with the string "0xZZZ" when parsing as
37// hexadecimal. Scanf will read the "0x" as a valid prefix and return 0,
38// since it reads the first 'Z', sees that it's not a valid hex digit, and
39// reverses one character. The strtol function on the other hand only
40// accepts the "0" since that's the longest valid hexadecimal sequence. It
41// sees the 'Z' after the "0x" and determines that this is not the prefix
42// to a valid hex string.
43// 3) This conversion may have a maximum width.
44// If a maximum width is specified, this conversion is only allowed to
45// accept a certain number of characters. Strtol doesn't have any such
46// limitation.
47int convert_int(Reader *reader, const FormatSection &to_conv) {
48 // %d "Matches an optionally signed decimal integer [...] with the value 10
49 // for the base argument. The corresponding argument shall be a pointer to
50 // signed integer."
51
52 // %i "Matches an optionally signed integer [...] with the value 0 for the
53 // base argument. The corresponding argument shall be a pointer to signed
54 // integer."
55
56 // %u "Matches an optionally signed decimal integer [...] with the value 10
57 // for the base argument. The corresponding argument shall be a pointer to
58 // unsigned integer"
59
60 // %o "Matches an optionally signed octal integer [...] with the value 8 for
61 // the base argument. The corresponding argument shall be a pointer to
62 // unsigned integer"
63
64 // %x/X "Matches an optionally signed hexadecimal integer [...] with the value
65 // 16 for the base argument. The corresponding argument shall be a pointer to
66 // unsigned integer"
67
68 size_t max_width = cpp::numeric_limits<size_t>::max();
69 if (to_conv.max_width > 0) {
70 max_width = to_conv.max_width;
71 }
72
73 uintmax_t result = 0;
74 bool is_number = false;
75 bool is_signed = false;
76 int base = 0;
77 if (to_conv.conv_name == 'i') {
78 base = 0;
79 is_signed = true;
80 } else if (to_conv.conv_name == 'o') {
81 base = 8;
82 } else if (to_lower(a: to_conv.conv_name) == 'x' || to_conv.conv_name == 'p') {
83 base = 16;
84 } else if (to_conv.conv_name == 'd') {
85 base = 10;
86 is_signed = true;
87 } else { // conv_name must be 'u'
88 base = 10;
89 }
90
91 char cur_char = reader->getc();
92
93 char result_sign = '+';
94 if (cur_char == '+' || cur_char == '-') {
95 result_sign = cur_char;
96 if (max_width > 1) {
97 --max_width;
98 cur_char = reader->getc();
99 } else {
100 // If the max width has been hit already, then the return value must be 0
101 // since no actual digits of the number have been parsed yet.
102 write_int_with_length(output_val: 0, to_conv);
103 return MATCHING_FAILURE;
104 }
105 }
106 const bool is_negative = result_sign == '-';
107
108 // Base of 0 means automatically determine the base. Base of 16 may have a
109 // prefix of "0x"
110 if (base == 0 || base == 16) {
111 // If the first character is 0, then it could be octal or hex.
112 if (cur_char == '0') {
113 is_number = true;
114
115 // Read the next character to check.
116 if (max_width > 1) {
117 --max_width;
118 cur_char = reader->getc();
119 } else {
120 write_int_with_length(output_val: 0, to_conv);
121 return READ_OK;
122 }
123
124 if (to_lower(a: cur_char) == 'x') {
125 // This is a valid hex prefix.
126 base = 16;
127 if (max_width > 1) {
128 --max_width;
129 cur_char = reader->getc();
130 } else {
131 write_int_with_length(output_val: 0, to_conv);
132 return READ_OK;
133 }
134
135 } else {
136 if (base == 0) {
137 base = 8;
138 }
139 }
140 } else if (base == 0) {
141 if (internal::isdigit(ch: cur_char)) {
142 // If the first character is a different number, then it's 10.
143 base = 10;
144 } else {
145 // If the first character isn't a valid digit, then there are no valid
146 // digits at all. The number is 0.
147 reader->ungetc(c: cur_char);
148 write_int_with_length(output_val: 0, to_conv);
149 return MATCHING_FAILURE;
150 }
151 }
152 }
153
154 constexpr uintmax_t UNSIGNED_MAX = cpp::numeric_limits<uintmax_t>::max();
155 constexpr uintmax_t SIGNED_MAX =
156 static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max());
157 constexpr uintmax_t NEGATIVE_SIGNED_MAX =
158 static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max()) + 1;
159
160 const uintmax_t MAX =
161 (is_signed ? (is_negative ? NEGATIVE_SIGNED_MAX : SIGNED_MAX)
162 : UNSIGNED_MAX);
163
164 const uintmax_t max_div_by_base = MAX / base;
165
166 if (internal::isalnum(ch: cur_char) && b36_char_to_int(input: cur_char) < base) {
167 is_number = true;
168 }
169
170 bool has_overflow = false;
171 size_t i = 0;
172 for (; i < max_width && internal::isalnum(ch: cur_char) &&
173 b36_char_to_int(input: cur_char) < base;
174 ++i, cur_char = reader->getc()) {
175
176 uintmax_t cur_digit = b36_char_to_int(input: cur_char);
177
178 if (result == MAX) {
179 has_overflow = true;
180 continue;
181 } else if (result > max_div_by_base) {
182 result = MAX;
183 has_overflow = true;
184 } else {
185 result = result * base;
186 }
187
188 if (result > MAX - cur_digit) {
189 result = MAX;
190 has_overflow = true;
191 } else {
192 result = result + cur_digit;
193 }
194 }
195
196 // We always read one more character than will be used, so we have to put the
197 // last one back.
198 reader->ungetc(c: cur_char);
199
200 if (has_overflow) {
201 write_int_with_length(output_val: MAX, to_conv);
202 } else {
203 if (is_negative)
204 result = -result;
205
206 write_int_with_length(output_val: result, to_conv);
207 }
208
209 if (!is_number)
210 return MATCHING_FAILURE;
211 return READ_OK;
212}
213
214} // namespace scanf_core
215} // namespace LIBC_NAMESPACE
216

// Source: libc/src/stdio/scanf_core/int_converter.cpp