int_converter.cpp source code [libc/src/stdio/scanf_core/int_converter.cpp]

1	//===-- Int type specifier converters for scanf ------------------ C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "src/stdio/scanf_core/int_converter.h"
10
11	#include "src/__support/CPP/limits.h"
12	#include "src/__support/ctype_utils.h"
13	#include "src/stdio/scanf_core/converter_utils.h"
14	#include "src/stdio/scanf_core/core_structs.h"
15	#include "src/stdio/scanf_core/reader.h"
16
17	#include <stddef.h>
18
19	namespace LIBC_NAMESPACE {
20	namespace scanf_core {
21
22	// This code is very similar to the code in __support/str_to_integer.h but is
23	// not quite the same. Here is the list of differences and why they exist:
24	// 1) This takes a reader and a format section instead of a char and the base.*
25	// This should be fairly self explanatory. While the char could be adapted*
26	// to a reader and the base could be calculated ahead of time, the
27	// semantics are slightly different, specifically a char can be indexed*
28	// freely (I can read str[2] and then str[0]) whereas a File (which the
29	// reader may contain) cannot.
30	// 2) Because this uses a Reader, this function can only unget once.
31	// This is relevant because scanf specifies it reads the "longest sequence
32	// of input characters which does not exceed any specified field width and
33	// which is, or is a prefix of, a matching input sequence." Whereas the
34	// strtol function accepts "the longest initial subsequence of the input
35	// string (...) that is of the expected form." This is demonstrated by the
36	// differences in how they deal with the string "0xZZZ" when parsing as
37	// hexadecimal. Scanf will read the "0x" as a valid prefix and return 0,
38	// since it reads the first 'Z', sees that it's not a valid hex digit, and
39	// reverses one character. The strtol function on the other hand only
40	// accepts the "0" since that's the longest valid hexadecimal sequence. It
41	// sees the 'Z' after the "0x" and determines that this is not the prefix
42	// to a valid hex string.
43	// 3) This conversion may have a maximum width.
44	// If a maximum width is specified, this conversion is only allowed to
45	// accept a certain number of characters. Strtol doesn't have any such
46	// limitation.
47	int convert_int(Reader reader, const* FormatSection &to_conv) {
48	// %d "Matches an optionally signed decimal integer [...] with the value 10
49	// for the base argument. The corresponding argument shall be a pointer to
50	// signed integer."
51
52	// %i "Matches an optionally signed integer [...] with the value 0 for the
53	// base argument. The corresponding argument shall be a pointer to signed
54	// integer."
55
56	// %u "Matches an optionally signed decimal integer [...] with the value 10
57	// for the base argument. The corresponding argument shall be a pointer to
58	// unsigned integer"
59
60	// %o "Matches an optionally signed octal integer [...] with the value 8 for
61	// the base argument. The corresponding argument shall be a pointer to
62	// unsigned integer"
63
64	// %x/X "Matches an optionally signed hexadecimal integer [...] with the value
65	// 16 for the base argument. The corresponding argument shall be a pointer to
66	// unsigned integer"
67
68	size_t max_width = cpp::numeric_limits<size_t>::max();
69	if (to_conv.max_width > `0`) {
70	max_width = to_conv.max_width;
71	}
72
73	uintmax_t result = `0`;
74	bool is_number = false;
75	bool is_signed = false;
76	int base = `0`;
77	if (to_conv.conv_name == `'i'`) {
78	base = `0`;
79	is_signed = true;
80	} else if (to_conv.conv_name == `'o'`) {
81	base = `8`;
82	} else if (to_lower(a: to_conv.conv_name) == `'x'` \|\| to_conv.conv_name == `'p'`) {
83	base = `16`;
84	} else if (to_conv.conv_name == `'d'`) {
85	base = `10`;
86	is_signed = true;
87	} else { // conv_name must be 'u'
88	base = `10`;
89	}
90
91	char cur_char = reader->getc();
92
93	char result_sign = `'+'`;
94	if (cur_char == `'+'` \|\| cur_char == `'-'`) {
95	result_sign = cur_char;
96	if (max_width > `1`) {
97	--max_width;
98	cur_char = reader->getc();
99	} else {
100	// If the max width has been hit already, then the return value must be 0
101	// since no actual digits of the number have been parsed yet.
102	write_int_with_length(output_val: `0`, to_conv);
103	return MATCHING_FAILURE;
104	}
105	}
106	const bool is_negative = result_sign == `'-'`;
107
108	// Base of 0 means automatically determine the base. Base of 16 may have a
109	// prefix of "0x"
110	if (base == `0` \|\| base == `16`) {
111	// If the first character is 0, then it could be octal or hex.
112	if (cur_char == `'0'`) {
113	is_number = true;
114
115	// Read the next character to check.
116	if (max_width > `1`) {
117	--max_width;
118	cur_char = reader->getc();
119	} else {
120	write_int_with_length(output_val: `0`, to_conv);
121	return READ_OK;
122	}
123
124	if (to_lower(a: cur_char) == `'x'`) {
125	// This is a valid hex prefix.
126	base = `16`;
127	if (max_width > `1`) {
128	--max_width;
129	cur_char = reader->getc();
130	} else {
131	write_int_with_length(output_val: `0`, to_conv);
132	return READ_OK;
133	}
134
135	} else {
136	if (base == `0`) {
137	base = `8`;
138	}
139	}
140	} else if (base == `0`) {
141	if (internal::isdigit(ch: cur_char)) {
142	// If the first character is a different number, then it's 10.
143	base = `10`;
144	} else {
145	// If the first character isn't a valid digit, then there are no valid
146	// digits at all. The number is 0.
147	reader->ungetc(c: cur_char);
148	write_int_with_length(output_val: `0`, to_conv);
149	return MATCHING_FAILURE;
150	}
151	}
152	}
153
154	constexpr uintmax_t UNSIGNED_MAX = cpp::numeric_limits<uintmax_t>::max();
155	constexpr uintmax_t SIGNED_MAX =
156	static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max());
157	constexpr uintmax_t NEGATIVE_SIGNED_MAX =
158	static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max()) + `1`;
159
160	const uintmax_t MAX =
161	(is_signed ? (is_negative ? NEGATIVE_SIGNED_MAX : SIGNED_MAX)
162	: UNSIGNED_MAX);
163
164	const uintmax_t max_div_by_base = MAX / base;
165
166	if (internal::isalnum(ch: cur_char) && b36_char_to_int(input: cur_char) < base) {
167	is_number = true;
168	}
169
170	bool has_overflow = false;
171	size_t i = `0`;
172	for (; i < max_width && internal::isalnum(ch: cur_char) &&
173	b36_char_to_int(input: cur_char) < base;
174	++i, cur_char = reader->getc()) {
175
176	uintmax_t cur_digit = b36_char_to_int(input: cur_char);
177
178	if (result == MAX) {
179	has_overflow = true;
180	continue;
181	} else if (result > max_div_by_base) {
182	result = MAX;
183	has_overflow = true;
184	} else {
185	result = result * base;
186	}
187
188	if (result > MAX - cur_digit) {
189	result = MAX;
190	has_overflow = true;
191	} else {
192	result = result + cur_digit;
193	}
194	}
195
196	// We always read one more character than will be used, so we have to put the
197	// last one back.
198	reader->ungetc(c: cur_char);
199
200	if (has_overflow) {
201	write_int_with_length(output_val: MAX, to_conv);
202	} else {
203	if (is_negative)
204	result = -result;
205
206	write_int_with_length(output_val: result, to_conv);
207	}
208
209	if (!is_number)
210	return MATCHING_FAILURE;
211	return READ_OK;
212	}
213
214	} // namespace scanf_core
215	} // namespace LIBC_NAMESPACE
216

Provided by KDAB

Definitions

Learn to use CMake with our Intro Training

Find out more

Definitions

source code of libc/src/stdio/scanf_core/int_converter.cpp