int_converter.h source code [libc/src/stdio/scanf_core/int_converter.h]

Warning: This file is not a C or C++ file. It does not have highlighting.

1	//===-- Int type specifier converter for scanf ------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H
10	#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H
11
12	#include "src/__support/CPP/limits.h"
13	#include "src/__support/ctype_utils.h"
14	#include "src/__support/macros/config.h"
15	#include "src/stdio/scanf_core/converter_utils.h"
16	#include "src/stdio/scanf_core/core_structs.h"
17	#include "src/stdio/scanf_core/reader.h"
18
19	#include <stddef.h>
20
21	namespace LIBC_NAMESPACE_DECL {
22	namespace scanf_core {
23
24	// This code is very similar to the code in __support/str_to_integer.h but is
25	// not quite the same. Here is the list of differences and why they exist:
26	// 1) This takes a reader and a format section instead of a char* and the base.
27	// This should be fairly self explanatory. While the char* could be adapted
28	// to a reader and the base could be calculated ahead of time, the
29	// semantics are slightly different, specifically a char* can be indexed
30	// freely (I can read str[2] and then str[0]) whereas a File (which the
31	// reader may contain) cannot.
32	// 2) Because this uses a Reader, this function can only unget once.
33	// This is relevant because scanf specifies it reads the "longest sequence
34	// of input characters which does not exceed any specified field width and
35	// which is, or is a prefix of, a matching input sequence." Whereas the
36	// strtol function accepts "the longest initial subsequence of the input
37	// string (...) that is of the expected form." This is demonstrated by the
38	// differences in how they deal with the string "0xZZZ" when parsing as
39	// hexadecimal. Scanf will consume the "0x" as a valid prefix, read the first
40	// 'Z', see that it's not a valid hex digit, put that one character back, and
41	// report a matching failure. The strtol function on the other hand only
42	// accepts the "0" since that's the longest valid hexadecimal sequence. It
43	// sees the 'Z' after the "0x" and determines that this is not the prefix
44	// to a valid hex string.
45	// 3) This conversion may have a maximum width.
46	// If a maximum width is specified, this conversion is only allowed to
47	// accept a certain number of characters. Strtol doesn't have any such
48	// limitation.
49	template <typename T>
50	int convert_int(Reader<T> *reader, const FormatSection &to_conv) {
51	// %d "Matches an optionally signed decimal integer [...] with the value 10
52	// for the base argument. The corresponding argument shall be a pointer to
53	// signed integer."
54
55	// %i "Matches an optionally signed integer [...] with the value 0 for the
56	// base argument. The corresponding argument shall be a pointer to signed
57	// integer."
58
59	// %u "Matches an optionally signed decimal integer [...] with the value 10
60	// for the base argument. The corresponding argument shall be a pointer to
61	// unsigned integer"
62
63	// %o "Matches an optionally signed octal integer [...] with the value 8 for
64	// the base argument. The corresponding argument shall be a pointer to
65	// unsigned integer"
66
67	// %x/X "Matches an optionally signed hexadecimal integer [...] with the value
68	// 16 for the base argument. The corresponding argument shall be a pointer to
69	// unsigned integer"
70
71	size_t max_width = cpp::numeric_limits<size_t>::max();
72	if (to_conv.max_width > 0) {
73	max_width = to_conv.max_width;
74	}
75
76	uintmax_t result = 0;
77	bool is_number = false;
78	bool is_signed = false;
79	int base = 0;
80	if (to_conv.conv_name == 'i') {
81	base = 0;
82	is_signed = true;
83	} else if (to_conv.conv_name == 'o') {
84	base = 8;
85	} else if (internal::tolower(to_conv.conv_name) == 'x' \|\|
86	to_conv.conv_name == 'p') {
87	base = 16;
88	} else if (to_conv.conv_name == 'd') {
89	base = 10;
90	is_signed = true;
91	} else { // conv_name must be 'u'
92	base = 10;
93	}
94
95	char cur_char = reader->getc();
96
97	char result_sign = '+';
98	if (cur_char == '+' \|\| cur_char == '-') {
99	result_sign = cur_char;
100	if (max_width > 1) {
101	--max_width;
102	cur_char = reader->getc();
103	} else {
104	// If the max width has been hit already, then the return value must be 0
105	// since no actual digits of the number have been parsed yet.
106	write_int_with_length(0, to_conv);
107	return MATCHING_FAILURE;
108	}
109	}
110	const bool is_negative = result_sign == '-';
111
112	// Base of 0 means automatically determine the base. Base of 16 may have a
113	// prefix of "0x"
114	if (base == 0 \|\| base == 16) {
115	// If the first character is 0, then it could be octal or hex.
116	if (cur_char == '0') {
117	is_number = true;
118
119	// Read the next character to check.
120	if (max_width > 1) {
121	--max_width;
122	cur_char = reader->getc();
123	} else {
124	write_int_with_length(0, to_conv);
125	return READ_OK;
126	}
127
128	if (internal::tolower(cur_char) == 'x') {
129	// This is a valid hex prefix.
130
131	is_number = false;
132	// A valid hex prefix is not necessarily a valid number. For the
133	// conversion to be valid it needs to use all of the characters it
134	// consumes. From the standard:
135	// 7.23.6.2 paragraph 9: "An input item is defined as the longest
136	// sequence of input characters which does not exceed any specified
137	// field width and which is, or is a prefix of, a matching input
138	// sequence."
139	// 7.23.6.2 paragraph 10: "If the input item is not a matching sequence,
140	// the execution of the directive fails: this condition is a matching
141	// failure"
142	base = 16;
143	if (max_width > 1) {
144	--max_width;
145	cur_char = reader->getc();
146	} else {
147	return MATCHING_FAILURE;
148	}
149
150	} else {
151	if (base == 0) {
152	base = 8;
153	}
154	}
155	} else if (base == 0) {
156	if (internal::isdigit(cur_char)) {
157	// If the first character is a different number, then it's 10.
158	base = 10;
159	} else {
160	// If the first character isn't a valid digit, then there are no valid
161	// digits at all. The number is 0.
162	reader->ungetc(cur_char);
163	write_int_with_length(0, to_conv);
164	return MATCHING_FAILURE;
165	}
166	}
167	}
168
169	constexpr uintmax_t UNSIGNED_MAX = cpp::numeric_limits<uintmax_t>::max();
170	constexpr uintmax_t SIGNED_MAX =
171	static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max());
172	constexpr uintmax_t NEGATIVE_SIGNED_MAX =
173	static_cast<uintmax_t>(cpp::numeric_limits<intmax_t>::max()) + 1;
174
175	const uintmax_t MAX =
176	(is_signed ? (is_negative ? NEGATIVE_SIGNED_MAX : SIGNED_MAX)
177	: UNSIGNED_MAX);
178
179	const uintmax_t max_div_by_base = MAX / base;
180
181	if (internal::isalnum(cur_char) &&
182	internal::b36_char_to_int(cur_char) < base) {
183	is_number = true;
184	}
185
186	bool has_overflow = false;
187	size_t i = 0;
188	for (; i < max_width && internal::isalnum(cur_char) &&
189	internal::b36_char_to_int(cur_char) < base;
190	++i, cur_char = reader->getc()) {
191
192	uintmax_t cur_digit = internal::b36_char_to_int(cur_char);
193
194	if (result == MAX) {
195	has_overflow = true;
196	continue;
197	} else if (result > max_div_by_base) {
198	result = MAX;
199	has_overflow = true;
200	} else {
201	result = result * base;
202	}
203
204	if (result > MAX - cur_digit) {
205	result = MAX;
206	has_overflow = true;
207	} else {
208	result = result + cur_digit;
209	}
210	}
211
212	// We always read one more character than will be used, so we have to put the
213	// last one back.
214	reader->ungetc(cur_char);
215
216	if (!is_number)
217	return MATCHING_FAILURE;
218
219	if (has_overflow) {
220	write_int_with_length(MAX, to_conv);
221	} else {
222	if (is_negative)
223	result = -result;
224
225	write_int_with_length(result, to_conv);
226	}
227
228	return READ_OK;
229	}
230
231	} // namespace scanf_core
232	} // namespace LIBC_NAMESPACE_DECL
233
234	#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_INT_CONVERTER_H
235

Warning: This file is not a C or C++ file. It does not have highlighting.

Provided by KDAB

Learn to use CMake with our Intro Training

Find out more

source code of libc/src/stdio/scanf_core/int_converter.h