float_converter.cpp source code [libc/src/stdio/scanf_core/float_converter.cpp]

1	//===-- Float type specifier converters for scanf ---------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "src/stdio/scanf_core/float_converter.h"
10
11	#include "src/__support/CPP/limits.h"
12	#include "src/__support/char_vector.h"
13	#include "src/__support/ctype_utils.h"
14	#include "src/stdio/scanf_core/converter_utils.h"
15	#include "src/stdio/scanf_core/core_structs.h"
16	#include "src/stdio/scanf_core/reader.h"
17
18	#include <stddef.h>
19
20	namespace LIBC_NAMESPACE {
21	namespace scanf_core {
22
23	// All of the floating point conversions are the same for scanf, every name will
24	// accept every style.
25	int convert_float(Reader reader, const* FormatSection &to_conv) {
26	// %a/A/e/E/f/F/g/G "Matches an optionally signed floating-point number,
27	// infinity, or NaN, whose format is the same as expected for the subject
28	// sequence of the strtod function. The corresponding argument shall be a
29	// pointer to floating."
30
31	CharVector out_str = CharVector ();
32	bool is_number = false;
33
34	size_t max_width = cpp::numeric_limits<size_t>::max();
35	if (to_conv.max_width > `0`) {
36	max_width = to_conv.max_width;
37	}
38
39	char cur_char = reader->getc();
40	// Handle the sign.
41	if (cur_char == `'+'` \|\| cur_char == `'-'`) {
42	if (!out_str.append(new_char: cur_char)) {
43	return ALLOCATION_FAILURE;
44	}
45	if (out_str.length() == max_width) {
46	return MATCHING_FAILURE;
47	} else {
48	cur_char = reader->getc();
49	}
50	}
51
52	static constexpr char DECIMAL_POINT = `'.'`;
53	static const char inf_string[] = "infinity";
54
55	// Handle inf
56
57	if (to_lower(a: cur_char) == inf_string[`0`]) {
58	size_t inf_index = `0`;
59
60	for (; inf_index < sizeof(inf_string) && out_str.length() < max_width &&
61	to_lower(a: cur_char) == inf_string[inf_index];
62	++inf_index) {
63	if (!out_str.append(new_char: cur_char)) {
64	return ALLOCATION_FAILURE;
65	}
66	cur_char = reader->getc();
67	}
68
69	if (inf_index == `3` \|\| inf_index == sizeof(inf_string) - `1`) {
70	write_float_with_length(str: out_str.c_str(), to_conv);
71	return READ_OK;
72	} else {
73	return MATCHING_FAILURE;
74	}
75	}
76
77	static const char nan_string[] = "nan";
78
79	// Handle nan
80	if (to_lower(a: cur_char) == nan_string[`0`]) {
81	size_t nan_index = `0`;
82
83	for (; nan_index < sizeof(nan_string) && out_str.length() < max_width &&
84	to_lower(a: cur_char) == nan_string[nan_index];
85	++nan_index) {
86	if (!out_str.append(new_char: cur_char)) {
87	return ALLOCATION_FAILURE;
88	}
89	cur_char = reader->getc();
90	}
91
92	if (nan_index == sizeof(nan_string) - `1`) {
93	write_float_with_length(str: out_str.c_str(), to_conv);
94	return READ_OK;
95	} else {
96	return MATCHING_FAILURE;
97	}
98	}
99
100	// Assume base of 10 by default but check if it is actually base 16.
101	int base = `10`;
102
103	// If the string starts with 0 it might be in hex.
104	if (cur_char == `'0'`) {
105	is_number = true;
106	// Read the next character to check.
107	if (!out_str.append(new_char: cur_char)) {
108	return ALLOCATION_FAILURE;
109	}
110	// If we've hit the end, then this is "0", which is valid.
111	if (out_str.length() == max_width) {
112	write_float_with_length(str: out_str.c_str(), to_conv);
113	return READ_OK;
114	} else {
115	cur_char = reader->getc();
116	}
117
118	// If that next character is an 'x' then this is a hexadecimal number.
119	if (to_lower(a: cur_char) == `'x'`) {
120	base = `16`;
121
122	if (!out_str.append(new_char: cur_char)) {
123	return ALLOCATION_FAILURE;
124	}
125	// If we've hit the end here, we have "0x" which is a valid prefix to a
126	// floating point number, and will be evaluated to 0.
127	if (out_str.length() == max_width) {
128	write_float_with_length(str: out_str.c_str(), to_conv);
129	return READ_OK;
130	} else {
131	cur_char = reader->getc();
132	}
133	}
134	}
135
136	const char exponent_mark = ((base == `10`) ? `'e'` : `'p'`);
137	bool after_decimal = false;
138
139	// The format for the remaining characters at this point is DD.DDe+/-DD for
140	// base 10 and XX.XXp+/-DD for base 16
141
142	// This handles the digits before and after the decimal point, but not the
143	// exponent.
144	while (out_str.length() < max_width) {
145	if (internal::isalnum(ch: cur_char) &&
146	internal::b36_char_to_int(input: cur_char) < base) {
147	is_number = true;
148	if (!out_str.append(new_char: cur_char)) {
149	return ALLOCATION_FAILURE;
150	}
151	cur_char = reader->getc();
152	} else if (cur_char == DECIMAL_POINT && !after_decimal) {
153	after_decimal = true;
154	if (!out_str.append(new_char: cur_char)) {
155	return ALLOCATION_FAILURE;
156	}
157	cur_char = reader->getc();
158	} else {
159	break;
160	}
161	}
162
163	// Handle the exponent, which has an exponent mark, an optional sign, and
164	// decimal digits.
165	if (to_lower(a: cur_char) == exponent_mark) {
166	if (!out_str.append(new_char: cur_char)) {
167	return ALLOCATION_FAILURE;
168	}
169	if (out_str.length() == max_width) {
170	// This is laid out in the standard as being a matching error (100e is not
171	// a valid float) but may conflict with existing implementations.
172	return MATCHING_FAILURE;
173	} else {
174	cur_char = reader->getc();
175	}
176
177	if (cur_char == `'+'` \|\| cur_char == `'-'`) {
178	if (!out_str.append(new_char: cur_char)) {
179	return ALLOCATION_FAILURE;
180	}
181	if (out_str.length() == max_width) {
182	return MATCHING_FAILURE;
183	} else {
184	cur_char = reader->getc();
185	}
186	}
187
188	// It is specified by the standard that "100er" is a matching failure since
189	// the longest prefix of a possibly valid floating-point number (which is
190	// "100e") is not a valid floating-point number. If there is an exponent
191	// mark then there must be a digit after it else the number is not valid.
192	// Some implementations will roll back two characters (to just "100") and
193	// accept that since the prefix is not valid, and some will interpret an
194	// exponent mark followed by no digits as an additional exponent of 0
195	// (accepting "100e" and returning 100.0). Both of these behaviors are wrong
196	// by the standard, but they may be used in real code, see Hyrum's law. This
197	// code follows the standard, but may be incompatible due to code expecting
198	// these bugs.
199	if (!internal::isdigit(ch: cur_char)) {
200	return MATCHING_FAILURE;
201	}
202
203	while (internal::isdigit(ch: cur_char) && out_str.length() < max_width) {
204	if (!out_str.append(new_char: cur_char)) {
205	return ALLOCATION_FAILURE;
206	}
207	cur_char = reader->getc();
208	}
209	}
210
211	// We always read one more character than will be used, so we have to put the
212	// last one back.
213	reader->ungetc(c: cur_char);
214
215	// If we haven't actually found any digits, this is a matching failure (this
216	// catches cases like "+.")
217	if (!is_number) {
218	return MATCHING_FAILURE;
219	}
220	write_float_with_length(str: out_str.c_str(), to_conv);
221
222	return READ_OK;
223	}
224
225	} // namespace scanf_core
226	} // namespace LIBC_NAMESPACE
227

source code of libc/src/stdio/scanf_core/float_converter.cpp