1// Copyright 2023 Matt Borland
2// Distributed under the Boost Software License, Version 1.0.
3// https://www.boost.org/LICENSE_1_0.txt
4
5#ifndef BOOST_CHARCONV_DETAIL_PARSER_HPP
6#define BOOST_CHARCONV_DETAIL_PARSER_HPP
7
8#include <boost/charconv/detail/config.hpp>
9#include <boost/charconv/detail/from_chars_result.hpp>
10#include <boost/charconv/detail/from_chars_integer_impl.hpp>
11#include <boost/charconv/detail/integer_search_trees.hpp>
12#include <boost/charconv/limits.hpp>
13#include <boost/charconv/chars_format.hpp>
14#include <system_error>
15#include <type_traits>
16#include <limits>
17#include <cerrno>
18#include <cstdint>
19#include <cstring>
20
21#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
22# pragma GCC diagnostic push
23# pragma GCC diagnostic ignored "-Wmissing-field-initializers"
24#endif
25
26namespace boost { namespace charconv { namespace detail {
27
// Returns true when c is one of the ASCII decimal digits '0' through '9'.
inline bool is_integer_char(char c) noexcept
{
    const bool below_zero = c < '0';
    const bool above_nine = c > '9';

    return !(below_zero || above_nine);
}
32
33inline bool is_hex_char(char c) noexcept
34{
35 return is_integer_char(c) || (((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F')));
36}
37
38inline bool is_delimiter(char c, chars_format fmt) noexcept
39{
40 if (fmt != chars_format::hex)
41 {
42 return !is_integer_char(c) && c != 'e' && c != 'E';
43 }
44
45 return !is_hex_char(c) && c != 'p' && c != 'P';
46}
47
48inline from_chars_result from_chars_dispatch(const char* first, const char* last, std::uint64_t& value, int base) noexcept
49{
50 return boost::charconv::detail::from_chars(first, last, value, base);
51}
52
53inline from_chars_result from_chars_dispatch(const char* first, const char* last, uint128& value, int base) noexcept
54{
55 return boost::charconv::detail::from_chars128(first, last, value, base);
56}
57
#ifdef BOOST_CHARCONV_HAS_INT128
// Overload for boost::uint128_type, only available when BOOST_CHARCONV_HAS_INT128
// is defined: forwards to from_chars128.
inline from_chars_result from_chars_dispatch(const char* first, const char* last, boost::uint128_type& value, int base) noexcept
{
    const from_chars_result result = boost::charconv::detail::from_chars128(first, last, value, base);
    return result;
}
#endif
64
65template <typename Unsigned_Integer, typename Integer>
66inline from_chars_result parser(const char* first, const char* last, bool& sign, Unsigned_Integer& significand, Integer& exponent, chars_format fmt = chars_format::general) noexcept
67{
68 if (first > last)
69 {
70 return {.ptr: first, .ec: std::errc::invalid_argument};
71 }
72
73 auto next = first;
74 bool all_zeros = true;
75
76 // First extract the sign
77 if (*next == '-')
78 {
79 sign = true;
80 ++next;
81 }
82 else if (*next == '+')
83 {
84 return {.ptr: next, .ec: std::errc::invalid_argument};
85 }
86 else
87 {
88 sign = false;
89 }
90
91 // Handle non-finite values
92 // Stl allows for string like "iNf" to return inf
93 //
94 // This is nested ifs rather than a big one-liner to ensure that once we hit an invalid character
95 // or an end of buffer we return the correct value of next
96 if (next != last && (*next == 'i' || *next == 'I'))
97 {
98 ++next;
99 if (next != last && (*next == 'n' || *next == 'N'))
100 {
101 ++next;
102 if (next != last && (*next == 'f' || *next == 'F'))
103 {
104 significand = 0;
105 return {.ptr: next, .ec: std::errc::value_too_large};
106 }
107 }
108
109 return {.ptr: next, .ec: std::errc::invalid_argument};
110 }
111 else if (next != last && (*next == 'n' || *next == 'N'))
112 {
113 ++next;
114 if (next != last && (*next == 'a' || *next == 'A'))
115 {
116 ++next;
117 if (next != last && (*next == 'n' || *next == 'N'))
118 {
119 ++next;
120 if (next != last && (*next == '('))
121 {
122 ++next;
123 if (next != last && (*next == 's' || *next == 'S'))
124 {
125 significand = 1;
126 return {.ptr: next, .ec: std::errc::not_supported};
127 }
128 else if (next != last && (*next == 'i' || *next == 'I'))
129 {
130 significand = 0;
131 return {.ptr: next, .ec: std::errc::not_supported};
132 }
133 }
134 else
135 {
136 significand = 0;
137 return {.ptr: next, .ec: std::errc::not_supported};
138 }
139 }
140 }
141
142 return {.ptr: next, .ec: std::errc::invalid_argument};
143 }
144
145 // Ignore leading zeros (e.g. 00005 or -002.3e+5)
146 while (next != last && *next == '0')
147 {
148 ++next;
149 }
150
151 // If the number is 0 we can abort now
152 char exp_char;
153 char capital_exp_char;
154 if (fmt != chars_format::hex)
155 {
156 exp_char = 'e';
157 capital_exp_char = 'E';
158 }
159 else
160 {
161 exp_char = 'p';
162 capital_exp_char = 'P';
163 }
164
165 if (next == last || *next == exp_char || *next == -capital_exp_char)
166 {
167 significand = 0;
168 exponent = 0;
169 return {.ptr: next, .ec: std::errc()};
170 }
171
172 // Next we get the significand
173 constexpr std::size_t significand_buffer_size = limits<Unsigned_Integer>::max_chars10 - 1; // Base 10 or 16
174 char significand_buffer[significand_buffer_size] {};
175 std::size_t i = 0;
176 std::size_t dot_position = 0;
177 Integer extra_zeros = 0;
178 Integer leading_zero_powers = 0;
179 const auto char_validation_func = (fmt != boost::charconv::chars_format::hex) ? is_integer_char : is_hex_char;
180 const int base = (fmt != boost::charconv::chars_format::hex) ? 10 : 16;
181
182 while (next != last && char_validation_func(*next) && i < significand_buffer_size)
183 {
184 all_zeros = false;
185 significand_buffer[i] = *next;
186 ++next;
187 ++i;
188 }
189
190 bool fractional = false;
191 if (next == last)
192 {
193 // if fmt is chars_format::scientific the e is required
194 if (fmt == chars_format::scientific)
195 {
196 return {.ptr: first, .ec: std::errc::invalid_argument};
197 }
198
199 exponent = 0;
200 std::size_t offset = i;
201
202 from_chars_result r = from_chars_dispatch(significand_buffer, significand_buffer + offset, significand, base);
203 switch (r.ec)
204 {
205 case std::errc::invalid_argument:
206 return {.ptr: first, .ec: std::errc::invalid_argument};
207 case std::errc::result_out_of_range:
208 return {.ptr: next, .ec: std::errc::result_out_of_range};
209 default:
210 return {.ptr: next, .ec: std::errc()};
211 }
212 }
213 else if (*next == '.')
214 {
215 ++next;
216 fractional = true;
217 dot_position = i;
218
219 // Process the fractional part if we have it
220 //
221 // if fmt is chars_format::scientific the e is required
222 // if fmt is chars_format::fixed and not scientific the e is disallowed
223 // if fmt is chars_format::general (which is scientific and fixed) the e is optional
224
225 // If we have the value 0.00001 we can continue to chop zeros and adjust the exponent
226 // so that we get the useful parts of the fraction
227 if (all_zeros)
228 {
229 while (next != last && *next == '0')
230 {
231 ++next;
232 --leading_zero_powers;
233 }
234
235 if (next == last)
236 {
237 return {.ptr: last, .ec: std::errc()};
238 }
239 }
240
241 while (next != last && char_validation_func(*next) && i < significand_buffer_size)
242 {
243 significand_buffer[i] = *next;
244 ++next;
245 ++i;
246 }
247 }
248
249 if (i == significand_buffer_size)
250 {
251 // We can not process any more significant figures into the significand so skip to the end
252 // or the exponent part and capture the additional orders of magnitude for the exponent
253 bool found_dot = false;
254 while (next != last && (char_validation_func(*next) || *next == '.'))
255 {
256 ++next;
257 if (!fractional && !found_dot)
258 {
259 ++extra_zeros;
260 }
261 if (next != last && *next == '.')
262 {
263 found_dot = true;
264 }
265 }
266 }
267
268 if (next == last || is_delimiter(c: *next, fmt))
269 {
270 if (fmt == chars_format::scientific)
271 {
272 return {.ptr: first, .ec: std::errc::invalid_argument};
273 }
274 if (dot_position != 0 || fractional)
275 {
276 exponent = static_cast<Integer>(dot_position) - static_cast<Integer>(i) + extra_zeros + leading_zero_powers;
277 }
278 else
279 {
280 exponent = extra_zeros + leading_zero_powers;
281 }
282 std::size_t offset = i;
283
284 from_chars_result r = from_chars_dispatch(significand_buffer, significand_buffer + offset, significand, base);
285 switch (r.ec)
286 {
287 case std::errc::invalid_argument:
288 return {.ptr: first, .ec: std::errc::invalid_argument};
289 case std::errc::result_out_of_range:
290 return {.ptr: next, .ec: std::errc::result_out_of_range};
291 default:
292 return {.ptr: next, .ec: std::errc()};
293 }
294 }
295 else if (*next == exp_char || *next == capital_exp_char)
296 {
297 // Would be a number without a significand e.g. e+03
298 if (next == first)
299 {
300 return {.ptr: next, .ec: std::errc::invalid_argument};
301 }
302
303 ++next;
304 if (fmt == chars_format::fixed)
305 {
306 return {.ptr: first, .ec: std::errc::invalid_argument};
307 }
308
309 exponent = static_cast<Integer>(i - 1);
310 std::size_t offset = i;
311 bool round = false;
312 // If more digits are present than representable in the significand of the target type
313 // we set the maximum
314 if (offset > significand_buffer_size)
315 {
316 offset = significand_buffer_size - 1;
317 i = significand_buffer_size;
318 if (significand_buffer[offset] == '5' ||
319 significand_buffer[offset] == '6' ||
320 significand_buffer[offset] == '7' ||
321 significand_buffer[offset] == '8' ||
322 significand_buffer[offset] == '9')
323 {
324 round = true;
325 }
326 }
327
328 // If the significand is 0 from chars will return std::errc::invalid_argument because there is nothing in the buffer,
329 // but it is a valid value. We need to continue parsing to get the correct value of ptr even
330 // though we know we could bail now.
331 //
332 // See GitHub issue #29: https://github.com/cppalliance/charconv/issues/29
333 if (offset != 0)
334 {
335 from_chars_result r = from_chars_dispatch(significand_buffer, significand_buffer + offset, significand, base);
336 switch (r.ec)
337 {
338 case std::errc::invalid_argument:
339 return {.ptr: first, .ec: std::errc::invalid_argument};
340 case std::errc::result_out_of_range:
341 return {.ptr: next, .ec: std::errc::result_out_of_range};
342 default:
343 break;
344 }
345
346 if (round)
347 {
348 significand += 1;
349 }
350 }
351 }
352 else
353 {
354 return {.ptr: first, .ec: std::errc::invalid_argument};
355 }
356
357 // Finally we get the exponent
358 constexpr std::size_t exponent_buffer_size = 6; // Float128 min exp is −16382
359 char exponent_buffer[exponent_buffer_size] {};
360 const auto significand_digits = i;
361 i = 0;
362
363 // Get the sign first
364 if (next != last && *next == '-')
365 {
366 exponent_buffer[i] = *next;
367 ++next;
368 ++i;
369 }
370 else if (next != last && *next == '+')
371 {
372 ++next;
373 }
374
375 // Next strip any leading zeros
376 while (next != last && *next == '0')
377 {
378 ++next;
379 }
380
381 // Process the significant values
382 while (next != last && is_integer_char(c: *next) && i < exponent_buffer_size)
383 {
384 exponent_buffer[i] = *next;
385 ++next;
386 ++i;
387 }
388
389 // If the exponent can't fit in the buffer the number is not representable
390 if (next != last && i == exponent_buffer_size)
391 {
392 return {.ptr: next, .ec: std::errc::result_out_of_range};
393 }
394
395 // If the exponent was e+00 or e-00
396 if (i == 0 || (i == 1 && exponent_buffer[0] == '-'))
397 {
398 if (fractional)
399 {
400 exponent = static_cast<Integer>(dot_position - significand_digits);
401 }
402 else
403 {
404 exponent = extra_zeros;
405 }
406
407 return {.ptr: next, .ec: std::errc()};
408 }
409
410 const auto r = from_chars(exponent_buffer, exponent_buffer + i, exponent);
411
412 exponent += leading_zero_powers;
413
414 switch (r.ec)
415 {
416 case std::errc::invalid_argument:
417 return {.ptr: first, .ec: std::errc::invalid_argument};
418 case std::errc::result_out_of_range:
419 return {.ptr: next, .ec: std::errc::result_out_of_range};
420 default:
421 if (fractional)
422 {
423 // Need to take the offset from 1.xxx because compute_floatXXX assumes the significand is an integer
424 // so the exponent is off by the number of digits in the significand - 1
425 if (fmt == chars_format::hex)
426 {
427 // In hex the number of digits parsed is possibly less than the number of digits in base10
428 exponent -= num_digits(significand) - static_cast<Integer>(dot_position);
429 }
430 else
431 {
432 exponent -= static_cast<Integer>(significand_digits - dot_position);
433 }
434 }
435 else
436 {
437 exponent += extra_zeros;
438 }
439
440 return {.ptr: next, .ec: std::errc()};
441 }
442}
443
444}}} // Namespaces
445
446#if defined(__GNUC__) && __GNUC__ < 5 && !defined(__clang__)
447# pragma GCC diagnostic pop
448#endif
449
450#endif // BOOST_CHARCONV_DETAIL_PARSER_HPP
451

// Source: boost/libs/charconv/include/boost/charconv/detail/parser.hpp