str_to_integer.h source code [libc/src/__support/str_to_integer.h]

1	//===-- String to integer conversion utils ----------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
10	#define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
11
12	#include "src/__support/CPP/limits.h"
13	#include "src/__support/CPP/type_traits.h"
14	#include "src/__support/common.h"
15	#include "src/__support/ctype_utils.h"
16	#include "src/__support/str_to_num_result.h"
17	#include "src/__support/uint128.h"
18	#include "src/errno/libc_errno.h" // For ERANGE
19
20	namespace LIBC_NAMESPACE {
21	namespace internal {
22
23	// Returns a pointer to the first character in src that is not a whitespace
24	// character (as determined by isspace())
25	// TODO: Change from returning a pointer to returning a length.
26	LIBC_INLINE const char *
27	first_non_whitespace(const char *__restrict src,
28	size_t src_len = cpp::numeric_limits<size_t>::max()) {
29	size_t src_cur = `0`;
30	while (src_cur < src_len && internal::isspace(ch: src[src_cur])) {
31	++src_cur;
32	}
33	return src + src_cur;
34	}
35
36	LIBC_INLINE int b36_char_to_int(char input) {
37	if (isdigit(ch: input))
38	return input - `'0'`;
39	if (isalpha(ch: input))
40	return (input \| `32`) + `10` - `'a'`;
41	return `0`;
42	}
43
44	// checks if the next 3 characters of the string pointer are the start of a
45	// hexadecimal number. Does not advance the string pointer.
46	LIBC_INLINE bool
47	is_hex_start(const char *__restrict src,
48	size_t src_len = cpp::numeric_limits<size_t>::max()) {
49	if (src_len < `3`)
50	return false;
51	return src == `'0'` && ((src + `1`) \| `32`) == `'x'` && isalnum(ch: *(src + `2`)) &&
52	b36_char_to_int(input: *(src + `2`)) < `16`;
53	}
54
55	// Takes the address of the string pointer and parses the base from the start of
56	// it.
57	LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
58	// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
59	// sequence of the decimal digits and the letters a (or A) through f (or F)
60	// with values 10 through 15 respectively." (C standard 6.4.4.1)
61	if (is_hex_start(src, src_len))
62	return `16`;
63	// An octal number is defined as "the prefix 0 optionally followed by a
64	// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
65	// number that starts with 0, including just 0, is an octal number.
66	if (src_len > `0` && src[`0`] == `'0'`)
67	return `8`;
68	// A decimal number is defined as beginning "with a nonzero digit and
69	// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
70	return `10`;
71	}
72
73	// Takes a pointer to a string and the base to convert to. This function is used
74	// as the backend for all of the string to int functions.
75	template <class T>
76	LIBC_INLINE StrToNumResult<T>
77	strtointeger(const char *__restrict src, int base,
78	const size_t src_len = cpp::numeric_limits<size_t>::max()) {
79	using ResultType = typename cpp::conditional_t<(cpp::is_same_v<T, UInt128> \|\|
80	cpp::is_same_v<T, Int128>),
81	UInt128, unsigned long long>;
82
83	ResultType result = `0`;
84
85	bool is_number = false;
86	size_t src_cur = `0`;
87	int error_val = `0`;
88
89	if (src_len == `0`)
90	return {`0`, `0`, `0`};
91
92	if (base < `0` \|\| base == `1` \|\| base > `36`)
93	return {`0`, `0`, EINVAL};
94
95	src_cur = first_non_whitespace(src, src_len) - src;
96
97	char result_sign = `'+'`;
98	if (src[src_cur] == `'+'` \|\| src[src_cur] == `'-'`) {
99	result_sign = src[src_cur];
100	++src_cur;
101	}
102
103	if (base == `0`)
104	base = infer_base(src: src + src_cur, src_len: src_len - src_cur);
105
106	if (base == `16` && is_hex_start(src: src + src_cur, src_len: src_len - src_cur))
107	src_cur = src_cur + `2`;
108
109	constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
110	const bool is_positive = (result_sign == `'+'`);
111
112	ResultType constexpr NEGATIVE_MAX =
113	!IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + `1`
114	: cpp::numeric_limits<T>::max();
115	ResultType const abs_max =
116	(is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
117	ResultType const abs_max_div_by_base = abs_max / base;
118
119	while (src_cur < src_len && isalnum(ch: src[src_cur])) {
120	int cur_digit = b36_char_to_int(input: src[src_cur]);
121	if (cur_digit >= base)
122	break;
123
124	is_number = true;
125	++src_cur;
126
127	// If the number has already hit the maximum value for the current type then
128	// the result cannot change, but we still need to advance src to the end of
129	// the number.
130	if (result == abs_max) {
131	error_val = ERANGE;
132	continue;
133	}
134
135	if (result > abs_max_div_by_base) {
136	result = abs_max;
137	error_val = ERANGE;
138	} else {
139	result = result * base;
140	}
141	if (result > abs_max - cur_digit) {
142	result = abs_max;
143	error_val = ERANGE;
144	} else {
145	result = result + cur_digit;
146	}
147	}
148
149	ptrdiff_t str_len = is_number ? (src_cur) : `0`;
150
151	if (error_val == ERANGE) {
152	if (is_positive \|\| IS_UNSIGNED)
153	return {cpp::numeric_limits<T>::max(), str_len, error_val};
154	else // T is signed and there is a negative overflow
155	return {cpp::numeric_limits<T>::min(), str_len, error_val};
156	}
157
158	return {
159	is_positive
160	? static_cast<T>(result)
161	: static_cast<T>(
162	-static_cast<make_integral_or_big_int_unsigned_t<T>>(result)),
163	str_len, error_val};
164	}
165
166	} // namespace internal
167	} // namespace LIBC_NAMESPACE
168
169	#endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
170

source code of libc/src/__support/str_to_integer.h