1 | //===-- String to integer conversion utils ----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H |
10 | #define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H |
11 | |
12 | #include "src/__support/CPP/limits.h" |
13 | #include "src/__support/CPP/type_traits.h" |
14 | #include "src/__support/common.h" |
15 | #include "src/__support/ctype_utils.h" |
16 | #include "src/__support/str_to_num_result.h" |
17 | #include "src/__support/uint128.h" |
18 | #include "src/errno/libc_errno.h" // For ERANGE |
19 | |
20 | namespace LIBC_NAMESPACE { |
21 | namespace internal { |
22 | |
23 | // Returns a pointer to the first character in src that is not a whitespace |
24 | // character (as determined by isspace()) |
25 | // TODO: Change from returning a pointer to returning a length. |
26 | LIBC_INLINE const char * |
27 | first_non_whitespace(const char *__restrict src, |
28 | size_t src_len = cpp::numeric_limits<size_t>::max()) { |
29 | size_t src_cur = 0; |
30 | while (src_cur < src_len && internal::isspace(ch: src[src_cur])) { |
31 | ++src_cur; |
32 | } |
33 | return src + src_cur; |
34 | } |
35 | |
36 | LIBC_INLINE int b36_char_to_int(char input) { |
37 | if (isdigit(ch: input)) |
38 | return input - '0'; |
39 | if (isalpha(ch: input)) |
40 | return (input | 32) + 10 - 'a'; |
41 | return 0; |
42 | } |
43 | |
44 | // checks if the next 3 characters of the string pointer are the start of a |
45 | // hexadecimal number. Does not advance the string pointer. |
46 | LIBC_INLINE bool |
47 | is_hex_start(const char *__restrict src, |
48 | size_t src_len = cpp::numeric_limits<size_t>::max()) { |
49 | if (src_len < 3) |
50 | return false; |
51 | return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(ch: *(src + 2)) && |
52 | b36_char_to_int(input: *(src + 2)) < 16; |
53 | } |
54 | |
55 | // Takes the address of the string pointer and parses the base from the start of |
56 | // it. |
57 | LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) { |
58 | // A hexadecimal number is defined as "the prefix 0x or 0X followed by a |
59 | // sequence of the decimal digits and the letters a (or A) through f (or F) |
60 | // with values 10 through 15 respectively." (C standard 6.4.4.1) |
61 | if (is_hex_start(src, src_len)) |
62 | return 16; |
63 | // An octal number is defined as "the prefix 0 optionally followed by a |
64 | // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any |
65 | // number that starts with 0, including just 0, is an octal number. |
66 | if (src_len > 0 && src[0] == '0') |
67 | return 8; |
68 | // A decimal number is defined as beginning "with a nonzero digit and |
69 | // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1) |
70 | return 10; |
71 | } |
72 | |
73 | // Takes a pointer to a string and the base to convert to. This function is used |
74 | // as the backend for all of the string to int functions. |
75 | template <class T> |
76 | LIBC_INLINE StrToNumResult<T> |
77 | strtointeger(const char *__restrict src, int base, |
78 | const size_t src_len = cpp::numeric_limits<size_t>::max()) { |
79 | using ResultType = typename cpp::conditional_t<(cpp::is_same_v<T, UInt128> || |
80 | cpp::is_same_v<T, Int128>), |
81 | UInt128, unsigned long long>; |
82 | |
83 | ResultType result = 0; |
84 | |
85 | bool is_number = false; |
86 | size_t src_cur = 0; |
87 | int error_val = 0; |
88 | |
89 | if (src_len == 0) |
90 | return {0, 0, 0}; |
91 | |
92 | if (base < 0 || base == 1 || base > 36) |
93 | return {0, 0, EINVAL}; |
94 | |
95 | src_cur = first_non_whitespace(src, src_len) - src; |
96 | |
97 | char result_sign = '+'; |
98 | if (src[src_cur] == '+' || src[src_cur] == '-') { |
99 | result_sign = src[src_cur]; |
100 | ++src_cur; |
101 | } |
102 | |
103 | if (base == 0) |
104 | base = infer_base(src: src + src_cur, src_len: src_len - src_cur); |
105 | |
106 | if (base == 16 && is_hex_start(src: src + src_cur, src_len: src_len - src_cur)) |
107 | src_cur = src_cur + 2; |
108 | |
109 | constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>; |
110 | const bool is_positive = (result_sign == '+'); |
111 | |
112 | ResultType constexpr NEGATIVE_MAX = |
113 | !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1 |
114 | : cpp::numeric_limits<T>::max(); |
115 | ResultType const abs_max = |
116 | (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX); |
117 | ResultType const abs_max_div_by_base = abs_max / base; |
118 | |
119 | while (src_cur < src_len && isalnum(ch: src[src_cur])) { |
120 | int cur_digit = b36_char_to_int(input: src[src_cur]); |
121 | if (cur_digit >= base) |
122 | break; |
123 | |
124 | is_number = true; |
125 | ++src_cur; |
126 | |
127 | // If the number has already hit the maximum value for the current type then |
128 | // the result cannot change, but we still need to advance src to the end of |
129 | // the number. |
130 | if (result == abs_max) { |
131 | error_val = ERANGE; |
132 | continue; |
133 | } |
134 | |
135 | if (result > abs_max_div_by_base) { |
136 | result = abs_max; |
137 | error_val = ERANGE; |
138 | } else { |
139 | result = result * base; |
140 | } |
141 | if (result > abs_max - cur_digit) { |
142 | result = abs_max; |
143 | error_val = ERANGE; |
144 | } else { |
145 | result = result + cur_digit; |
146 | } |
147 | } |
148 | |
149 | ptrdiff_t str_len = is_number ? (src_cur) : 0; |
150 | |
151 | if (error_val == ERANGE) { |
152 | if (is_positive || IS_UNSIGNED) |
153 | return {cpp::numeric_limits<T>::max(), str_len, error_val}; |
154 | else // T is signed and there is a negative overflow |
155 | return {cpp::numeric_limits<T>::min(), str_len, error_val}; |
156 | } |
157 | |
158 | return { |
159 | is_positive |
160 | ? static_cast<T>(result) |
161 | : static_cast<T>( |
162 | -static_cast<make_integral_or_big_int_unsigned_t<T>>(result)), |
163 | str_len, error_val}; |
164 | } |
165 | |
166 | } // namespace internal |
167 | } // namespace LIBC_NAMESPACE |
168 | |
169 | #endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H |
170 | |