1//===-- String to integer conversion utils ----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
10#define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
11
12#include "src/__support/CPP/limits.h"
13#include "src/__support/CPP/type_traits.h"
14#include "src/__support/common.h"
15#include "src/__support/ctype_utils.h"
16#include "src/__support/str_to_num_result.h"
17#include "src/__support/uint128.h"
18#include "src/errno/libc_errno.h" // For ERANGE
19
20namespace LIBC_NAMESPACE {
21namespace internal {
22
23// Returns a pointer to the first character in src that is not a whitespace
24// character (as determined by isspace())
25// TODO: Change from returning a pointer to returning a length.
26LIBC_INLINE const char *
27first_non_whitespace(const char *__restrict src,
28 size_t src_len = cpp::numeric_limits<size_t>::max()) {
29 size_t src_cur = 0;
30 while (src_cur < src_len && internal::isspace(ch: src[src_cur])) {
31 ++src_cur;
32 }
33 return src + src_cur;
34}
35
36LIBC_INLINE int b36_char_to_int(char input) {
37 if (isdigit(ch: input))
38 return input - '0';
39 if (isalpha(ch: input))
40 return (input | 32) + 10 - 'a';
41 return 0;
42}
43
44// checks if the next 3 characters of the string pointer are the start of a
45// hexadecimal number. Does not advance the string pointer.
46LIBC_INLINE bool
47is_hex_start(const char *__restrict src,
48 size_t src_len = cpp::numeric_limits<size_t>::max()) {
49 if (src_len < 3)
50 return false;
51 return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(ch: *(src + 2)) &&
52 b36_char_to_int(input: *(src + 2)) < 16;
53}
54
55// Takes the address of the string pointer and parses the base from the start of
56// it.
57LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
58 // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
59 // sequence of the decimal digits and the letters a (or A) through f (or F)
60 // with values 10 through 15 respectively." (C standard 6.4.4.1)
61 if (is_hex_start(src, src_len))
62 return 16;
63 // An octal number is defined as "the prefix 0 optionally followed by a
64 // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
65 // number that starts with 0, including just 0, is an octal number.
66 if (src_len > 0 && src[0] == '0')
67 return 8;
68 // A decimal number is defined as beginning "with a nonzero digit and
69 // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
70 return 10;
71}
72
73// Takes a pointer to a string and the base to convert to. This function is used
74// as the backend for all of the string to int functions.
75template <class T>
76LIBC_INLINE StrToNumResult<T>
77strtointeger(const char *__restrict src, int base,
78 const size_t src_len = cpp::numeric_limits<size_t>::max()) {
79 using ResultType = typename cpp::conditional_t<(cpp::is_same_v<T, UInt128> ||
80 cpp::is_same_v<T, Int128>),
81 UInt128, unsigned long long>;
82
83 ResultType result = 0;
84
85 bool is_number = false;
86 size_t src_cur = 0;
87 int error_val = 0;
88
89 if (src_len == 0)
90 return {0, 0, 0};
91
92 if (base < 0 || base == 1 || base > 36)
93 return {0, 0, EINVAL};
94
95 src_cur = first_non_whitespace(src, src_len) - src;
96
97 char result_sign = '+';
98 if (src[src_cur] == '+' || src[src_cur] == '-') {
99 result_sign = src[src_cur];
100 ++src_cur;
101 }
102
103 if (base == 0)
104 base = infer_base(src: src + src_cur, src_len: src_len - src_cur);
105
106 if (base == 16 && is_hex_start(src: src + src_cur, src_len: src_len - src_cur))
107 src_cur = src_cur + 2;
108
109 constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
110 const bool is_positive = (result_sign == '+');
111
112 ResultType constexpr NEGATIVE_MAX =
113 !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
114 : cpp::numeric_limits<T>::max();
115 ResultType const abs_max =
116 (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
117 ResultType const abs_max_div_by_base = abs_max / base;
118
119 while (src_cur < src_len && isalnum(ch: src[src_cur])) {
120 int cur_digit = b36_char_to_int(input: src[src_cur]);
121 if (cur_digit >= base)
122 break;
123
124 is_number = true;
125 ++src_cur;
126
127 // If the number has already hit the maximum value for the current type then
128 // the result cannot change, but we still need to advance src to the end of
129 // the number.
130 if (result == abs_max) {
131 error_val = ERANGE;
132 continue;
133 }
134
135 if (result > abs_max_div_by_base) {
136 result = abs_max;
137 error_val = ERANGE;
138 } else {
139 result = result * base;
140 }
141 if (result > abs_max - cur_digit) {
142 result = abs_max;
143 error_val = ERANGE;
144 } else {
145 result = result + cur_digit;
146 }
147 }
148
149 ptrdiff_t str_len = is_number ? (src_cur) : 0;
150
151 if (error_val == ERANGE) {
152 if (is_positive || IS_UNSIGNED)
153 return {cpp::numeric_limits<T>::max(), str_len, error_val};
154 else // T is signed and there is a negative overflow
155 return {cpp::numeric_limits<T>::min(), str_len, error_val};
156 }
157
158 return {
159 is_positive
160 ? static_cast<T>(result)
161 : static_cast<T>(
162 -static_cast<make_integral_or_big_int_unsigned_t<T>>(result)),
163 str_len, error_val};
164}
165
166} // namespace internal
167} // namespace LIBC_NAMESPACE
168
169#endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
170

// Source: libc/src/__support/str_to_integer.h