1 | //===-- Utilities to convert integral values to string ----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Converts an integer to a string. |
10 | // |
11 | // By default, the string is written as decimal to an internal buffer and |
12 | // accessed via the 'view' method. |
13 | // |
14 | // IntegerToString<int> buffer(42); |
15 | // cpp::string_view view = buffer.view(); |
16 | // |
17 | // The buffer is allocated on the stack and its size is so that the conversion |
18 | // always succeeds. |
19 | // |
20 | // It is also possible to write the data to a preallocated buffer, but this may |
21 | // fail. |
22 | // |
23 | // char buffer[8]; |
24 | // if (auto maybe_view = IntegerToString<int>::write_to_span(buffer, 42)) { |
25 | // cpp::string_view view = *maybe_view; |
26 | // } |
27 | // |
28 | // The first template parameter is the type of the integer. |
29 | // The second template parameter defines how the integer is formatted. |
30 | // Available default are 'radix::Bin', 'radix::Oct', 'radix::Dec' and |
31 | // 'radix::Hex'. |
32 | // |
33 | // For 'radix::Bin', 'radix::Oct' and 'radix::Hex' the value is always |
34 | // interpreted as a positive type but 'radix::Dec' will honor negative values. |
35 | // e.g., |
36 | // |
37 | // IntegerToString<int8_t>(-1) // "-1" |
38 | // IntegerToString<int8_t, radix::Dec>(-1) // "-1" |
39 | // IntegerToString<int8_t, radix::Bin>(-1) // "11111111" |
40 | // IntegerToString<int8_t, radix::Oct>(-1) // "377" |
41 | // IntegerToString<int8_t, radix::Hex>(-1) // "ff" |
42 | // |
43 | // Additionnally, the format can be changed by navigating the subtypes: |
44 | // - WithPrefix : Adds "0b", "0", "0x" for binary, octal and hexadecimal |
45 | // - WithWidth<XX> : Pad string to XX characters filling leading digits with 0 |
46 | // - Uppercase : Use uppercase letters (only for HexString) |
47 | // - WithSign : Prepend '+' for positive values (only for DecString) |
48 | // |
49 | // Examples |
50 | // -------- |
51 | // IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(0) : "+00" |
52 | // IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(-1) : "-01" |
53 | // IntegerToString<uint8_t, radix::Hex::WithPrefix::Uppercase>(255) : "0xFF" |
54 | // IntegerToString<uint8_t, radix::Hex::WithWidth<4>::Uppercase>(255) : "00FF" |
55 | //===----------------------------------------------------------------------===// |
56 | |
57 | #ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H |
58 | #define LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H |
59 | |
60 | #include <stdint.h> |
61 | |
62 | #include "src/__support/CPP/algorithm.h" // max |
63 | #include "src/__support/CPP/array.h" |
64 | #include "src/__support/CPP/bit.h" |
65 | #include "src/__support/CPP/limits.h" |
66 | #include "src/__support/CPP/optional.h" |
67 | #include "src/__support/CPP/span.h" |
68 | #include "src/__support/CPP/string_view.h" |
69 | #include "src/__support/CPP/type_traits.h" |
70 | #include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t |
71 | #include "src/__support/common.h" |
72 | |
73 | namespace LIBC_NAMESPACE { |
74 | |
75 | namespace details { |
76 | |
77 | template <uint8_t base, bool prefix = false, bool force_sign = false, |
78 | bool is_uppercase = false, size_t min_digits = 1> |
79 | struct Fmt { |
80 | static constexpr uint8_t BASE = base; |
81 | static constexpr size_t MIN_DIGITS = min_digits; |
82 | static constexpr bool IS_UPPERCASE = is_uppercase; |
83 | static constexpr bool PREFIX = prefix; |
84 | static constexpr char FORCE_SIGN = force_sign; |
85 | |
86 | using WithPrefix = Fmt<BASE, true, FORCE_SIGN, IS_UPPERCASE, MIN_DIGITS>; |
87 | using WithSign = Fmt<BASE, PREFIX, true, IS_UPPERCASE, MIN_DIGITS>; |
88 | using Uppercase = Fmt<BASE, PREFIX, FORCE_SIGN, true, MIN_DIGITS>; |
89 | template <size_t value> |
90 | using WithWidth = Fmt<BASE, PREFIX, FORCE_SIGN, IS_UPPERCASE, value>; |
91 | |
92 | // Invariants |
93 | static constexpr uint8_t NUMERICAL_DIGITS = 10; |
94 | static constexpr uint8_t ALPHA_DIGITS = 26; |
95 | static constexpr uint8_t MAX_DIGIT = NUMERICAL_DIGITS + ALPHA_DIGITS; |
96 | static_assert(BASE > 1 && BASE <= MAX_DIGIT); |
97 | static_assert(!IS_UPPERCASE || BASE > 10, "Uppercase is only for radix > 10" ); |
98 | static_assert(!FORCE_SIGN || BASE == 10, "WithSign is only for radix == 10" ); |
99 | static_assert(!PREFIX || (BASE == 2 || BASE == 8 || BASE == 16), |
100 | "WithPrefix is only for radix == 2, 8 or 16" ); |
101 | }; |
102 | |
103 | // Move this to a separate header since it might be useful elsewhere. |
104 | template <bool forward> class StringBufferWriterImpl { |
105 | cpp::span<char> buffer; |
106 | size_t index = 0; |
107 | bool out_of_range = false; |
108 | |
109 | LIBC_INLINE size_t location() const { |
110 | return forward ? index : buffer.size() - 1 - index; |
111 | } |
112 | |
113 | public: |
114 | StringBufferWriterImpl(const StringBufferWriterImpl &) = delete; |
115 | StringBufferWriterImpl(cpp::span<char> buffer) : buffer(buffer) {} |
116 | |
117 | LIBC_INLINE size_t size() const { return index; } |
118 | LIBC_INLINE size_t remainder_size() const { return buffer.size() - size(); } |
119 | LIBC_INLINE bool empty() const { return size() == 0; } |
120 | LIBC_INLINE bool full() const { return size() == buffer.size(); } |
121 | LIBC_INLINE bool ok() const { return !out_of_range; } |
122 | |
123 | LIBC_INLINE StringBufferWriterImpl &push(char c) { |
124 | if (ok()) { |
125 | if (!full()) { |
126 | buffer[location()] = c; |
127 | ++index; |
128 | } else { |
129 | out_of_range = true; |
130 | } |
131 | } |
132 | return *this; |
133 | } |
134 | |
135 | LIBC_INLINE cpp::span<char> remainder_span() const { |
136 | return forward ? buffer.last(count: remainder_size()) |
137 | : buffer.first(count: remainder_size()); |
138 | } |
139 | |
140 | LIBC_INLINE cpp::span<char> buffer_span() const { |
141 | return forward ? buffer.first(count: size()) : buffer.last(count: size()); |
142 | } |
143 | |
144 | LIBC_INLINE cpp::string_view buffer_view() const { |
145 | const auto s = buffer_span(); |
146 | return {s.data(), s.size()}; |
147 | } |
148 | }; |
149 | |
150 | using StringBufferWriter = StringBufferWriterImpl<true>; |
151 | using BackwardStringBufferWriter = StringBufferWriterImpl<false>; |
152 | |
153 | } // namespace details |
154 | |
155 | namespace radix { |
156 | |
157 | using Bin = details::Fmt<2>; |
158 | using Oct = details::Fmt<8>; |
159 | using Dec = details::Fmt<10>; |
160 | using Hex = details::Fmt<16>; |
161 | template <size_t radix> using Custom = details::Fmt<radix>; |
162 | |
163 | } // namespace radix |
164 | |
165 | // See file header for documentation. |
166 | template <typename T, typename Fmt = radix::Dec> class IntegerToString { |
167 | static_assert(cpp::is_integral_v<T> || is_big_int_v<T>); |
168 | |
169 | LIBC_INLINE static constexpr size_t compute_buffer_size() { |
170 | constexpr auto MAX_DIGITS = []() -> size_t { |
171 | // We size the string buffer for base 10 using an approximation algorithm: |
172 | // |
173 | // size = ceil(sizeof(T) * 5 / 2) |
174 | // |
175 | // If sizeof(T) is 1, then size is 3 (actually need 3) |
176 | // If sizeof(T) is 2, then size is 5 (actually need 5) |
177 | // If sizeof(T) is 4, then size is 10 (actually need 10) |
178 | // If sizeof(T) is 8, then size is 20 (actually need 20) |
179 | // If sizeof(T) is 16, then size is 40 (actually need 39) |
180 | // |
181 | // NOTE: The ceil operation is actually implemented as |
182 | // floor(((sizeof(T) * 5) + 1) / 2) |
183 | // where floor operation is just integer division. |
184 | // |
185 | // This estimation grows slightly faster than the actual value, but the |
186 | // overhead is small enough to tolerate. |
187 | if constexpr (Fmt::BASE == 10) |
188 | return ((sizeof(T) * 5) + 1) / 2; |
189 | // For other bases, we approximate by rounding down to the nearest power |
190 | // of two base, since the space needed is easy to calculate and it won't |
191 | // overestimate by too much. |
192 | constexpr auto FLOOR_LOG_2 = [](size_t num) -> size_t { |
193 | size_t i = 0; |
194 | for (; num > 1; num /= 2) |
195 | ++i; |
196 | return i; |
197 | }; |
198 | constexpr size_t BITS_PER_DIGIT = FLOOR_LOG_2(Fmt::BASE); |
199 | return ((sizeof(T) * 8 + (BITS_PER_DIGIT - 1)) / BITS_PER_DIGIT); |
200 | }; |
201 | constexpr size_t DIGIT_SIZE = cpp::max(MAX_DIGITS(), Fmt::MIN_DIGITS); |
202 | constexpr size_t SIGN_SIZE = Fmt::BASE == 10 ? 1 : 0; |
203 | constexpr size_t PREFIX_SIZE = Fmt::PREFIX ? 2 : 0; |
204 | return DIGIT_SIZE + SIGN_SIZE + PREFIX_SIZE; |
205 | } |
206 | |
207 | static constexpr size_t BUFFER_SIZE = compute_buffer_size(); |
208 | static_assert(BUFFER_SIZE > 0); |
209 | |
210 | // An internal stateless structure that handles the number formatting logic. |
211 | struct IntegerWriter { |
212 | static_assert(cpp::is_integral_v<T> || is_big_int_v<T>); |
213 | using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>; |
214 | |
215 | LIBC_INLINE static char digit_char(uint8_t digit) { |
216 | if (digit < 10) |
217 | return '0' + static_cast<char>(digit); |
218 | return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast<char>(digit - 10); |
219 | } |
220 | |
221 | LIBC_INLINE static void |
222 | write_unsigned_number(UNSIGNED_T value, |
223 | details::BackwardStringBufferWriter &sink) { |
224 | for (; sink.ok() && value != 0; value /= Fmt::BASE) { |
225 | const uint8_t digit(static_cast<uint8_t>(value % Fmt::BASE)); |
226 | sink.push(c: digit_char(digit)); |
227 | } |
228 | } |
229 | |
230 | // Returns the absolute value of 'value' as 'UNSIGNED_T'. |
231 | LIBC_INLINE static UNSIGNED_T abs(T value) { |
232 | if (cpp::is_unsigned_v<T> || value >= 0) |
233 | return value; // already of the right sign. |
234 | |
235 | // Signed integers are asymmetric (e.g., int8_t ∈ [-128, 127]). |
236 | // Thus negating the type's minimum value would overflow. |
237 | // From C++20 on, signed types are guaranteed to be represented as 2's |
238 | // complement. We take advantage of this representation and negate the |
239 | // value by using the exact same bit representation, e.g., |
240 | // binary : 0b1000'0000 |
241 | // int8_t : -128 |
242 | // uint8_t: 128 |
243 | |
244 | // Note: the compiler can completely optimize out the two branches and |
245 | // replace them by a simple negate instruction. |
246 | // https://godbolt.org/z/hE7zahT9W |
247 | if (value == cpp::numeric_limits<T>::min()) { |
248 | return cpp::bit_cast<UNSIGNED_T>(value); |
249 | } else { |
250 | return -value; // legal and representable both as T and UNSIGNED_T.` |
251 | } |
252 | } |
253 | |
254 | LIBC_INLINE static void write(T value, |
255 | details::BackwardStringBufferWriter &sink) { |
256 | if constexpr (Fmt::BASE == 10) { |
257 | write_unsigned_number(value: abs(value), sink); |
258 | } else { |
259 | write_unsigned_number(value: static_cast<UNSIGNED_T>(value), sink); |
260 | } |
261 | // width |
262 | while (sink.ok() && sink.size() < Fmt::MIN_DIGITS) |
263 | sink.push(c: '0'); |
264 | // sign |
265 | if constexpr (Fmt::BASE == 10) { |
266 | if (value < 0) |
267 | sink.push(c: '-'); |
268 | else if (Fmt::FORCE_SIGN) |
269 | sink.push(c: '+'); |
270 | } |
271 | // prefix |
272 | if constexpr (Fmt::PREFIX) { |
273 | if constexpr (Fmt::BASE == 2) { |
274 | sink.push(c: 'b'); |
275 | sink.push(c: '0'); |
276 | } |
277 | if constexpr (Fmt::BASE == 16) { |
278 | sink.push(c: 'x'); |
279 | sink.push(c: '0'); |
280 | } |
281 | if constexpr (Fmt::BASE == 8) { |
282 | const cpp::string_view written = sink.buffer_view(); |
283 | if (written.empty() || written.front() != '0') |
284 | sink.push(c: '0'); |
285 | } |
286 | } |
287 | } |
288 | }; |
289 | |
290 | cpp::array<char, BUFFER_SIZE> array; |
291 | size_t written = 0; |
292 | |
293 | public: |
294 | IntegerToString(const IntegerToString &) = delete; |
295 | IntegerToString(T value) { |
296 | details::BackwardStringBufferWriter writer(array); |
297 | IntegerWriter::write(value, writer); |
298 | written = writer.size(); |
299 | } |
300 | |
301 | [[nodiscard]] LIBC_INLINE static cpp::optional<cpp::string_view> |
302 | format_to(cpp::span<char> buffer, T value) { |
303 | details::BackwardStringBufferWriter writer(buffer); |
304 | IntegerWriter::write(value, writer); |
305 | if (writer.ok()) |
306 | return cpp::string_view(buffer.data() + buffer.size() - writer.size(), |
307 | writer.size()); |
308 | return cpp::nullopt; |
309 | } |
310 | |
311 | LIBC_INLINE static constexpr size_t buffer_size() { return BUFFER_SIZE; } |
312 | |
313 | LIBC_INLINE size_t size() const { return written; } |
314 | LIBC_INLINE cpp::string_view view() && = delete; |
315 | LIBC_INLINE cpp::string_view view() const & { |
316 | return cpp::string_view(array.data() + array.size() - size(), size()); |
317 | } |
318 | }; |
319 | |
320 | } // namespace LIBC_NAMESPACE |
321 | |
322 | #endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H |
323 | |