1//===-- Utilities to convert integral values to string ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Converts an integer to a string.
10//
11// By default, the string is written as decimal to an internal buffer and
12// accessed via the 'view' method.
13//
14// IntegerToString<int> buffer(42);
15// cpp::string_view view = buffer.view();
16//
17// The buffer is allocated on the stack and its size is so that the conversion
18// always succeeds.
19//
20// It is also possible to write the data to a preallocated buffer, but this may
21// fail.
22//
23// char buffer[8];
24// if (auto maybe_view = IntegerToString<int>::write_to_span(buffer, 42)) {
25// cpp::string_view view = *maybe_view;
26// }
27//
28// The first template parameter is the type of the integer.
29// The second template parameter defines how the integer is formatted.
30// Available default are 'radix::Bin', 'radix::Oct', 'radix::Dec' and
31// 'radix::Hex'.
32//
33// For 'radix::Bin', 'radix::Oct' and 'radix::Hex' the value is always
34// interpreted as a positive type but 'radix::Dec' will honor negative values.
35// e.g.,
36//
37// IntegerToString<int8_t>(-1) // "-1"
38// IntegerToString<int8_t, radix::Dec>(-1) // "-1"
39// IntegerToString<int8_t, radix::Bin>(-1) // "11111111"
40// IntegerToString<int8_t, radix::Oct>(-1) // "377"
41// IntegerToString<int8_t, radix::Hex>(-1) // "ff"
42//
43// Additionnally, the format can be changed by navigating the subtypes:
44// - WithPrefix : Adds "0b", "0", "0x" for binary, octal and hexadecimal
45// - WithWidth<XX> : Pad string to XX characters filling leading digits with 0
46// - Uppercase : Use uppercase letters (only for HexString)
47// - WithSign : Prepend '+' for positive values (only for DecString)
48//
49// Examples
50// --------
51// IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(0) : "+00"
52// IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(-1) : "-01"
53// IntegerToString<uint8_t, radix::Hex::WithPrefix::Uppercase>(255) : "0xFF"
54// IntegerToString<uint8_t, radix::Hex::WithWidth<4>::Uppercase>(255) : "00FF"
55//===----------------------------------------------------------------------===//
56
57#ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
58#define LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
59
60#include <stdint.h>
61
62#include "src/__support/CPP/algorithm.h" // max
63#include "src/__support/CPP/array.h"
64#include "src/__support/CPP/bit.h"
65#include "src/__support/CPP/limits.h"
66#include "src/__support/CPP/optional.h"
67#include "src/__support/CPP/span.h"
68#include "src/__support/CPP/string_view.h"
69#include "src/__support/CPP/type_traits.h"
70#include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t
71#include "src/__support/common.h"
72
73namespace LIBC_NAMESPACE {
74
75namespace details {
76
77template <uint8_t base, bool prefix = false, bool force_sign = false,
78 bool is_uppercase = false, size_t min_digits = 1>
79struct Fmt {
80 static constexpr uint8_t BASE = base;
81 static constexpr size_t MIN_DIGITS = min_digits;
82 static constexpr bool IS_UPPERCASE = is_uppercase;
83 static constexpr bool PREFIX = prefix;
84 static constexpr char FORCE_SIGN = force_sign;
85
86 using WithPrefix = Fmt<BASE, true, FORCE_SIGN, IS_UPPERCASE, MIN_DIGITS>;
87 using WithSign = Fmt<BASE, PREFIX, true, IS_UPPERCASE, MIN_DIGITS>;
88 using Uppercase = Fmt<BASE, PREFIX, FORCE_SIGN, true, MIN_DIGITS>;
89 template <size_t value>
90 using WithWidth = Fmt<BASE, PREFIX, FORCE_SIGN, IS_UPPERCASE, value>;
91
92 // Invariants
93 static constexpr uint8_t NUMERICAL_DIGITS = 10;
94 static constexpr uint8_t ALPHA_DIGITS = 26;
95 static constexpr uint8_t MAX_DIGIT = NUMERICAL_DIGITS + ALPHA_DIGITS;
96 static_assert(BASE > 1 && BASE <= MAX_DIGIT);
97 static_assert(!IS_UPPERCASE || BASE > 10, "Uppercase is only for radix > 10");
98 static_assert(!FORCE_SIGN || BASE == 10, "WithSign is only for radix == 10");
99 static_assert(!PREFIX || (BASE == 2 || BASE == 8 || BASE == 16),
100 "WithPrefix is only for radix == 2, 8 or 16");
101};
102
103// Move this to a separate header since it might be useful elsewhere.
104template <bool forward> class StringBufferWriterImpl {
105 cpp::span<char> buffer;
106 size_t index = 0;
107 bool out_of_range = false;
108
109 LIBC_INLINE size_t location() const {
110 return forward ? index : buffer.size() - 1 - index;
111 }
112
113public:
114 StringBufferWriterImpl(const StringBufferWriterImpl &) = delete;
115 StringBufferWriterImpl(cpp::span<char> buffer) : buffer(buffer) {}
116
117 LIBC_INLINE size_t size() const { return index; }
118 LIBC_INLINE size_t remainder_size() const { return buffer.size() - size(); }
119 LIBC_INLINE bool empty() const { return size() == 0; }
120 LIBC_INLINE bool full() const { return size() == buffer.size(); }
121 LIBC_INLINE bool ok() const { return !out_of_range; }
122
123 LIBC_INLINE StringBufferWriterImpl &push(char c) {
124 if (ok()) {
125 if (!full()) {
126 buffer[location()] = c;
127 ++index;
128 } else {
129 out_of_range = true;
130 }
131 }
132 return *this;
133 }
134
135 LIBC_INLINE cpp::span<char> remainder_span() const {
136 return forward ? buffer.last(count: remainder_size())
137 : buffer.first(count: remainder_size());
138 }
139
140 LIBC_INLINE cpp::span<char> buffer_span() const {
141 return forward ? buffer.first(count: size()) : buffer.last(count: size());
142 }
143
144 LIBC_INLINE cpp::string_view buffer_view() const {
145 const auto s = buffer_span();
146 return {s.data(), s.size()};
147 }
148};
149
150using StringBufferWriter = StringBufferWriterImpl<true>;
151using BackwardStringBufferWriter = StringBufferWriterImpl<false>;
152
153} // namespace details
154
155namespace radix {
156
157using Bin = details::Fmt<2>;
158using Oct = details::Fmt<8>;
159using Dec = details::Fmt<10>;
160using Hex = details::Fmt<16>;
161template <size_t radix> using Custom = details::Fmt<radix>;
162
163} // namespace radix
164
165// See file header for documentation.
166template <typename T, typename Fmt = radix::Dec> class IntegerToString {
167 static_assert(cpp::is_integral_v<T> || is_big_int_v<T>);
168
169 LIBC_INLINE static constexpr size_t compute_buffer_size() {
170 constexpr auto MAX_DIGITS = []() -> size_t {
171 // We size the string buffer for base 10 using an approximation algorithm:
172 //
173 // size = ceil(sizeof(T) * 5 / 2)
174 //
175 // If sizeof(T) is 1, then size is 3 (actually need 3)
176 // If sizeof(T) is 2, then size is 5 (actually need 5)
177 // If sizeof(T) is 4, then size is 10 (actually need 10)
178 // If sizeof(T) is 8, then size is 20 (actually need 20)
179 // If sizeof(T) is 16, then size is 40 (actually need 39)
180 //
181 // NOTE: The ceil operation is actually implemented as
182 // floor(((sizeof(T) * 5) + 1) / 2)
183 // where floor operation is just integer division.
184 //
185 // This estimation grows slightly faster than the actual value, but the
186 // overhead is small enough to tolerate.
187 if constexpr (Fmt::BASE == 10)
188 return ((sizeof(T) * 5) + 1) / 2;
189 // For other bases, we approximate by rounding down to the nearest power
190 // of two base, since the space needed is easy to calculate and it won't
191 // overestimate by too much.
192 constexpr auto FLOOR_LOG_2 = [](size_t num) -> size_t {
193 size_t i = 0;
194 for (; num > 1; num /= 2)
195 ++i;
196 return i;
197 };
198 constexpr size_t BITS_PER_DIGIT = FLOOR_LOG_2(Fmt::BASE);
199 return ((sizeof(T) * 8 + (BITS_PER_DIGIT - 1)) / BITS_PER_DIGIT);
200 };
201 constexpr size_t DIGIT_SIZE = cpp::max(MAX_DIGITS(), Fmt::MIN_DIGITS);
202 constexpr size_t SIGN_SIZE = Fmt::BASE == 10 ? 1 : 0;
203 constexpr size_t PREFIX_SIZE = Fmt::PREFIX ? 2 : 0;
204 return DIGIT_SIZE + SIGN_SIZE + PREFIX_SIZE;
205 }
206
207 static constexpr size_t BUFFER_SIZE = compute_buffer_size();
208 static_assert(BUFFER_SIZE > 0);
209
210 // An internal stateless structure that handles the number formatting logic.
211 struct IntegerWriter {
212 static_assert(cpp::is_integral_v<T> || is_big_int_v<T>);
213 using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;
214
215 LIBC_INLINE static char digit_char(uint8_t digit) {
216 if (digit < 10)
217 return '0' + static_cast<char>(digit);
218 return (Fmt::IS_UPPERCASE ? 'A' : 'a') + static_cast<char>(digit - 10);
219 }
220
221 LIBC_INLINE static void
222 write_unsigned_number(UNSIGNED_T value,
223 details::BackwardStringBufferWriter &sink) {
224 for (; sink.ok() && value != 0; value /= Fmt::BASE) {
225 const uint8_t digit(static_cast<uint8_t>(value % Fmt::BASE));
226 sink.push(c: digit_char(digit));
227 }
228 }
229
230 // Returns the absolute value of 'value' as 'UNSIGNED_T'.
231 LIBC_INLINE static UNSIGNED_T abs(T value) {
232 if (cpp::is_unsigned_v<T> || value >= 0)
233 return value; // already of the right sign.
234
235 // Signed integers are asymmetric (e.g., int8_t ∈ [-128, 127]).
236 // Thus negating the type's minimum value would overflow.
237 // From C++20 on, signed types are guaranteed to be represented as 2's
238 // complement. We take advantage of this representation and negate the
239 // value by using the exact same bit representation, e.g.,
240 // binary : 0b1000'0000
241 // int8_t : -128
242 // uint8_t: 128
243
244 // Note: the compiler can completely optimize out the two branches and
245 // replace them by a simple negate instruction.
246 // https://godbolt.org/z/hE7zahT9W
247 if (value == cpp::numeric_limits<T>::min()) {
248 return cpp::bit_cast<UNSIGNED_T>(value);
249 } else {
250 return -value; // legal and representable both as T and UNSIGNED_T.`
251 }
252 }
253
254 LIBC_INLINE static void write(T value,
255 details::BackwardStringBufferWriter &sink) {
256 if constexpr (Fmt::BASE == 10) {
257 write_unsigned_number(value: abs(value), sink);
258 } else {
259 write_unsigned_number(value: static_cast<UNSIGNED_T>(value), sink);
260 }
261 // width
262 while (sink.ok() && sink.size() < Fmt::MIN_DIGITS)
263 sink.push(c: '0');
264 // sign
265 if constexpr (Fmt::BASE == 10) {
266 if (value < 0)
267 sink.push(c: '-');
268 else if (Fmt::FORCE_SIGN)
269 sink.push(c: '+');
270 }
271 // prefix
272 if constexpr (Fmt::PREFIX) {
273 if constexpr (Fmt::BASE == 2) {
274 sink.push(c: 'b');
275 sink.push(c: '0');
276 }
277 if constexpr (Fmt::BASE == 16) {
278 sink.push(c: 'x');
279 sink.push(c: '0');
280 }
281 if constexpr (Fmt::BASE == 8) {
282 const cpp::string_view written = sink.buffer_view();
283 if (written.empty() || written.front() != '0')
284 sink.push(c: '0');
285 }
286 }
287 }
288 };
289
290 cpp::array<char, BUFFER_SIZE> array;
291 size_t written = 0;
292
293public:
294 IntegerToString(const IntegerToString &) = delete;
295 IntegerToString(T value) {
296 details::BackwardStringBufferWriter writer(array);
297 IntegerWriter::write(value, writer);
298 written = writer.size();
299 }
300
301 [[nodiscard]] LIBC_INLINE static cpp::optional<cpp::string_view>
302 format_to(cpp::span<char> buffer, T value) {
303 details::BackwardStringBufferWriter writer(buffer);
304 IntegerWriter::write(value, writer);
305 if (writer.ok())
306 return cpp::string_view(buffer.data() + buffer.size() - writer.size(),
307 writer.size());
308 return cpp::nullopt;
309 }
310
311 LIBC_INLINE static constexpr size_t buffer_size() { return BUFFER_SIZE; }
312
313 LIBC_INLINE size_t size() const { return written; }
314 LIBC_INLINE cpp::string_view view() && = delete;
315 LIBC_INLINE cpp::string_view view() const & {
316 return cpp::string_view(array.data() + array.size() - size(), size());
317 }
318};
319
320} // namespace LIBC_NAMESPACE
321
322#endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
323

source code of libc/src/__support/integer_to_string.h