integer_to_string.h source code [libc/src/__support/integer_to_string.h]

1	//===-- Utilities to convert integral values to string ----------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Converts an integer to a string.
10	//
11	// By default, the string is written as decimal to an internal buffer and
12	// accessed via the 'view' method.
13	//
14	// IntegerToString<int> buffer(42);
15	// cpp::string_view view = buffer.view();
16	//
17	// The buffer is allocated on the stack and its size is so that the conversion
18	// always succeeds.
19	//
20	// It is also possible to write the data to a preallocated buffer, but this may
21	// fail.
22	//
23	// char buffer[8];
24	// if (auto maybe_view = IntegerToString<int>::write_to_span(buffer, 42)) {
25	// cpp::string_view view = maybe_view;*
26	// }
27	//
28	// The first template parameter is the type of the integer.
29	// The second template parameter defines how the integer is formatted.
30	// Available default are 'radix::Bin', 'radix::Oct', 'radix::Dec' and
31	// 'radix::Hex'.
32	//
33	// For 'radix::Bin', 'radix::Oct' and 'radix::Hex' the value is always
34	// interpreted as a positive type but 'radix::Dec' will honor negative values.
35	// e.g.,
36	//
37	// IntegerToString<int8_t>(-1) // "-1"
38	// IntegerToString<int8_t, radix::Dec>(-1) // "-1"
39	// IntegerToString<int8_t, radix::Bin>(-1) // "11111111"
40	// IntegerToString<int8_t, radix::Oct>(-1) // "377"
41	// IntegerToString<int8_t, radix::Hex>(-1) // "ff"
42	//
43	// Additionnally, the format can be changed by navigating the subtypes:
44	// - WithPrefix : Adds "0b", "0", "0x" for binary, octal and hexadecimal
45	// - WithWidth<XX> : Pad string to XX characters filling leading digits with 0
46	// - Uppercase : Use uppercase letters (only for HexString)
47	// - WithSign : Prepend '+' for positive values (only for DecString)
48	//
49	// Examples
50	// --------
51	// IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(0) : "+00"
52	// IntegerToString<int8_t, radix::Dec::WithWidth<2>::WithSign>(-1) : "-01"
53	// IntegerToString<uint8_t, radix::Hex::WithPrefix::Uppercase>(255) : "0xFF"
54	// IntegerToString<uint8_t, radix::Hex::WithWidth<4>::Uppercase>(255) : "00FF"
55	//===----------------------------------------------------------------------===//
56
57	#ifndef LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
58	#define LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
59
60	#include <stdint.h>
61
62	#include "src/__support/CPP/algorithm.h" // max
63	#include "src/__support/CPP/array.h"
64	#include "src/__support/CPP/bit.h"
65	#include "src/__support/CPP/limits.h"
66	#include "src/__support/CPP/optional.h"
67	#include "src/__support/CPP/span.h"
68	#include "src/__support/CPP/string_view.h"
69	#include "src/__support/CPP/type_traits.h"
70	#include "src/__support/big_int.h" // make_integral_or_big_int_unsigned_t
71	#include "src/__support/common.h"
72
73	namespace LIBC_NAMESPACE {
74
75	namespace details {
76
77	template <uint8_t base, bool prefix = false, bool force_sign = false,
78	bool is_uppercase = false, size_t min_digits = `1`>
79	struct Fmt {
80	static constexpr uint8_t BASE = base;
81	static constexpr size_t MIN_DIGITS = min_digits;
82	static constexpr bool IS_UPPERCASE = is_uppercase;
83	static constexpr bool PREFIX = prefix;
84	static constexpr char FORCE_SIGN = force_sign;
85
86	using WithPrefix = Fmt<BASE, true, FORCE_SIGN, IS_UPPERCASE, MIN_DIGITS>;
87	using WithSign = Fmt<BASE, PREFIX, true, IS_UPPERCASE, MIN_DIGITS>;
88	using Uppercase = Fmt<BASE, PREFIX, FORCE_SIGN, true, MIN_DIGITS>;
89	template <size_t value>
90	using WithWidth = Fmt<BASE, PREFIX, FORCE_SIGN, IS_UPPERCASE, value>;
91
92	// Invariants
93	static constexpr uint8_t NUMERICAL_DIGITS = `10`;
94	static constexpr uint8_t ALPHA_DIGITS = `26`;
95	static constexpr uint8_t MAX_DIGIT = NUMERICAL_DIGITS + ALPHA_DIGITS;
96	static_assert(BASE > `1` && BASE <= MAX_DIGIT);
97	static_assert(!IS_UPPERCASE \|\| BASE > `10`, "Uppercase is only for radix > 10");
98	static_assert(!FORCE_SIGN \|\| BASE == `10`, "WithSign is only for radix == 10");
99	static_assert(!PREFIX \|\| (BASE == `2` \|\| BASE == `8` \|\| BASE == `16`),
100	"WithPrefix is only for radix == 2, 8 or 16");
101	};
102
103	// Move this to a separate header since it might be useful elsewhere.
104	template <bool forward> class StringBufferWriterImpl {
105	cpp::span<char> buffer;
106	size_t index = `0`;
107	bool out_of_range = false;
108
109	LIBC_INLINE size_t location() const {
110	return forward ? index : buffer.size() - `1` - index;
111	}
112
113	public:
114	StringBufferWriterImpl(const StringBufferWriterImpl &) = delete;
115	StringBufferWriterImpl(cpp::span<char> buffer) : buffer (buffer) {}
116
117	LIBC_INLINE size_t size() const { return index; }
118	LIBC_INLINE size_t remainder_size() const { return buffer.size() - size(); }
119	LIBC_INLINE bool empty() const { return size() == `0`; }
120	LIBC_INLINE bool full() const { return size() == buffer.size(); }
121	LIBC_INLINE bool ok() const { return !out_of_range; }
122
123	LIBC_INLINE StringBufferWriterImpl &push(char c) {
124	if (ok()) {
125	if (!full()) {
126	buffer[location()] = c;
127	++index;
128	} else {
129	out_of_range = true;
130	}
131	}
132	return *this;
133	}
134
135	LIBC_INLINE cpp::span<char> remainder_span() const {
136	return forward ? buffer.last(count: remainder_size())
137	: buffer.first(count: remainder_size());
138	}
139
140	LIBC_INLINE cpp::span<char> buffer_span() const {
141	return forward ? buffer.first(count: size()) : buffer.last(count: size());
142	}
143
144	LIBC_INLINE cpp::string_view buffer_view() const {
145	const auto s = buffer_span();
146	return {s.data(), s.size()};
147	}
148	};
149
150	using StringBufferWriter = StringBufferWriterImpl<true>;
151	using BackwardStringBufferWriter = StringBufferWriterImpl<false>;
152
153	} // namespace details
154
155	namespace radix {
156
157	using Bin = details::Fmt<`2`>;
158	using Oct = details::Fmt<`8`>;
159	using Dec = details::Fmt<`10`>;
160	using Hex = details::Fmt<`16`>;
161	template <size_t radix> using Custom = details::Fmt<radix>;
162
163	} // namespace radix
164
165	// See file header for documentation.
166	template <typename T, typename Fmt = radix::Dec> class IntegerToString {
167	static_assert(cpp::is_integral_v<T> \|\| is_big_int_v<T>);
168
169	LIBC_INLINE static constexpr size_t compute_buffer_size() {
170	constexpr auto MAX_DIGITS = []() -> size_t {
171	// We size the string buffer for base 10 using an approximation algorithm:
172	//
173	// size = ceil(sizeof(T) 5 / 2)*
174	//
175	// If sizeof(T) is 1, then size is 3 (actually need 3)
176	// If sizeof(T) is 2, then size is 5 (actually need 5)
177	// If sizeof(T) is 4, then size is 10 (actually need 10)
178	// If sizeof(T) is 8, then size is 20 (actually need 20)
179	// If sizeof(T) is 16, then size is 40 (actually need 39)
180	//
181	// NOTE: The ceil operation is actually implemented as
182	// floor(((sizeof(T) 5) + 1) / 2)*
183	// where floor operation is just integer division.
184	//
185	// This estimation grows slightly faster than the actual value, but the
186	// overhead is small enough to tolerate.
187	if constexpr (Fmt::BASE == `10`)
188	return ((sizeof(T) * `5`) + `1`) / `2`;
189	// For other bases, we approximate by rounding down to the nearest power
190	// of two base, since the space needed is easy to calculate and it won't
191	// overestimate by too much.
192	constexpr auto FLOOR_LOG_2 = [](size_t num) -> size_t {
193	size_t i = `0`;
194	for (; num > `1`; num /= `2`)
195	++i;
196	return i;
197	};
198	constexpr size_t BITS_PER_DIGIT = FLOOR_LOG_2(Fmt::BASE);
199	return ((sizeof(T) * `8` + (BITS_PER_DIGIT - `1`)) / BITS_PER_DIGIT);
200	};
201	constexpr size_t DIGIT_SIZE = cpp::max(MAX_DIGITS(), Fmt::MIN_DIGITS);
202	constexpr size_t SIGN_SIZE = Fmt::BASE == `10` ? `1` : `0`;
203	constexpr size_t PREFIX_SIZE = Fmt::PREFIX ? `2` : `0`;
204	return DIGIT_SIZE + SIGN_SIZE + PREFIX_SIZE;
205	}
206
207	static constexpr size_t BUFFER_SIZE = compute_buffer_size();
208	static_assert(BUFFER_SIZE > `0`);
209
210	// An internal stateless structure that handles the number formatting logic.
211	struct IntegerWriter {
212	static_assert(cpp::is_integral_v<T> \|\| is_big_int_v<T>);
213	using UNSIGNED_T = make_integral_or_big_int_unsigned_t<T>;
214
215	LIBC_INLINE static char digit_char(uint8_t digit) {
216	if (digit < `10`)
217	return `'0'` + static_cast<char>(digit);
218	return (Fmt::IS_UPPERCASE ? `'A'` : `'a'`) + static_cast<char>(digit - `10`);
219	}
220
221	LIBC_INLINE static void
222	write_unsigned_number(UNSIGNED_T value,
223	details::BackwardStringBufferWriter &sink) {
224	for (; sink.ok() && value != `0`; value /= Fmt::BASE) {
225	const uint8_t digit(static_cast<uint8_t>(value % Fmt::BASE));
226	sink.push(c: digit_char(digit));
227	}
228	}
229
230	// Returns the absolute value of 'value' as 'UNSIGNED_T'.
231	LIBC_INLINE static UNSIGNED_T abs(T value) {
232	if (cpp::is_unsigned_v<T> \|\| value >= `0`)
233	return value; // already of the right sign.
234
235	// Signed integers are asymmetric (e.g., int8_t ∈ [-128, 127]).
236	// Thus negating the type's minimum value would overflow.
237	// From C++20 on, signed types are guaranteed to be represented as 2's
238	// complement. We take advantage of this representation and negate the
239	// value by using the exact same bit representation, e.g.,
240	// binary : 0b1000'0000
241	// int8_t : -128
242	// uint8_t: 128
243
244	// Note: the compiler can completely optimize out the two branches and
245	// replace them by a simple negate instruction.
246	// https://godbolt.org/z/hE7zahT9W
247	if (value == cpp::numeric_limits<T>::min()) {
248	return cpp::bit_cast<UNSIGNED_T>(value);
249	} else {
250	return -value; // legal and representable both as T and UNSIGNED_T.`
251	}
252	}
253
254	LIBC_INLINE static void write(T value,
255	details::BackwardStringBufferWriter &sink) {
256	if constexpr (Fmt::BASE == `10`) {
257	write_unsigned_number(value: abs(value), sink);
258	} else {
259	write_unsigned_number(value: static_cast<UNSIGNED_T>(value), sink);
260	}
261	// width
262	while (sink.ok() && sink.size() < Fmt::MIN_DIGITS)
263	sink.push(c: `'0'`);
264	// sign
265	if constexpr (Fmt::BASE == `10`) {
266	if (value < `0`)
267	sink.push(c: `'-'`);
268	else if (Fmt::FORCE_SIGN)
269	sink.push(c: `'+'`);
270	}
271	// prefix
272	if constexpr (Fmt::PREFIX) {
273	if constexpr (Fmt::BASE == `2`) {
274	sink.push(c: `'b'`);
275	sink.push(c: `'0'`);
276	}
277	if constexpr (Fmt::BASE == `16`) {
278	sink.push(c: `'x'`);
279	sink.push(c: `'0'`);
280	}
281	if constexpr (Fmt::BASE == `8`) {
282	const cpp::string_view written = sink.buffer_view();
283	if (written.empty() \|\| written.front() != `'0'`)
284	sink.push(c: `'0'`);
285	}
286	}
287	}
288	};
289
290	cpp::array<char, BUFFER_SIZE> array;
291	size_t written = `0`;
292
293	public:
294	IntegerToString(const IntegerToString &) = delete;
295	IntegerToString(T value) {
296	details::BackwardStringBufferWriter writer(array);
297	IntegerWriter::write(value, writer);
298	written = writer.size();
299	}
300
301	[[nodiscard]] LIBC_INLINE static cpp::optional<cpp::string_view>
302	format_to(cpp::span<char> buffer, T value) {
303	details::BackwardStringBufferWriter writer(buffer);
304	IntegerWriter::write(value, writer);
305	if (writer.ok())
306	return cpp::string_view (buffer.data() + buffer.size() - writer.size(),
307	writer.size());
308	return cpp::nullopt;
309	}
310
311	LIBC_INLINE static constexpr size_t buffer_size() { return BUFFER_SIZE; }
312
313	LIBC_INLINE size_t size() const { return written; }
314	LIBC_INLINE cpp::string_view view() && = delete;
315	LIBC_INLINE cpp::string_view view() const & {
316	return cpp::string_view(array.data() + array.size() - size(), size());
317	}
318	};
319
320	} // namespace LIBC_NAMESPACE
321
322	#endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_TO_STRING_H
323

Provided by KDAB

Definitions

source code of libc/src/__support/integer_to_string.h