//===-- Memory utils --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H

#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/cstddef.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/endian_internal.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
#include "src/__support/macros/properties/architectures.h"

#include <stddef.h> // size_t
#include <stdint.h> // intptr_t / uintptr_t / INT32_MAX / INT32_MIN

namespace LIBC_NAMESPACE_DECL {

// Returns the number of bytes to subtract from ptr to get to the previous
// multiple of alignment. If ptr is already aligned, returns 0.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_down(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  return reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned, returns 0.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_up(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  // The logic is not straightforward and involves unsigned modulo arithmetic,
  // but the generated code is as fast as it can be.
  return -reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}
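
// Worked example (illustrative, not part of the original file): for a pointer
// with address 0x1003 and alignment = 16,
//   distance_to_align_down<16>(ptr) == 0x1003 & 0xF == 3, and 0x1003 - 3 == 0x1000;
//   distance_to_align_up<16>(ptr) == -0x1003 & 0xF == 13, and 0x1003 + 13 == 0x1010.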

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned, returns alignment.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_next_aligned(const void *ptr) {
  return alignment - distance_to_align_down<alignment>(ptr);
}
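
// Note the difference from distance_to_align_up (illustrative): for an
// already-aligned pointer, distance_to_align_up<16>(ptr) returns 0 while
// distance_to_next_aligned<16>(ptr) returns 16.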

// Returns the same pointer but notifies the compiler that it is aligned.
template <size_t alignment, typename T> LIBC_INLINE T *assume_aligned(T *ptr) {
  return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
}
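
// For instance (illustrative), after `ptr = assume_aligned<64>(ptr);` the
// compiler is free to emit aligned (e.g., vector) accesses through ptr, so
// the caller must guarantee the pointer really is 64-byte aligned.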

// Returns true iff memory regions [p1, p1 + size) and [p2, p2 + size) are
// disjoint.
LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) {
  const ptrdiff_t sdiff =
      static_cast<const char *>(p1) - static_cast<const char *>(p2);
  // We use bit_cast to make sure that we don't run into accidental integer
  // promotion. Notably the unary minus operator goes through integer promotion
  // at the expression level. We assume arithmetic to be two's complement
  // (i.e., bit_cast has the same behavior as a regular signed to unsigned
  // cast).
  static_assert(-1 == ~0, "not 2's complement");
  const size_t udiff = cpp::bit_cast<size_t>(sdiff);
  // An accidental integer promotion would be caught here, since bit_cast
  // requires the source and destination sizes to match.
  const size_t neg_udiff = cpp::bit_cast<size_t>(-sdiff);
  // This is expected to compile to a conditional move.
  return sdiff >= 0 ? size <= udiff : size <= neg_udiff;
}
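
// Worked example (illustrative): with p1 == 0x100 and p2 == 0x108,
//   is_disjoint(p1, p2, 8) is true  (sdiff == -8, neg_udiff == 8, 8 <= 8);
//   is_disjoint(p1, p2, 9) is false (the region starting at p1 reaches into
//   the one starting at p2).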

#if __has_builtin(__builtin_memcpy_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
#endif

#if __has_builtin(__builtin_memset_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE
#endif

// Performs a constant-count copy.
template <size_t Size>
LIBC_INLINE void memcpy_inline(void *__restrict dst,
                               const void *__restrict src) {
#ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
  __builtin_memcpy_inline(dst, src, Size);
#else
  // In memory functions `memcpy_inline` is instantiated several times with
  // different values of the Size parameter. This doesn't play well with GCC's
  // Value Range Analysis, which wrongly detects out-of-bounds accesses. We
  // disable these warnings for the purpose of this function.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#pragma GCC diagnostic ignored "-Wstringop-overread"
#pragma GCC diagnostic ignored "-Wstringop-overflow"
  for (size_t i = 0; i < Size; ++i)
    static_cast<char *>(dst)[i] = static_cast<const char *>(src)[i];
#pragma GCC diagnostic pop
#endif
}
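
// Usage (illustrative): copy exactly eight bytes with the count known at
// compile time, which lets the compiler lower it to a single load/store pair:
//   memcpy_inline<8>(dst, src);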

using Ptr = cpp::byte *;        // Pointer to raw data.
using CPtr = const cpp::byte *; // Const pointer to raw data.

// This type makes sure that we don't accidentally promote an integral type to
// another one. It is only constructible from the exact T type.
template <typename T> struct StrictIntegralType {
  static_assert(cpp::is_integral_v<T>);

  // Can only be constructed from a T.
  template <typename U, cpp::enable_if_t<cpp::is_same_v<U, T>, bool> = 0>
  LIBC_INLINE StrictIntegralType(U value) : value(value) {}

  // Allows using the type in an if statement.
  LIBC_INLINE explicit operator bool() const { return value; }

  // If the type is unsigned (as for bcmp) we allow bitwise OR operations.
  LIBC_INLINE StrictIntegralType
  operator|(const StrictIntegralType &Rhs) const {
    static_assert(!cpp::is_signed_v<T>);
    return value | Rhs.value;
  }

  // For interaction with the C API we allow explicit conversion back to the
  // `int` type.
  LIBC_INLINE explicit operator int() const {
    // bit_cast makes sure that T and int have the same size.
    return cpp::bit_cast<int>(value);
  }

  // Helpers to get the zero and non-zero values.
  LIBC_INLINE static constexpr StrictIntegralType zero() { return {T(0)}; }
  LIBC_INLINE static constexpr StrictIntegralType nonzero() { return {T(1)}; }

private:
  T value;
};

using MemcmpReturnType = StrictIntegralType<int32_t>;
using BcmpReturnType = StrictIntegralType<uint32_t>;
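
// For example (illustrative, not part of the interface):
//   MemcmpReturnType ok = int32_t(-1);  // Exact type: accepted.
//   MemcmpReturnType bad = int16_t(-1); // Would need promotion: ill-formed.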

// This implements the semantics of 'memcmp': returning a negative value when
// 'a' is less than 'b', '0' when 'a' equals 'b', and a positive number
// otherwise.
LIBC_INLINE MemcmpReturnType cmp_uint32_t(uint32_t a, uint32_t b) {
  // We perform the difference as an int64_t.
  const int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
  // For the int64_t to int32_t conversion we want the following properties:
  // - int32_t[31:31] == 1 iff diff < 0
  // - int32_t[31:0] == 0 iff diff == 0

  // We also observe that:
  // - When diff < 0: diff[63:32] == 0xffffffff and diff[31:0] != 0
  // - When diff > 0: diff[63:32] == 0 and diff[31:0] != 0
  // - When diff == 0: diff[63:32] == 0 and diff[31:0] == 0
  // - https://godbolt.org/z/8W7qWP6e5
  // - This implies that we need only look at diff[32:32] to determine the
  //   sign bit of the returned int32_t.

  // So, we do the following:
  // - int32_t[31:31] = diff[32:32]
  // - int32_t[30:0] = diff[31:0] == 0 ? 0 : non-0.

  // And, we can achieve the above with the expression below. We could have
  // also used (diff >> 1) | (diff & 0x1), but (diff & 0xFFFF) is faster than
  // (diff & 0x1). https://godbolt.org/z/j3b569rW1
  return static_cast<int32_t>((diff >> 1) | (diff & 0xFFFF));
}
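
// Worked example (illustrative): cmp_uint32_t(5, 7) computes diff == -2
// (0xFFFF...FFFE); (diff >> 1) is -1 and OR-ing in (diff & 0xFFFF) keeps the
// value at -1, so the truncated int32_t is negative as required. Conversely,
// cmp_uint32_t(7, 5) computes diff == 2, giving (2 >> 1) | (2 & 0xFFFF) == 3,
// a positive value, and equal inputs yield 0.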

// Returns a negative value if 'a' is less than 'b' and a positive value
// otherwise. This implements the semantics of 'memcmp' when we know that 'a'
// and 'b' differ.
LIBC_INLINE MemcmpReturnType cmp_neq_uint64_t(uint64_t a, uint64_t b) {
#if defined(LIBC_TARGET_ARCH_IS_X86)
  // On x86, the best strategy would be to use 'INT32_MAX' and 'INT32_MIN' for
  // positive and negative values respectively, as they are one value apart:
  //   xor eax, eax        <- free
  //   cmp rdi, rsi        <- serializing
  //   adc eax, 2147483647 <- serializing

  // Unfortunately we found instances of client code that negate the result of
  // 'memcmp' to reverse the ordering. Because signed integers are not
  // symmetric (e.g., int8_t ∈ [-128, 127]), returning 'INT32_MIN' would break
  // such code, as `-INT32_MIN` is not representable as an int32_t.

  // As a consequence, we use 5 and -5, which is still fine in terms of
  // latency:
  //   cmp rdi, rsi   <- serializing
  //   mov ecx, -5    <- can be done in parallel
  //   mov eax, 5     <- can be done in parallel
  //   cmovb eax, ecx <- serializing
  static constexpr int32_t POSITIVE = 5;
  static constexpr int32_t NEGATIVE = -5;
#else
  // On RISC-V we simply use '1' and '-1' as it leads to branchless code.
  // On ARMv8, both strategies lead to the same performance.
  static constexpr int32_t POSITIVE = 1;
  static constexpr int32_t NEGATIVE = -1;
#endif
  static_assert(POSITIVE > 0);
  static_assert(NEGATIVE < 0);
  return a < b ? NEGATIVE : POSITIVE;
}
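
// Note the 'neq' precondition (illustrative): cmp_neq_uint64_t(1, 2) is
// negative and cmp_neq_uint64_t(2, 1) is positive, but calling it with equal
// arguments would incorrectly return POSITIVE; callers must first establish
// that the values differ.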

// Loads bytes from memory (possibly unaligned) and materializes them as a
// value of type T.
template <typename T> LIBC_INLINE T load(CPtr ptr) {
  T out;
  memcpy_inline<sizeof(T)>(&out, ptr);
  return out;
}

// Stores a value of type T in memory (possibly unaligned).
template <typename T> LIBC_INLINE void store(Ptr ptr, T value) {
  memcpy_inline<sizeof(T)>(ptr, &value);
}
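
// Example (illustrative): copy a 64-bit value between possibly unaligned
// locations without invoking undefined behavior:
//   const uint64_t v = load<uint64_t>(src); // src need not be 8-byte aligned.
//   store<uint64_t>(dst, v);                // neither does dst.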

// On architectures that do not allow for unaligned access we perform several
// aligned accesses and recombine them through shifts and logical operations.
// For instance, if we know that the pointer is 2-byte aligned we can decompose
// a 64-bit operation into four 16-bit operations.

// Loads a 'ValueType' by decomposing it into several loads that are assumed to
// be aligned.
// e.g. load_aligned<uint32_t, uint16_t, uint16_t>(ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE ValueType load_aligned(CPtr src) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  const ValueType value = load<T>(assume_aligned<sizeof(T)>(src));
  if constexpr (sizeof...(TS) > 0) {
    constexpr size_t SHIFT = sizeof(T) * 8;
    const ValueType next = load_aligned<ValueType, TS...>(src + sizeof(T));
    if constexpr (Endian::IS_LITTLE)
      return value | (next << SHIFT);
    else if constexpr (Endian::IS_BIG)
      return (value << SHIFT) | next;
    else
      static_assert(cpp::always_false<T>, "Invalid endianness");
  } else {
    return value;
  }
}
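
// Worked example (illustrative): with a 2-byte aligned 'src',
//   load_aligned<uint32_t, uint16_t, uint16_t>(src)
// performs two aligned 16-bit loads, 'a' at src and 'b' at src + 2, and
// returns a | (b << 16) on little-endian targets or (a << 16) | b on
// big-endian ones, i.e. the same value a plain 32-bit load would produce.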

// Alias for loading a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load32_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  return load_aligned<uint32_t, T, TS...>(src + offset);
}

// Alias for loading a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load64_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  return load_aligned<uint64_t, T, TS...>(src + offset);
}

// Stores a 'ValueType' by decomposing it into several stores that are assumed
// to be aligned.
// e.g. store_aligned<uint32_t, uint16_t, uint16_t>(value, ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE void store_aligned(ValueType value, Ptr dst) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  constexpr size_t SHIFT = sizeof(T) * 8;
  if constexpr (Endian::IS_LITTLE) {
    store<T>(assume_aligned<sizeof(T)>(dst), T(value & T(~0)));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> SHIFT, dst + sizeof(T));
  } else if constexpr (Endian::IS_BIG) {
    constexpr size_t OFFSET = (0 + ... + sizeof(TS));
    store<T>(assume_aligned<sizeof(T)>(dst + OFFSET), value & ~T(0));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> SHIFT, dst);
  } else {
    static_assert(cpp::always_false<T>, "Invalid endianness");
  }
}
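
// Worked example (illustrative): with a 2-byte aligned 'dst',
//   store_aligned<uint32_t, uint16_t, uint16_t>(value, dst)
// performs two aligned 16-bit stores: on little-endian targets the low half
// of value goes to dst and the high half to dst + 2; on big-endian targets
// the high half goes to dst and the low half to dst + 2.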

// Alias for storing a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE void store32_aligned(uint32_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  store_aligned<uint32_t, T, TS...>(value, dst + offset);
}

// Alias for storing a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE void store64_aligned(uint64_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  store_aligned<uint64_t, T, TS...>(value, dst + offset);
}

// Advances the pointers p1 and p2 by offset bytes and decreases count by the
// same amount.
template <typename T1, typename T2>
LIBC_INLINE void adjust(ptrdiff_t offset, T1 *__restrict &p1,
                        T2 *__restrict &p2, size_t &count) {
  p1 += offset;
  p2 += offset;
  count -= static_cast<size_t>(offset);
}

// Advances p1 and p2 so that p1 is aligned on the next SIZE-byte boundary,
// and decreases count by the same amount.
// We make sure the compiler knows about the adjusted pointer alignment.
template <size_t SIZE, typename T1, typename T2>
LIBC_INLINE void align_p1_to_next_boundary(T1 *__restrict &p1,
                                           T2 *__restrict &p2, size_t &count) {
  adjust(static_cast<ptrdiff_t>(distance_to_next_aligned<SIZE>(p1)), p1, p2,
         count);
  p1 = assume_aligned<SIZE>(p1);
}
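
// Usage sketch (illustrative, assuming a hypothetical copy loop): align the
// destination before entering an aligned bulk loop:
//   align_p1_to_next_boundary<32>(dst, src, count);
//   // dst is now 32-byte aligned; copy the remaining `count` bytes with
//   // aligned stores.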

// Same as align_p1_to_next_boundary above but with a single pointer instead.
template <size_t SIZE, typename T>
LIBC_INLINE void align_to_next_boundary(T *&p1, size_t &count) {
  const T *dummy = p1;
  align_p1_to_next_boundary<SIZE>(p1, dummy, count);
}

// An enum class that discriminates between the first and second pointer.
enum class Arg { P1, P2, Dst = P1, Src = P2 };

// Same as align_p1_to_next_boundary but allows for aligning p2 instead of p1.
// Precondition: &p1 != &p2
template <size_t SIZE, Arg AlignOn, typename T1, typename T2>
LIBC_INLINE void align_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
                                        size_t &count) {
  if constexpr (AlignOn == Arg::P1)
    align_p1_to_next_boundary<SIZE>(p1, p2, count);
  else if constexpr (AlignOn == Arg::P2)
    align_p1_to_next_boundary<SIZE>(p2, p1, count); // swapping p1 and p2.
  else
    static_assert(cpp::always_false<T1>,
                  "AlignOn must be either Arg::P1 or Arg::P2");
}

template <size_t SIZE> struct AlignHelper {
  LIBC_INLINE AlignHelper(CPtr ptr)
      : offset(distance_to_next_aligned<SIZE>(ptr)) {}

  LIBC_INLINE bool not_aligned() const { return offset != SIZE; }
  uintptr_t offset;
};
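
// Illustrative usage (hypothetical caller): handle the unaligned head before
// entering an aligned loop:
//   AlignHelper<32> helper(ptr);
//   if (helper.not_aligned()) {
//     // Process the first helper.offset bytes, then advance ptr by it.
//   }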

LIBC_INLINE void prefetch_for_write(CPtr dst) {
  __builtin_prefetch(dst, /*write*/ 1, /*max locality*/ 3);
}

LIBC_INLINE void prefetch_to_local_cache(CPtr dst) {
  __builtin_prefetch(dst, /*read*/ 0, /*max locality*/ 3);
}

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H