//===-- Memory utils --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H

#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/cstddef.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/endian.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/properties/architectures.h"

#include <stddef.h> // size_t
#include <stdint.h> // intptr_t / uintptr_t / INT32_MAX / INT32_MIN

namespace LIBC_NAMESPACE {
// Returns the number of bytes to subtract from ptr to get to the previous
// multiple of alignment. If ptr is already aligned returns 0.
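// Illustrative example (hypothetical address): with alignment == 16 and
// ptr == (void *)0x1003, the low bits 0x1003 & 0xF == 3 are returned, i.e.
// ptr - 3 is the previous 16-byte boundary.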
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_down(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  return reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned returns 0.
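// Illustrative example (hypothetical address): with alignment == 16 and
// ptr == (void *)0x1003, -0x1003 & 0xF == 13, and 0x1003 + 13 == 0x1010 is
// the next 16-byte boundary.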
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_up(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  // The logic is not straightforward and involves unsigned modulo arithmetic
  // but the generated code is as fast as it can be.
  return -reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned returns alignment.
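// Illustrative example: for a ptr that is already 16-byte aligned,
// distance_to_next_aligned<16>(ptr) returns 16 (not 0), which is useful when
// the caller must make forward progress past the current boundary.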
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_next_aligned(const void *ptr) {
  return alignment - distance_to_align_down<alignment>(ptr);
}

// Returns the same pointer but notifies the compiler that it is aligned.
template <size_t alignment, typename T> LIBC_INLINE T *assume_aligned(T *ptr) {
  return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
}

// Returns true iff memory regions [p1, p1 + size) and [p2, p2 + size) are
// disjoint.
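// Illustrative example (hypothetical addresses): p1 == 0x100, p2 == 0x104 and
// size == 4 gives sdiff == -4 and neg_udiff == 4, so the regions are reported
// disjoint; with size == 8 the regions overlap and the function returns false.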
LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) {
  const ptrdiff_t sdiff =
      static_cast<const char *>(p1) - static_cast<const char *>(p2);
  // We use bit_cast to make sure that we don't run into accidental integer
  // promotion. Notably the unary minus operator goes through integer promotion
  // at the expression level. We assume arithmetic to be two's complement (i.e.,
  // bit_cast has the same behavior as a regular signed to unsigned cast).
  static_assert(-1 == ~0, "not 2's complement");
  const size_t udiff = cpp::bit_cast<size_t>(sdiff);
  // Integer promotion would be caught here.
  const size_t neg_udiff = cpp::bit_cast<size_t>(-sdiff);
  // This is expected to compile to a conditional move.
  return sdiff >= 0 ? size <= udiff : size <= neg_udiff;
}

#if __has_builtin(__builtin_memcpy_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
#endif

#if __has_builtin(__builtin_memset_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE
#endif

// Performs a constant count copy.
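// Illustrative usage (hypothetical buffers): memcpy_inline<8>(dst, src) copies
// exactly 8 bytes and is fully expanded at compile time; unlike memcpy, the
// count is a template parameter so it can never be a runtime value.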
template <size_t Size>
LIBC_INLINE void memcpy_inline(void *__restrict dst,
                               const void *__restrict src) {
#ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
  __builtin_memcpy_inline(dst, src, Size);
#else
  // In memory functions `memcpy_inline` is instantiated several times with
  // different values of the Size parameter. This doesn't play well with GCC's
  // Value Range Analysis that wrongly detects out of bounds accesses. We
  // disable these warnings for the purpose of this function.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#pragma GCC diagnostic ignored "-Wstringop-overread"
#pragma GCC diagnostic ignored "-Wstringop-overflow"
  for (size_t i = 0; i < Size; ++i)
    static_cast<char *>(dst)[i] = static_cast<const char *>(src)[i];
#pragma GCC diagnostic pop
#endif
}

using Ptr = cpp::byte *;        // Pointer to raw data.
using CPtr = const cpp::byte *; // Const pointer to raw data.

// This type makes sure that we don't accidentally promote an integral type to
// another one. It is only constructible from the exact T type.
template <typename T> struct StrictIntegralType {
  static_assert(cpp::is_integral_v<T>);

  // Can only be constructed from a T.
  template <typename U, cpp::enable_if_t<cpp::is_same_v<U, T>, bool> = 0>
  LIBC_INLINE StrictIntegralType(U value) : value(value) {}

  // Allows using the type in an if statement.
  LIBC_INLINE explicit operator bool() const { return value; }

  // If type is unsigned (bcmp) we allow bitwise OR operations.
  LIBC_INLINE StrictIntegralType
  operator|(const StrictIntegralType &Rhs) const {
    static_assert(!cpp::is_signed_v<T>);
    return value | Rhs.value;
  }

  // For interaction with the C API we allow explicit conversion back to the
  // `int` type.
  LIBC_INLINE explicit operator int() const {
    // bit_cast makes sure that T and int have the same size.
    return cpp::bit_cast<int>(value);
  }

  // Helpers to get the zero and a nonzero value.
  LIBC_INLINE static constexpr StrictIntegralType zero() { return {T(0)}; }
  LIBC_INLINE static constexpr StrictIntegralType nonzero() { return {T(1)}; }

private:
  T value;
};

using MemcmpReturnType = StrictIntegralType<int32_t>;
using BcmpReturnType = StrictIntegralType<uint32_t>;
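
// Illustrative usage: MemcmpReturnType is only constructible from an exact
// int32_t, so `MemcmpReturnType r = int32_t(-1);` compiles while
// `MemcmpReturnType r = int64_t(-1);` does not, catching accidental
// promotions at compile time.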

// This implements the semantic of 'memcmp' returning a negative value when 'a'
// is less than 'b', '0' when 'a' equals 'b' and a positive number otherwise.
LIBC_INLINE MemcmpReturnType cmp_uint32_t(uint32_t a, uint32_t b) {
  // We perform the difference as an int64_t.
  const int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
  // For the int64_t to int32_t conversion we want the following properties:
  // - int32_t[31:31] == 1 iff diff < 0
  // - int32_t[31:0] == 0 iff diff == 0

  // We also observe that:
  // - When diff < 0: diff[63:32] == 0xffffffff and diff[31:0] != 0
  // - When diff > 0: diff[63:32] == 0 and diff[31:0] != 0
  // - When diff == 0: diff[63:32] == 0 and diff[31:0] == 0
  // - https://godbolt.org/z/8W7qWP6e5
  // - This implies that we need only look at diff[32:32] to determine the
  //   sign bit for the returned int32_t.

  // So, we do the following:
  // - int32_t[31:31] = diff[32:32]
  // - int32_t[30:0] = diff[31:0] == 0 ? 0 : non-0.

  // And, we can achieve the above by the expression below. We could have also
  // used (diff64 >> 1) | (diff64 & 0x1) but (diff64 & 0xFFFF) is faster than
  // (diff64 & 0x1). https://godbolt.org/z/j3b569rW1
  return static_cast<int32_t>((diff >> 1) | (diff & 0xFFFF));
}
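
// Worked example for cmp_uint32_t: a == 1, b == 2 gives diff == -1 (all 64
// bits set); (diff >> 1) keeps the sign and (diff & 0xFFFF) is nonzero, so the
// result is negative. a == 2, b == 1 gives diff == 1; (diff >> 1) == 0 and
// (diff & 0xFFFF) == 1, so the result is the positive value 1.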

// Returns a negative value if 'a' is less than 'b' and a positive value
// otherwise. This implements the semantic of 'memcmp' when we know that 'a' and
// 'b' differ.
LIBC_INLINE MemcmpReturnType cmp_neq_uint64_t(uint64_t a, uint64_t b) {
#if defined(LIBC_TARGET_ARCH_IS_X86_64)
  // On x86, the best strategy would be to use 'INT32_MAX' and 'INT32_MIN' for
  // positive and negative values respectively as they are one value apart:
  //   xor eax, eax         <- free
  //   cmp rdi, rsi         <- serializing
  //   adc eax, 2147483647  <- serializing

  // Unfortunately we found instances of client code that negate the result of
  // 'memcmp' to reverse ordering. Because signed integers are not symmetric
  // (e.g., int8_t ∈ [-128, 127]) returning 'INT_MIN' would break such code as
  // `-INT_MIN` is not representable as an int32_t.

  // As a consequence, we use 5 and -5, which is still nice in terms of
  // latency.
  //   cmp rdi, rsi   <- serializing
  //   mov ecx, -5    <- can be done in parallel
  //   mov eax, 5     <- can be done in parallel
  //   cmovb eax, ecx <- serializing
  static constexpr int32_t POSITIVE = 5;
  static constexpr int32_t NEGATIVE = -5;
#else
  // On RISC-V we simply use '1' and '-1' as it leads to branchless code.
  // On ARMv8, both strategies lead to the same performance.
  static constexpr int32_t POSITIVE = 1;
  static constexpr int32_t NEGATIVE = -1;
#endif
  static_assert(POSITIVE > 0);
  static_assert(NEGATIVE < 0);
  return a < b ? NEGATIVE : POSITIVE;
}
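
// Illustrative usage: cmp_neq_uint64_t(1, 2) yields NEGATIVE and
// cmp_neq_uint64_t(2, 1) yields POSITIVE; callers must guarantee a != b since
// equality is deliberately not handled here.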

// Loads bytes from memory (possibly unaligned) and materializes them as
// type.
template <typename T> LIBC_INLINE T load(CPtr ptr) {
  T out;
  memcpy_inline<sizeof(T)>(&out, ptr);
  return out;
}

// Stores a value of type T in memory (possibly unaligned).
template <typename T> LIBC_INLINE void store(Ptr ptr, T value) {
  memcpy_inline<sizeof(T)>(ptr, &value);
}
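
// Illustrative usage (hypothetical pointers): `uint32_t v = load<uint32_t>(p);`
// and `store<uint32_t>(q, v);` each copy four bytes through memcpy_inline,
// which avoids the undefined behavior of dereferencing a misaligned uint32_t*.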

// On architectures that do not allow for unaligned access we perform several
// aligned accesses and recombine them through shifts and logical operations.
// For instance, if we know that the pointer is 2-byte aligned we can decompose
// a 64-bit operation into four 16-bit operations.

// Loads a 'ValueType' by decomposing it into several loads that are assumed to
// be aligned.
// e.g. load_aligned<uint32_t, uint16_t, uint16_t>(ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE ValueType load_aligned(CPtr src) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  const ValueType value = load<T>(assume_aligned<sizeof(T)>(src));
  if constexpr (sizeof...(TS) > 0) {
    constexpr size_t SHIFT = sizeof(T) * 8;
    const ValueType next = load_aligned<ValueType, TS...>(src + sizeof(T));
    if constexpr (Endian::IS_LITTLE)
      return value | (next << SHIFT);
    else if constexpr (Endian::IS_BIG)
      return (value << SHIFT) | next;
    else
      static_assert(cpp::always_false<T>, "Invalid endianness");
  } else {
    return value;
  }
}
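
// Worked example on a little-endian target: load_aligned<uint32_t, uint16_t,
// uint16_t>(src) performs two aligned 16-bit loads, 'lo' at src and 'hi' at
// src + 2, and recombines them as lo | (hi << 16).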

// Alias for loading a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load32_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  return load_aligned<uint32_t, T, TS...>(src + offset);
}

// Alias for loading a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load64_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  return load_aligned<uint64_t, T, TS...>(src + offset);
}

// Stores a 'ValueType' by decomposing it into several stores that are assumed
// to be aligned.
// e.g. store_aligned<uint32_t, uint16_t, uint16_t>(value, ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE void store_aligned(ValueType value, Ptr dst) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  constexpr size_t SHIFT = sizeof(T) * 8;
  if constexpr (Endian::IS_LITTLE) {
    store<T>(assume_aligned<sizeof(T)>(dst), value & ~T(0));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> SHIFT, dst + sizeof(T));
  } else if constexpr (Endian::IS_BIG) {
    constexpr size_t OFFSET = (0 + ... + sizeof(TS));
    store<T>(assume_aligned<sizeof(T)>(dst + OFFSET), value & ~T(0));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> SHIFT, dst);
  } else {
    static_assert(cpp::always_false<T>, "Invalid endianness");
  }
}
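
// Worked example on a little-endian target: store_aligned<uint32_t, uint16_t,
// uint16_t>(value, dst) stores the low 16 bits of value at dst and the high
// 16 bits (value >> 16) at dst + 2, both as aligned 16-bit stores.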

// Alias for storing a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE void store32_aligned(uint32_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  store_aligned<uint32_t, T, TS...>(value, dst + offset);
}

// Alias for storing a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE void store64_aligned(uint64_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  store_aligned<uint64_t, T, TS...>(value, dst + offset);
}

// Advances the pointers p1 and p2 by offset bytes and decreases count by the
// same amount.
template <typename T1, typename T2>
LIBC_INLINE void adjust(ptrdiff_t offset, T1 *__restrict &p1,
                        T2 *__restrict &p2, size_t &count) {
  p1 += offset;
  p2 += offset;
  count -= offset;
}
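
// Illustrative usage: after handling the first 16 bytes of a block,
// `adjust(16, p1, p2, count);` moves both pointers past the processed region
// and shrinks the remaining byte count accordingly.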

// Advances p1 and p2 so p1 gets aligned to the next SIZE bytes boundary
// and decreases count by the same amount.
// We make sure the compiler knows about the adjusted pointer alignment.
template <size_t SIZE, typename T1, typename T2>
LIBC_INLINE void align_p1_to_next_boundary(T1 *__restrict &p1,
                                           T2 *__restrict &p2, size_t &count) {
  adjust(distance_to_next_aligned<SIZE>(p1), p1, p2, count);
  p1 = assume_aligned<SIZE>(p1);
}

// Same as align_p1_to_next_boundary above but with a single pointer instead.
template <size_t SIZE, typename T>
LIBC_INLINE void align_to_next_boundary(T *&p1, size_t &count) {
  const T *dummy = p1;
  align_p1_to_next_boundary<SIZE>(p1, dummy, count);
}

// An enum class that discriminates between the first and second pointer.
enum class Arg { P1, P2, Dst = P1, Src = P2 };

// Same as align_p1_to_next_boundary but allows for aligning p2 instead of p1.
// Precondition: &p1 != &p2
template <size_t SIZE, Arg AlignOn, typename T1, typename T2>
LIBC_INLINE void align_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
                                        size_t &count) {
  if constexpr (AlignOn == Arg::P1)
    align_p1_to_next_boundary<SIZE>(p1, p2, count);
  else if constexpr (AlignOn == Arg::P2)
    align_p1_to_next_boundary<SIZE>(p2, p1, count); // swapping p1 and p2.
  else
    static_assert(cpp::always_false<T1>,
                  "AlignOn must be either Arg::P1 or Arg::P2");
}
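
// Illustrative usage in a copy loop:
// `align_to_next_boundary<32, Arg::Src>(dst, src, count);` advances both
// pointers so that 'src' (the second pointer, Arg::Src == Arg::P2) lands on a
// 32-byte boundary, letting the subsequent loads be aligned.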

template <size_t SIZE> struct AlignHelper {
  LIBC_INLINE AlignHelper(CPtr ptr)
      : offset(distance_to_next_aligned<SIZE>(ptr)) {}

  LIBC_INLINE bool not_aligned() const { return offset != SIZE; }
  uintptr_t offset;
};
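
// Illustrative usage: construct `AlignHelper<16> helper(ptr);`, then branch on
// `helper.not_aligned()` and, if set, handle the first `helper.offset` bytes
// before proceeding aligned. A pointer already on the boundary reports
// offset == 16, hence the comparison against SIZE rather than against 0.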

LIBC_INLINE void prefetch_for_write(CPtr dst) {
  __builtin_prefetch(dst, /*write*/ 1, /*max locality*/ 3);
}

LIBC_INLINE void prefetch_to_local_cache(CPtr dst) {
  __builtin_prefetch(dst, /*read*/ 0, /*max locality*/ 3);
}

} // namespace LIBC_NAMESPACE

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H