//===-- Memory utils --------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H

#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/cstddef.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/endian_internal.h"
#include "src/__support/macros/attributes.h" // LIBC_INLINE
#include "src/__support/macros/config.h"     // LIBC_NAMESPACE_DECL
#include "src/__support/macros/properties/architectures.h"

#include <stddef.h> // size_t
#include <stdint.h> // intptr_t / uintptr_t / INT32_MAX / INT32_MIN

namespace LIBC_NAMESPACE_DECL {

// Returns the number of bytes to subtract from ptr to get to the previous
// multiple of alignment. If ptr is already aligned, returns 0.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_down(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  return reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned, returns 0.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_align_up(const void *ptr) {
  static_assert(cpp::has_single_bit(alignment),
                "alignment must be a power of 2");
  // The logic is not straightforward and involves unsigned modulo arithmetic,
  // but the generated code is as fast as it can be.
  return -reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
}
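
// Worked example (illustrative, not part of the original file): for a pointer
// with address 0x1003 and alignment = 16,
//   distance_to_align_down<16>(ptr) == 0x1003 & 0xF == 3, and 0x1003 - 3 == 0x1000;
//   distance_to_align_up<16>(ptr) == -0x1003 & 0xF == 13, and 0x1003 + 13 == 0x1010.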

// Returns the number of bytes to add to ptr to get to the next multiple of
// alignment. If ptr is already aligned, returns alignment.
template <size_t alignment>
LIBC_INLINE uintptr_t distance_to_next_aligned(const void *ptr) {
  return alignment - distance_to_align_down<alignment>(ptr);
}
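
// Note the difference from distance_to_align_up (illustrative): for an
// already-aligned pointer, distance_to_align_up<16>(ptr) returns 0 while
// distance_to_next_aligned<16>(ptr) returns 16.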

// Returns the same pointer but notifies the compiler that it is aligned.
template <size_t alignment, typename T> LIBC_INLINE T *assume_aligned(T *ptr) {
  return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
}
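
// For instance (illustrative), after `ptr = assume_aligned<64>(ptr);` the
// compiler is free to emit aligned (e.g., vector) accesses through ptr, so
// the caller must guarantee the pointer really is 64-byte aligned.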

// Returns true iff memory regions [p1, p1 + size) and [p2, p2 + size) are
// disjoint.
LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) {
  const ptrdiff_t sdiff =
      static_cast<const char *>(p1) - static_cast<const char *>(p2);
  // We use bit_cast to make sure that we don't run into accidental integer
  // promotion. Notably the unary minus operator goes through integer promotion
  // at the expression level. We assume arithmetic to be two's complement
  // (i.e., bit_cast has the same behavior as a regular signed to unsigned
  // cast).
  static_assert(-1 == ~0, "not 2's complement");
  const size_t udiff = cpp::bit_cast<size_t>(sdiff);
  // An accidental integer promotion would be caught here, since bit_cast
  // requires the source and destination sizes to match.
  const size_t neg_udiff = cpp::bit_cast<size_t>(-sdiff);
  // This is expected to compile to a conditional move.
  return sdiff >= 0 ? size <= udiff : size <= neg_udiff;
}
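
// Worked example (illustrative): with p1 == 0x100 and p2 == 0x108,
//   is_disjoint(p1, p2, 8) is true  (sdiff == -8, neg_udiff == 8, 8 <= 8);
//   is_disjoint(p1, p2, 9) is false (the region starting at p1 reaches into
//   the one starting at p2).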

#if __has_builtin(__builtin_memcpy_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
#endif

#if __has_builtin(__builtin_memset_inline)
#define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE
#endif

// Performs a constant-count copy.
template <size_t Size>
LIBC_INLINE void memcpy_inline(void *__restrict dst,
                               const void *__restrict src) {
#ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
  __builtin_memcpy_inline(dst, src, Size);
#else
  // In memory functions `memcpy_inline` is instantiated several times with
  // different values of the Size parameter. This doesn't play well with GCC's
  // Value Range Analysis, which wrongly detects out-of-bounds accesses. We
  // disable these warnings for the purpose of this function.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#pragma GCC diagnostic ignored "-Wstringop-overread"
#pragma GCC diagnostic ignored "-Wstringop-overflow"
  for (size_t i = 0; i < Size; ++i)
    static_cast<char *>(dst)[i] = static_cast<const char *>(src)[i];
#pragma GCC diagnostic pop
#endif
}
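
// Usage (illustrative): copy exactly eight bytes with the count known at
// compile time, which lets the compiler lower it to a single load/store pair:
//   memcpy_inline<8>(dst, src);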

using Ptr = cpp::byte *;        // Pointer to raw data.
using CPtr = const cpp::byte *; // Const pointer to raw data.

// This type makes sure that we don't accidentally promote an integral type to
// another one. It is only constructible from the exact T type.
template <typename T> struct StrictIntegralType {
  static_assert(cpp::is_integral_v<T>);

  // Can only be constructed from a T.
  template <typename U, cpp::enable_if_t<cpp::is_same_v<U, T>, bool> = 0>
  LIBC_INLINE StrictIntegralType(U value) : value(value) {}

  // Allows using the type in an if statement.
  LIBC_INLINE explicit operator bool() const { return value; }

  // If the type is unsigned (as for bcmp) we allow bitwise OR operations.
  LIBC_INLINE StrictIntegralType
  operator|(const StrictIntegralType &Rhs) const {
    static_assert(!cpp::is_signed_v<T>);
    return value | Rhs.value;
  }

  // For interaction with the C API we allow explicit conversion back to the
  // `int` type.
  LIBC_INLINE explicit operator int() const {
    // bit_cast makes sure that T and int have the same size.
    return cpp::bit_cast<int>(value);
  }

  // Helpers to get the zero and non-zero values.
  LIBC_INLINE static constexpr StrictIntegralType zero() { return {T(0)}; }
  LIBC_INLINE static constexpr StrictIntegralType nonzero() { return {T(1)}; }

private:
  T value;
};

using MemcmpReturnType = StrictIntegralType<int32_t>;
using BcmpReturnType = StrictIntegralType<uint32_t>;
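
// For example (illustrative, not part of the interface):
//   MemcmpReturnType ok = int32_t(-1);  // Exact type: accepted.
//   MemcmpReturnType bad = int16_t(-1); // Would need promotion: ill-formed.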

// This implements the semantics of 'memcmp': returning a negative value when
// 'a' is less than 'b', '0' when 'a' equals 'b', and a positive number
// otherwise.
LIBC_INLINE MemcmpReturnType cmp_uint32_t(uint32_t a, uint32_t b) {
  // We perform the difference as an int64_t.
  const int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
  // For the int64_t to int32_t conversion we want the following properties:
  // - int32_t[31:31] == 1 iff diff < 0
  // - int32_t[31:0] == 0 iff diff == 0

  // We also observe that:
  // - When diff < 0: diff[63:32] == 0xffffffff and diff[31:0] != 0
  // - When diff > 0: diff[63:32] == 0 and diff[31:0] != 0
  // - When diff == 0: diff[63:32] == 0 and diff[31:0] == 0
  // - https://godbolt.org/z/8W7qWP6e5
  // - This implies that we need only look at diff[32:32] to determine the
  //   sign bit of the returned int32_t.

  // So, we do the following:
  // - int32_t[31:31] = diff[32:32]
  // - int32_t[30:0] = diff[31:0] == 0 ? 0 : non-0.

  // And, we can achieve the above with the expression below. We could have
  // also used (diff >> 1) | (diff & 0x1), but (diff & 0xFFFF) is faster than
  // (diff & 0x1). https://godbolt.org/z/j3b569rW1
  return static_cast<int32_t>((diff >> 1) | (diff & 0xFFFF));
}
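
// Worked example (illustrative): cmp_uint32_t(5, 7) computes diff == -2
// (0xFFFF...FFFE); (diff >> 1) is -1 and OR-ing in (diff & 0xFFFF) keeps the
// value at -1, so the truncated int32_t is negative as required. Conversely,
// cmp_uint32_t(7, 5) computes diff == 2, giving (2 >> 1) | (2 & 0xFFFF) == 3,
// a positive value, and equal inputs yield 0.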

// Returns a negative value if 'a' is less than 'b' and a positive value
// otherwise. This implements the semantics of 'memcmp' when we know that 'a'
// and 'b' differ.
LIBC_INLINE MemcmpReturnType cmp_neq_uint64_t(uint64_t a, uint64_t b) {
#if defined(LIBC_TARGET_ARCH_IS_X86)
  // On x86, the best strategy would be to use 'INT32_MAX' and 'INT32_MIN' for
  // positive and negative values respectively, as they are one value apart:
  //   xor eax, eax        <- free
  //   cmp rdi, rsi        <- serializing
  //   adc eax, 2147483647 <- serializing

  // Unfortunately we found instances of client code that negate the result of
  // 'memcmp' to reverse the ordering. Because signed integers are not
  // symmetric (e.g., int8_t ∈ [-128, 127]), returning 'INT32_MIN' would break
  // such code, as `-INT32_MIN` is not representable as an int32_t.

  // As a consequence, we use 5 and -5, which is still fine in terms of
  // latency:
  //   cmp rdi, rsi   <- serializing
  //   mov ecx, -5    <- can be done in parallel
  //   mov eax, 5     <- can be done in parallel
  //   cmovb eax, ecx <- serializing
  static constexpr int32_t POSITIVE = 5;
  static constexpr int32_t NEGATIVE = -5;
#else
  // On RISC-V we simply use '1' and '-1' as it leads to branchless code.
  // On ARMv8, both strategies lead to the same performance.
  static constexpr int32_t POSITIVE = 1;
  static constexpr int32_t NEGATIVE = -1;
#endif
  static_assert(POSITIVE > 0);
  static_assert(NEGATIVE < 0);
  return a < b ? NEGATIVE : POSITIVE;
}
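
// Note the 'neq' precondition (illustrative): cmp_neq_uint64_t(1, 2) is
// negative and cmp_neq_uint64_t(2, 1) is positive, but calling it with equal
// arguments would incorrectly return POSITIVE; callers must first establish
// that the values differ.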

// Loads bytes from memory (possibly unaligned) and materializes them as a
// value of type T.
template <typename T> LIBC_INLINE T load(CPtr ptr) {
  T out;
  memcpy_inline<sizeof(T)>(&out, ptr);
  return out;
}

// Stores a value of type T in memory (possibly unaligned).
template <typename T> LIBC_INLINE void store(Ptr ptr, T value) {
  memcpy_inline<sizeof(T)>(ptr, &value);
}
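
// Example (illustrative): copy a 64-bit value between possibly unaligned
// locations without invoking undefined behavior:
//   const uint64_t v = load<uint64_t>(src); // src need not be 8-byte aligned.
//   store<uint64_t>(dst, v);                // neither does dst.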

// On architectures that do not allow for unaligned access we perform several
// aligned accesses and recombine them through shifts and logical operations.
// For instance, if we know that the pointer is 2-byte aligned we can decompose
// a 64-bit operation into four 16-bit operations.

// Loads a 'ValueType' by decomposing it into several loads that are assumed to
// be aligned.
// e.g. load_aligned<uint32_t, uint16_t, uint16_t>(ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE ValueType load_aligned(CPtr src) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  const ValueType value = load<T>(assume_aligned<sizeof(T)>(src));
  if constexpr (sizeof...(TS) > 0) {
    constexpr size_t SHIFT = sizeof(T) * 8;
    const ValueType next = load_aligned<ValueType, TS...>(src + sizeof(T));
    if constexpr (Endian::IS_LITTLE)
      return value | (next << SHIFT);
    else if constexpr (Endian::IS_BIG)
      return (value << SHIFT) | next;
    else
      static_assert(cpp::always_false<T>, "Invalid endianness");
  } else {
    return value;
  }
}
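
// Worked example (illustrative): with a 2-byte aligned 'src',
//   load_aligned<uint32_t, uint16_t, uint16_t>(src)
// performs two aligned 16-bit loads, 'a' at src and 'b' at src + 2, and
// returns a | (b << 16) on little-endian targets or (a << 16) | b on
// big-endian ones, i.e. the same value a plain 32-bit load would produce.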

// Alias for loading a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load32_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  return load_aligned<uint32_t, T, TS...>(src + offset);
}

// Alias for loading a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE auto load64_aligned(CPtr src, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  return load_aligned<uint64_t, T, TS...>(src + offset);
}

// Stores a 'ValueType' by decomposing it into several stores that are assumed
// to be aligned.
// e.g. store_aligned<uint32_t, uint16_t, uint16_t>(value, ptr);
template <typename ValueType, typename T, typename... TS>
LIBC_INLINE void store_aligned(ValueType value, Ptr dst) {
  static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
  constexpr size_t SHIFT = sizeof(T) * 8;
  if constexpr (Endian::IS_LITTLE) {
    store<T>(assume_aligned<sizeof(T)>(dst), T(value & T(~0)));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> SHIFT, dst + sizeof(T));
  } else if constexpr (Endian::IS_BIG) {
    constexpr size_t OFFSET = (0 + ... + sizeof(TS));
    store<T>(assume_aligned<sizeof(T)>(dst + OFFSET), value & ~T(0));
    if constexpr (sizeof...(TS) > 0)
      store_aligned<ValueType, TS...>(value >> SHIFT, dst);
  } else {
    static_assert(cpp::always_false<T>, "Invalid endianness");
  }
}
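
// Worked example (illustrative): with a 2-byte aligned 'dst',
//   store_aligned<uint32_t, uint16_t, uint16_t>(value, dst)
// performs two aligned 16-bit stores: on little-endian targets the low half
// of value goes to dst and the high half to dst + 2; on big-endian targets
// the high half goes to dst and the low half to dst + 2.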

// Alias for storing a 'uint32_t'.
template <typename T, typename... TS>
LIBC_INLINE void store32_aligned(uint32_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
  store_aligned<uint32_t, T, TS...>(value, dst + offset);
}

// Alias for storing a 'uint64_t'.
template <typename T, typename... TS>
LIBC_INLINE void store64_aligned(uint64_t value, Ptr dst, size_t offset) {
  static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
  store_aligned<uint64_t, T, TS...>(value, dst + offset);
}

// Advances the pointers p1 and p2 by offset bytes and decreases count by the
// same amount.
template <typename T1, typename T2>
LIBC_INLINE void adjust(ptrdiff_t offset, T1 *__restrict &p1,
                        T2 *__restrict &p2, size_t &count) {
  p1 += offset;
  p2 += offset;
  count -= static_cast<size_t>(offset);
}

// Advances p1 and p2 so that p1 is aligned on the next SIZE-byte boundary,
// and decreases count by the same amount.
// We make sure the compiler knows about the adjusted pointer alignment.
template <size_t SIZE, typename T1, typename T2>
LIBC_INLINE void align_p1_to_next_boundary(T1 *__restrict &p1,
                                           T2 *__restrict &p2, size_t &count) {
  adjust(static_cast<ptrdiff_t>(distance_to_next_aligned<SIZE>(p1)), p1, p2,
         count);
  p1 = assume_aligned<SIZE>(p1);
}
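
// Usage sketch (illustrative, assuming a hypothetical copy loop): align the
// destination before entering an aligned bulk loop:
//   align_p1_to_next_boundary<32>(dst, src, count);
//   // dst is now 32-byte aligned; copy the remaining `count` bytes with
//   // aligned stores.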

// Same as align_p1_to_next_boundary above but with a single pointer instead.
template <size_t SIZE, typename T>
LIBC_INLINE void align_to_next_boundary(T *&p1, size_t &count) {
  const T *dummy = p1;
  align_p1_to_next_boundary<SIZE>(p1, dummy, count);
}

// An enum class that discriminates between the first and second pointer.
enum class Arg { P1, P2, Dst = P1, Src = P2 };

// Same as align_p1_to_next_boundary but allows for aligning p2 instead of p1.
// Precondition: &p1 != &p2
template <size_t SIZE, Arg AlignOn, typename T1, typename T2>
LIBC_INLINE void align_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
                                        size_t &count) {
  if constexpr (AlignOn == Arg::P1)
    align_p1_to_next_boundary<SIZE>(p1, p2, count);
  else if constexpr (AlignOn == Arg::P2)
    align_p1_to_next_boundary<SIZE>(p2, p1, count); // swapping p1 and p2.
  else
    static_assert(cpp::always_false<T1>,
                  "AlignOn must be either Arg::P1 or Arg::P2");
}

template <size_t SIZE> struct AlignHelper {
  LIBC_INLINE AlignHelper(CPtr ptr)
      : offset(distance_to_next_aligned<SIZE>(ptr)) {}

  LIBC_INLINE bool not_aligned() const { return offset != SIZE; }
  uintptr_t offset;
};
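
// Illustrative usage (hypothetical caller): handle the unaligned head before
// entering an aligned loop:
//   AlignHelper<32> helper(ptr);
//   if (helper.not_aligned()) {
//     // Process the first helper.offset bytes, then advance ptr by it.
//   }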

LIBC_INLINE void prefetch_for_write(CPtr dst) {
  __builtin_prefetch(dst, /*write*/ 1, /*max locality*/ 3);
}

LIBC_INLINE void prefetch_to_local_cache(CPtr dst) {
  __builtin_prefetch(dst, /*read*/ 0, /*max locality*/ 3);
}

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H