1 | //===-- Shared memory RPC client / server utilities -------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTIL_H |
10 | #define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTIL_H |
11 | |
12 | #include "src/__support/CPP/type_traits.h" |
13 | #include "src/__support/GPU/utils.h" |
14 | #include "src/__support/macros/attributes.h" |
15 | #include "src/__support/macros/properties/architectures.h" |
16 | #include "src/__support/threads/sleep.h" |
17 | #include "src/string/memory_utils/generic/byte_per_byte.h" |
18 | #include "src/string/memory_utils/inline_memcpy.h" |
19 | |
20 | namespace LIBC_NAMESPACE { |
21 | namespace rpc { |
22 | |
/// Compile-time query for the kind of target this code is being built for.
/// Evaluates to true when LIBC_TARGET_ARCH_IS_GPU is defined and to false
/// otherwise.
LIBC_INLINE constexpr bool is_process_gpu() {
#if !defined(LIBC_TARGET_ARCH_IS_GPU)
  return false;
#else
  return true;
#endif
}
31 | |
/// Round \p val "upwards" to a multiple of \p align. \p align is converted to
/// the value type \p V before it is used and the result is returned as \p V.
/// With integer types the division truncates, so a \p val that is already a
/// multiple of \p align comes back unchanged.
template <typename V, typename A>
LIBC_INLINE constexpr V align_up(V val, A align) {
  const V alignment = V(align);
  // Bias by (alignment - 1) so that the division below rounds upwards.
  const auto chunks = (val + alignment - 1) / alignment;
  return chunks * alignment;
}
37 | |
38 | /// Utility to provide a unified interface between the CPU and GPU's memory |
39 | /// model. On the GPU stack variables are always private to a lane so we can |
40 | /// simply use the variable passed in. On the CPU we need to allocate enough |
41 | /// space for the whole lane and index into it. |
42 | template <typename V> LIBC_INLINE V &lane_value(V *val, uint32_t id) { |
43 | if constexpr (is_process_gpu()) |
44 | return *val; |
45 | return val[id]; |
46 | } |
47 | |
48 | /// Advance the \p p by \p bytes. |
49 | template <typename T, typename U> LIBC_INLINE T *advance(T *ptr, U bytes) { |
50 | if constexpr (cpp::is_const_v<T>) |
51 | return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) + |
52 | bytes); |
53 | else |
54 | return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(ptr) + bytes); |
55 | } |
56 | |
57 | /// Wrapper around the optimal memory copy implementation for the target. |
58 | LIBC_INLINE void rpc_memcpy(void *dst, const void *src, size_t count) { |
59 | // The built-in memcpy prefers to fully unroll loops. We want to minimize |
60 | // resource usage so we use a single nounroll loop implementation. |
61 | #if defined(LIBC_TARGET_ARCH_IS_AMDGPU) |
62 | inline_memcpy_byte_per_byte(reinterpret_cast<Ptr>(dst), |
63 | reinterpret_cast<CPtr>(src), count); |
64 | #else |
65 | inline_memcpy(dst, src, count); |
66 | #endif |
67 | } |
68 | |
69 | } // namespace rpc |
70 | } // namespace LIBC_NAMESPACE |
71 | |
72 | #endif // LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTIL_H |
73 | |