Warning: This file is not a C or C++ file. It does not have highlighting.
1 | //===---------------- Implementation of GPU utils ---------------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H |
10 | #define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H |
11 | |
12 | #include "src/__support/macros/attributes.h" |
13 | #include "src/__support/macros/config.h" |
14 | #include "src/__support/macros/properties/architectures.h" |
15 | |
16 | #if !__has_include(<gpuintrin.h>) |
17 | #error "Unsupported compiler" |
18 | #endif |
19 | |
20 | #include <gpuintrin.h> |
21 | |
22 | namespace LIBC_NAMESPACE_DECL { |
23 | namespace gpu { |
24 | |
25 | template <typename T> using Private = __gpu_private T; |
26 | template <typename T> using Constant = __gpu_constant T; |
27 | template <typename T> using Local = __gpu_local T; |
28 | template <typename T> using Global = __gpu_local T; |
29 | |
30 | LIBC_INLINE uint32_t get_num_blocks_x() { return __gpu_num_blocks(0); } |
31 | |
32 | LIBC_INLINE uint32_t get_num_blocks_y() { return __gpu_num_blocks(1); } |
33 | |
34 | LIBC_INLINE uint32_t get_num_blocks_z() { return __gpu_num_blocks(2); } |
35 | |
36 | LIBC_INLINE uint64_t get_num_blocks() { |
37 | return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z(); |
38 | } |
39 | |
40 | LIBC_INLINE uint32_t get_block_id_x() { return __gpu_block_id(0); } |
41 | |
42 | LIBC_INLINE uint32_t get_block_id_y() { return __gpu_block_id(1); } |
43 | |
44 | LIBC_INLINE uint32_t get_block_id_z() { return __gpu_block_id(2); } |
45 | |
46 | LIBC_INLINE uint64_t get_block_id() { |
47 | return get_block_id_x() + get_num_blocks_x() * get_block_id_y() + |
48 | get_num_blocks_x() * get_num_blocks_y() * get_block_id_z(); |
49 | } |
50 | |
51 | LIBC_INLINE uint32_t get_num_threads_x() { return __gpu_num_threads(0); } |
52 | |
53 | LIBC_INLINE uint32_t get_num_threads_y() { return __gpu_num_threads(1); } |
54 | |
55 | LIBC_INLINE uint32_t get_num_threads_z() { return __gpu_num_threads(2); } |
56 | |
57 | LIBC_INLINE uint64_t get_num_threads() { |
58 | return get_num_threads_x() * get_num_threads_y() * get_num_threads_z(); |
59 | } |
60 | |
61 | LIBC_INLINE uint32_t get_thread_id_x() { return __gpu_thread_id(0); } |
62 | |
63 | LIBC_INLINE uint32_t get_thread_id_y() { return __gpu_thread_id(1); } |
64 | |
65 | LIBC_INLINE uint32_t get_thread_id_z() { return __gpu_thread_id(2); } |
66 | |
67 | LIBC_INLINE uint64_t get_thread_id() { |
68 | return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() + |
69 | get_num_threads_x() * get_num_threads_y() * get_thread_id_z(); |
70 | } |
71 | |
72 | LIBC_INLINE uint32_t get_lane_size() { return __gpu_num_lanes(); } |
73 | |
74 | LIBC_INLINE uint32_t get_lane_id() { return __gpu_lane_id(); } |
75 | |
76 | LIBC_INLINE uint64_t get_lane_mask() { return __gpu_lane_mask(); } |
77 | |
78 | LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) { |
79 | return __gpu_read_first_lane_u32(lane_mask, x); |
80 | } |
81 | |
82 | LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) { |
83 | return __gpu_ballot(lane_mask, x); |
84 | } |
85 | |
86 | LIBC_INLINE void sync_threads() { __gpu_sync_threads(); } |
87 | |
88 | LIBC_INLINE void sync_lane(uint64_t lane_mask) { __gpu_sync_lane(lane_mask); } |
89 | |
90 | LIBC_INLINE uint32_t shuffle(uint64_t lane_mask, uint32_t idx, uint32_t x, |
91 | uint32_t width = __gpu_num_lanes()) { |
92 | return __gpu_shuffle_idx_u32(lane_mask, idx, x, width); |
93 | } |
94 | |
95 | LIBC_INLINE uint64_t shuffle(uint64_t lane_mask, uint32_t idx, uint64_t x, |
96 | uint32_t width = __gpu_num_lanes()) { |
97 | return __gpu_shuffle_idx_u64(lane_mask, idx, x, width); |
98 | } |
99 | |
100 | template <typename T> |
101 | LIBC_INLINE T *shuffle(uint64_t lane_mask, uint32_t idx, T *x, |
102 | uint32_t width = __gpu_num_lanes()) { |
103 | return reinterpret_cast<T *>(__gpu_shuffle_idx_u64( |
104 | lane_mask, idx, reinterpret_cast<uintptr_t>(x), width)); |
105 | } |
106 | |
107 | LIBC_INLINE uint64_t match_any(uint64_t lane_mask, uint32_t x) { |
108 | return __gpu_match_any_u32(lane_mask, x); |
109 | } |
110 | |
111 | LIBC_INLINE uint64_t match_all(uint64_t lane_mask, uint32_t x) { |
112 | return __gpu_match_all_u32(lane_mask, x); |
113 | } |
114 | |
115 | [[noreturn]] LIBC_INLINE void end_program() { __gpu_exit(); } |
116 | |
117 | LIBC_INLINE bool is_first_lane(uint64_t lane_mask) { |
118 | return __gpu_is_first_in_lane(lane_mask); |
119 | } |
120 | |
121 | LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) { |
122 | return __gpu_lane_sum_u32(lane_mask, x); |
123 | } |
124 | |
125 | LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) { |
126 | return __gpu_lane_scan_u32(lane_mask, x); |
127 | } |
128 | |
129 | LIBC_INLINE uint64_t fixed_frequency_clock() { |
130 | return __builtin_readsteadycounter(); |
131 | } |
132 | |
133 | LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); } |
134 | |
135 | } // namespace gpu |
136 | } // namespace LIBC_NAMESPACE_DECL |
137 | |
138 | #endif // LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H |
139 |
Warning: This file is not a C or C++ file. It does not have highlighting.