1 | //===---------------- Implementation of GPU utils ---------------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H |
10 | #define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H |
11 | |
12 | #include "src/__support/macros/properties/architectures.h" |
13 | |
14 | #if defined(LIBC_TARGET_ARCH_IS_AMDGPU) |
15 | #include "amdgpu/utils.h" |
16 | #elif defined(LIBC_TARGET_ARCH_IS_NVPTX) |
17 | #include "nvptx/utils.h" |
18 | #else |
19 | #include "generic/utils.h" |
20 | #endif |
21 | |
22 | namespace LIBC_NAMESPACE { |
23 | namespace gpu { |
24 | /// Get the first active thread inside the lane. |
25 | LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) { |
26 | return __builtin_ffsll(lane_mask) - 1; |
27 | } |
28 | |
29 | /// Conditional that is only true for a single thread in a lane. |
30 | LIBC_INLINE bool is_first_lane(uint64_t lane_mask) { |
31 | return gpu::get_lane_id() == get_first_lane_id(lane_mask); |
32 | } |
33 | |
34 | /// Gets the sum of all lanes inside the warp or wavefront. |
35 | LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) { |
36 | for (uint32_t step = gpu::get_lane_size() / 2; step > 0; step /= 2) { |
37 | uint32_t index = step + gpu::get_lane_id(); |
38 | x += gpu::shuffle(lane_mask, index, x); |
39 | } |
40 | return gpu::broadcast_value(lane_mask, x); |
41 | } |
42 | |
43 | /// Gets the accumulator scan of the threads in the warp or wavefront. |
44 | LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) { |
45 | for (uint32_t step = 1; step < gpu::get_lane_size(); step *= 2) { |
46 | uint32_t index = gpu::get_lane_id() - step; |
47 | uint32_t bitmask = gpu::get_lane_id() >= step; |
48 | x += -bitmask & gpu::shuffle(lane_mask, index, x); |
49 | } |
50 | return x; |
51 | } |
52 | |
53 | } // namespace gpu |
54 | } // namespace LIBC_NAMESPACE |
55 | |
56 | #endif // LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H |
57 |