1//===---------------- Implementation of GPU utils ---------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
10#define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
11
12#include "src/__support/macros/properties/architectures.h"
13
14#if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
15#include "amdgpu/utils.h"
16#elif defined(LIBC_TARGET_ARCH_IS_NVPTX)
17#include "nvptx/utils.h"
18#else
19#include "generic/utils.h"
20#endif
21
22namespace LIBC_NAMESPACE {
23namespace gpu {
/// Returns the index of the lowest set bit of \p lane_mask, i.e. the id of the
/// first lane that the mask marks as active.
///
/// `__builtin_ffsll` reports a one-based bit position and yields zero when no
/// bit is set, so the position is shifted down by one to obtain a zero-based
/// lane id. For an empty mask the subtraction produces -1, which converts to
/// the largest uint64_t value.
LIBC_INLINE uint64_t get_first_lane_id(uint64_t lane_mask) {
  const int one_based_position = __builtin_ffsll(lane_mask);
  return static_cast<uint64_t>(one_based_position - 1);
}
28
29/// Conditional that is only true for a single thread in a lane.
30LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
31 return gpu::get_lane_id() == get_first_lane_id(lane_mask);
32}
33
34/// Gets the sum of all lanes inside the warp or wavefront.
35LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) {
36 for (uint32_t step = gpu::get_lane_size() / 2; step > 0; step /= 2) {
37 uint32_t index = step + gpu::get_lane_id();
38 x += gpu::shuffle(lane_mask, index, x);
39 }
40 return gpu::broadcast_value(lane_mask, x);
41}
42
43/// Gets the accumulator scan of the threads in the warp or wavefront.
44LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) {
45 for (uint32_t step = 1; step < gpu::get_lane_size(); step *= 2) {
46 uint32_t index = gpu::get_lane_id() - step;
47 uint32_t bitmask = gpu::get_lane_id() >= step;
48 x += -bitmask & gpu::shuffle(lane_mask, index, x);
49 }
50 return x;
51}
52
53} // namespace gpu
54} // namespace LIBC_NAMESPACE
55
56#endif // LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
57

// Source: libc/src/__support/GPU/utils.h