//===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Utility helpers for the OpenMP device runtime: packing/unpacking of 64-bit
// values, warp-level shuffles, and shared-memory pointer queries, with
// target-specific variants for AMDGCN and NVPTX.
//
//===----------------------------------------------------------------------===//
11
12#include "Utils.h"
13
14#include "Debug.h"
15#include "Interface.h"
16#include "Mapping.h"
17
18#pragma omp begin declare target device_type(nohost)
19
20using namespace ompx;
21
22namespace impl {
23
/// Base version of the shared-memory pointer query.
///
/// The `declare variant` regions later in this file provide AMDGCN and NVPTX
/// specific definitions with the same signature; this base definition ignores
/// \p Ptr and always answers `false`.
///
/// \param Ptr Pointer to classify (unused here).
/// \returns Always `false`.
bool isSharedMemPtr(const void *Ptr) {
  return false;
}
25
/// Split a 64-bit value into its two 32-bit halves.
///
/// The split is done with a shift and truncating conversions on `uint64_t`
/// itself, so it does not depend on the width of `unsigned long` (no
/// `UL`-suffixed masks and no accompanying `static_assert` are needed).
///
/// \param Val      Value to split.
/// \param LowBits  Receives bits [31:0] of \p Val. Must not be null; it is
///                 dereferenced unconditionally.
/// \param HighBits Receives bits [63:32] of \p Val. Must not be null; it is
///                 dereferenced unconditionally.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  // Converting to uint32_t keeps exactly the low 32 bits (modulo 2^32).
  *LowBits = static_cast<uint32_t>(Val);
  // Shift the upper half down first, then truncate the same way.
  *HighBits = static_cast<uint32_t>(Val >> 32);
}
31
/// Combine two 32-bit halves into one 64-bit value.
///
/// \param LowBits  Becomes bits [31:0] of the result.
/// \param HighBits Becomes bits [63:32] of the result.
/// \returns The 64-bit value `(HighBits << 32) | LowBits`.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  // Widen before shifting so the upper half is not shifted out of a
  // 32-bit type.
  const uint64_t Upper = static_cast<uint64_t>(HighBits) << 32;
  const uint64_t Lower = static_cast<uint64_t>(LowBits);
  return Upper | Lower;
}
35
/// Forward declaration of the lane shuffle; the definitions live in the
/// target-specific `declare variant` regions below.
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);

/// Forward declaration of the shuffle-down operation; the definitions live in
/// the target-specific `declare variant` regions below.
int32_t shuffleDown(uint64_t Mask, int32_t Var,
                    uint32_t LaneDelta, int32_t Width);
39
40/// AMDGCN Implementation
41///
42///{
43#pragma omp begin declare variant match(device = {arch(amdgcn)})
44
45int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
46 int Width = mapping::getWarpSize();
47 int Self = mapping::getThreadIdInWarp();
48 int Index = SrcLane + (Self & ~(Width - 1));
49 return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
50}
51
52int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
53 int32_t Width) {
54 int Self = mapping::getThreadIdInWarp();
55 int Index = Self + LaneDelta;
56 Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index;
57 return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
58}
59
60bool isSharedMemPtr(const void *Ptr) {
61 return __builtin_amdgcn_is_shared(
62 (const __attribute__((address_space(0))) void *)Ptr);
63}
64#pragma omp end declare variant
65///}
66
67/// NVPTX Implementation
68///
69///{
70#pragma omp begin declare variant match( \
71 device = {arch(nvptx, nvptx64)}, \
72 implementation = {extension(match_any)})
73
74int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
75 return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, 0x1f);
76}
77
78int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
79 int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f;
80 return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
81}
82
83bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); }
84
85#pragma omp end declare variant
86///}
87} // namespace impl
88
89uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) {
90 return impl::Pack(LowBits, HighBits);
91}
92
93void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) {
94 impl::Unpack(Val, &LowBits, &HighBits);
95}
96
97int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
98 return impl::shuffle(Mask, Var, SrcLane);
99}
100
101int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
102 int32_t Width) {
103 return impl::shuffleDown(Mask, Var, Delta, Width);
104}
105
106bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); }
107
108extern "C" {
109int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) {
110 return impl::shuffleDown(lanes::All, Val, Delta, SrcLane);
111}
112
113int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) {
114 uint32_t lo, hi;
115 utils::unpack(Val, lo, hi);
116 hi = impl::shuffleDown(lanes::All, hi, Delta, Width);
117 lo = impl::shuffleDown(lanes::All, lo, Delta, Width);
118 return utils::pack(lo, hi);
119}
120}
121
122#pragma omp end declare target
123

// Source: offload/DeviceRTL/src/Utils.cpp