1 | //===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // |
10 | //===----------------------------------------------------------------------===// |
11 | |
12 | #include "Utils.h" |
13 | |
14 | #include "Debug.h" |
15 | #include "Interface.h" |
16 | #include "Mapping.h" |
17 | |
18 | #pragma omp begin declare target device_type(nohost) |
19 | |
20 | using namespace ompx; |
21 | |
22 | namespace impl { |
23 | |
/// Base version of the shared-memory query. It ignores \p Ptr and always
/// answers "not shared"; the AMDGCN and NVPTX `declare variant` regions in this
/// file provide target-specific definitions of the same function.
bool isSharedMemPtr(const void *Ptr) {
  return false;
}
25 | |
/// Split the 64-bit value \p Val into two 32-bit halves.
///
/// \param Val      Value to split.
/// \param LowBits  Receives bits [31:0] of \p Val. Must not be null.
/// \param HighBits Receives bits [63:32] of \p Val. Must not be null.
///
/// The halves are extracted by truncation and a shift on the fixed-width
/// `uint64_t` argument, so the result does not depend on the width of
/// `unsigned long` and no `static_assert` on the data model is required.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  // Narrowing to uint32_t keeps exactly the low 32 bits.
  *LowBits = static_cast<uint32_t>(Val);
  // Shifting an unsigned 64-bit value right by 32 leaves only the high half.
  *HighBits = static_cast<uint32_t>(Val >> 32);
}
31 | |
/// Combine two 32-bit halves into one 64-bit value.
///
/// \param LowBits  Becomes bits [31:0] of the result.
/// \param HighBits Becomes bits [63:32] of the result.
/// \returns The combined 64-bit value.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  // Widen before shifting so the high half is not shifted out of a 32-bit type.
  const uint64_t Upper = static_cast<uint64_t>(HighBits) << 32;
  const uint64_t Lower = static_cast<uint64_t>(LowBits);
  return Upper | Lower;
}
35 | |
/// Forward declarations of the lane-shuffle primitives. This file defines them
/// only inside the AMDGCN and NVPTX `declare variant` regions that follow.
///
/// shuffle:     obtain \p Var as held by lane \p SrcLane.
/// shuffleDown: obtain \p Var as held by the lane \p LaneDelta positions after
///              the caller, with \p Width bounding the group of lanes involved.
/// \p Mask is forwarded to the target intrinsic by the NVPTX variant and is not
/// read by the AMDGCN variant.
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
                    int32_t Width);
39 | |
40 | /// AMDGCN Implementation |
41 | /// |
42 | ///{ |
43 | #pragma omp begin declare variant match(device = {arch(amdgcn)}) |
44 | |
45 | int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { |
46 | int Width = mapping::getWarpSize(); |
47 | int Self = mapping::getThreadIdInWarp(); |
48 | int Index = SrcLane + (Self & ~(Width - 1)); |
49 | return __builtin_amdgcn_ds_bpermute(Index << 2, Var); |
50 | } |
51 | |
52 | int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta, |
53 | int32_t Width) { |
54 | int Self = mapping::getThreadIdInWarp(); |
55 | int Index = Self + LaneDelta; |
56 | Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index; |
57 | return __builtin_amdgcn_ds_bpermute(Index << 2, Var); |
58 | } |
59 | |
60 | bool isSharedMemPtr(const void *Ptr) { |
61 | return __builtin_amdgcn_is_shared( |
62 | (const __attribute__((address_space(0))) void *)Ptr); |
63 | } |
64 | #pragma omp end declare variant |
65 | ///} |
66 | |
67 | /// NVPTX Implementation |
68 | /// |
69 | ///{ |
70 | #pragma omp begin declare variant match( \ |
71 | device = {arch(nvptx, nvptx64)}, \ |
72 | implementation = {extension(match_any)}) |
73 | |
74 | int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { |
75 | return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, 0x1f); |
76 | } |
77 | |
78 | int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) { |
79 | int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f; |
80 | return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T); |
81 | } |
82 | |
83 | bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); } |
84 | |
85 | #pragma omp end declare variant |
86 | ///} |
87 | } // namespace impl |
88 | |
89 | uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) { |
90 | return impl::Pack(LowBits, HighBits); |
91 | } |
92 | |
93 | void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) { |
94 | impl::Unpack(Val, &LowBits, &HighBits); |
95 | } |
96 | |
97 | int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) { |
98 | return impl::shuffle(Mask, Var, SrcLane); |
99 | } |
100 | |
101 | int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, |
102 | int32_t Width) { |
103 | return impl::shuffleDown(Mask, Var, Delta, Width); |
104 | } |
105 | |
106 | bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); } |
107 | |
108 | extern "C" { |
109 | int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) { |
110 | return impl::shuffleDown(lanes::All, Val, Delta, SrcLane); |
111 | } |
112 | |
113 | int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) { |
114 | uint32_t lo, hi; |
115 | utils::unpack(Val, lo, hi); |
116 | hi = impl::shuffleDown(lanes::All, hi, Delta, Width); |
117 | lo = impl::shuffleDown(lanes::All, lo, Delta, Width); |
118 | return utils::pack(lo, hi); |
119 | } |
120 | } |
121 | |
122 | #pragma omp end declare target |
123 | |