| 1 | //===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // |
| 10 | //===----------------------------------------------------------------------===// |
| 11 | |
| 12 | #include "DeviceUtils.h" |
| 13 | |
| 14 | #include "Debug.h" |
| 15 | #include "Interface.h" |
| 16 | #include "Mapping.h" |
| 17 | #include "gpuintrin.h" |
| 18 | |
| 19 | using namespace ompx; |
| 20 | |
| 21 | uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) { |
| 22 | return (((uint64_t)HighBits) << 32) | (uint64_t)LowBits; |
| 23 | } |
| 24 | |
| 25 | void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) { |
| 26 | static_assert(sizeof(unsigned long) == 8, "" ); |
| 27 | LowBits = static_cast<uint32_t>(Val & 0x00000000FFFFFFFFUL); |
| 28 | HighBits = static_cast<uint32_t>((Val & 0xFFFFFFFF00000000UL) >> 32); |
| 29 | } |
| 30 | |
| 31 | int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, |
| 32 | int32_t Width) { |
| 33 | return __gpu_shuffle_idx_u32(Mask, SrcLane, Var, Width); |
| 34 | } |
| 35 | |
| 36 | int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, |
| 37 | int32_t Width) { |
| 38 | int32_t Self = mapping::getThreadIdInWarp(); |
| 39 | int32_t Index = (Delta + (Self & (Width - 1))) >= Width ? Self : Self + Delta; |
| 40 | return __gpu_shuffle_idx_u64(Mask, Index, Var, Width); |
| 41 | } |
| 42 | |
| 43 | int64_t utils::shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, |
| 44 | int32_t Width) { |
| 45 | int32_t Self = mapping::getThreadIdInWarp(); |
| 46 | int32_t Index = (Delta + (Self & (Width - 1))) >= Width ? Self : Self + Delta; |
| 47 | return __gpu_shuffle_idx_u64(Mask, Index, Var, Width); |
| 48 | } |
| 49 | |
| 50 | uint64_t utils::ballotSync(uint64_t Mask, int32_t Pred) { |
| 51 | return __gpu_ballot(Mask, Pred); |
| 52 | } |
| 53 | |
| 54 | bool utils::isSharedMemPtr(void *Ptr) { return __gpu_is_ptr_local(Ptr); } |
| 55 | |
| 56 | extern "C" { |
| 57 | int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) { |
| 58 | return utils::shuffleDown(lanes::All, Val, Delta, SrcLane); |
| 59 | } |
| 60 | |
| 61 | int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) { |
| 62 | return utils::shuffleDown(lanes::All, Val, Delta, Width); |
| 63 | } |
| 64 | } |
| 65 | |