| 1 | //===----------- api.cpp - Target independent OpenMP target RTL -----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // Implementation of OpenMP API interface functions. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "PluginManager.h" |
| 14 | #include "device.h" |
| 15 | #include "omptarget.h" |
| 16 | #include "rtl.h" |
| 17 | |
| 18 | #include "OpenMP/InternalTypes.h" |
| 19 | #include "OpenMP/Mapping.h" |
| 20 | #include "OpenMP/OMPT/Interface.h" |
| 21 | #include "OpenMP/omp.h" |
| 22 | #include "Shared/Profile.h" |
| 23 | |
| 24 | #include "llvm/ADT/SmallVector.h" |
| 25 | |
| 26 | #include <climits> |
| 27 | #include <cstdlib> |
| 28 | #include <cstring> |
| 29 | #include <mutex> |
| 30 | |
| 31 | EXTERN void ompx_dump_mapping_tables() { |
| 32 | ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;" }; |
| 33 | auto ExclusiveDevicesAccessor = PM->getExclusiveDevicesAccessor(); |
| 34 | for (auto &Device : PM->devices(ExclusiveDevicesAccessor)) |
| 35 | dumpTargetPointerMappings(&Loc, Device, true); |
| 36 | } |
| 37 | |
| 38 | #ifdef OMPT_SUPPORT |
| 39 | using namespace llvm::omp::target::ompt; |
| 40 | #endif |
| 41 | |
// Forward declarations of the explicit memory-management helpers used by the
// omp_target_* / llvm_omp_target_* entry points below. Their definitions are
// not part of this section of the file. The trailing Name argument receives
// the calling API function's __func__.

// Allocation and page-locking helpers (both return a pointer).
void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
                          const char *Name);
void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
                         const char *Name);

// Matching release helpers.
void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
                        const char *Name);
void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name);
| 49 | |
| 50 | // Implemented in libomp, they are called from within __tgt_* functions. |
| 51 | extern "C" { |
| 52 | int __kmpc_get_target_offload(void) __attribute__((weak)); |
| 53 | kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, int32_t gtid, int32_t flags, |
| 54 | size_t sizeof_kmp_task_t, |
| 55 | size_t sizeof_shareds, |
| 56 | kmp_routine_entry_t task_entry) |
| 57 | __attribute__((weak)); |
| 58 | |
| 59 | kmp_task_t * |
| 60 | __kmpc_omp_target_task_alloc(ident_t *loc_ref, int32_t gtid, int32_t flags, |
| 61 | size_t sizeof_kmp_task_t, size_t sizeof_shareds, |
| 62 | kmp_routine_entry_t task_entry, int64_t device_id) |
| 63 | __attribute__((weak)); |
| 64 | |
| 65 | int32_t __kmpc_omp_task_with_deps(ident_t *loc_ref, int32_t gtid, |
| 66 | kmp_task_t *new_task, int32_t ndeps, |
| 67 | kmp_depend_info_t *dep_list, |
| 68 | int32_t ndeps_noalias, |
| 69 | kmp_depend_info_t *noalias_dep_list) |
| 70 | __attribute__((weak)); |
| 71 | } |
| 72 | |
| 73 | EXTERN int omp_get_num_devices(void) { |
| 74 | TIMESCOPE(); |
| 75 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 76 | size_t NumDevices = PM->getNumDevices(); |
| 77 | |
| 78 | DP("Call to omp_get_num_devices returning %zd\n" , NumDevices); |
| 79 | |
| 80 | return NumDevices; |
| 81 | } |
| 82 | |
| 83 | EXTERN int omp_get_device_num(void) { |
| 84 | TIMESCOPE(); |
| 85 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 86 | int HostDevice = omp_get_initial_device(); |
| 87 | |
| 88 | DP("Call to omp_get_device_num returning %d\n" , HostDevice); |
| 89 | |
| 90 | return HostDevice; |
| 91 | } |
| 92 | |
| 93 | EXTERN int omp_get_initial_device(void) { |
| 94 | TIMESCOPE(); |
| 95 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 96 | int HostDevice = omp_get_num_devices(); |
| 97 | DP("Call to omp_get_initial_device returning %d\n" , HostDevice); |
| 98 | return HostDevice; |
| 99 | } |
| 100 | |
| 101 | EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) { |
| 102 | TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DeviceNum) + |
| 103 | ";size=" + std::to_string(val: Size)); |
| 104 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 105 | return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); |
| 106 | } |
| 107 | |
| 108 | EXTERN void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum) { |
| 109 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 110 | return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEVICE, __func__); |
| 111 | } |
| 112 | |
| 113 | EXTERN void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum) { |
| 114 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 115 | return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_HOST, __func__); |
| 116 | } |
| 117 | |
| 118 | EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) { |
| 119 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 120 | return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_SHARED, __func__); |
| 121 | } |
| 122 | |
| 123 | EXTERN void omp_target_free(void *Ptr, int DeviceNum) { |
| 124 | TIMESCOPE(); |
| 125 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 126 | return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); |
| 127 | } |
| 128 | |
| 129 | EXTERN void llvm_omp_target_free_device(void *Ptr, int DeviceNum) { |
| 130 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 131 | return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEVICE, __func__); |
| 132 | } |
| 133 | |
| 134 | EXTERN void llvm_omp_target_free_host(void *Ptr, int DeviceNum) { |
| 135 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 136 | return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_HOST, __func__); |
| 137 | } |
| 138 | |
| 139 | EXTERN void llvm_omp_target_free_shared(void *Ptre, int DeviceNum) { |
| 140 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 141 | return targetFreeExplicit(Ptre, DeviceNum, TARGET_ALLOC_SHARED, __func__); |
| 142 | } |
| 143 | |
| 144 | EXTERN void *llvm_omp_target_dynamic_shared_alloc() { |
| 145 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 146 | return nullptr; |
| 147 | } |
| 148 | |
| 149 | EXTERN void *llvm_omp_get_dynamic_shared() { |
| 150 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 151 | return nullptr; |
| 152 | } |
| 153 | |
| 154 | EXTERN [[nodiscard]] void *llvm_omp_target_lock_mem(void *Ptr, size_t Size, |
| 155 | int DeviceNum) { |
| 156 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 157 | return targetLockExplicit(HostPtr: Ptr, Size, DeviceNum, Name: __func__); |
| 158 | } |
| 159 | |
| 160 | EXTERN void llvm_omp_target_unlock_mem(void *Ptr, int DeviceNum) { |
| 161 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 162 | targetUnlockExplicit(HostPtr: Ptr, DeviceNum, Name: __func__); |
| 163 | } |
| 164 | |
| 165 | EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { |
| 166 | TIMESCOPE(); |
| 167 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 168 | DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n" , |
| 169 | DeviceNum, DPxPTR(Ptr)); |
| 170 | |
| 171 | if (!Ptr) { |
| 172 | DP("Call to omp_target_is_present with NULL ptr, returning false\n" ); |
| 173 | return false; |
| 174 | } |
| 175 | |
| 176 | if (DeviceNum == omp_get_initial_device()) { |
| 177 | DP("Call to omp_target_is_present on host, returning true\n" ); |
| 178 | return true; |
| 179 | } |
| 180 | |
| 181 | auto DeviceOrErr = PM->getDevice(DeviceNum); |
| 182 | if (!DeviceOrErr) |
| 183 | FATAL_MESSAGE(DeviceNum, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
| 184 | |
| 185 | // omp_target_is_present tests whether a host pointer refers to storage that |
| 186 | // is mapped to a given device. However, due to the lack of the storage size, |
| 187 | // only check 1 byte. Cannot set size 0 which checks whether the pointer (zero |
| 188 | // length array) is mapped instead of the referred storage. |
| 189 | TargetPointerResultTy TPR = |
| 190 | DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1, |
| 191 | /*UpdateRefCount=*/false, |
| 192 | /*UseHoldRefCount=*/false); |
| 193 | int Rc = TPR.isPresent(); |
| 194 | DP("Call to omp_target_is_present returns %d\n" , Rc); |
| 195 | return Rc; |
| 196 | } |
| 197 | |
| 198 | EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, |
| 199 | size_t DstOffset, size_t SrcOffset, int DstDevice, |
| 200 | int SrcDevice) { |
| 201 | TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DstDevice) + |
| 202 | ";src_dev=" + std::to_string(val: SrcDevice) + |
| 203 | ";size=" + std::to_string(val: Length)); |
| 204 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 205 | DP("Call to omp_target_memcpy, dst device %d, src device %d, " |
| 206 | "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " |
| 207 | "src offset %zu, length %zu\n" , |
| 208 | DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset, |
| 209 | Length); |
| 210 | |
| 211 | if (!Dst || !Src || Length <= 0) { |
| 212 | if (Length == 0) { |
| 213 | DP("Call to omp_target_memcpy with zero length, nothing to do\n" ); |
| 214 | return OFFLOAD_SUCCESS; |
| 215 | } |
| 216 | |
| 217 | REPORT("Call to omp_target_memcpy with invalid arguments\n" ); |
| 218 | return OFFLOAD_FAIL; |
| 219 | } |
| 220 | |
| 221 | int Rc = OFFLOAD_SUCCESS; |
| 222 | void *SrcAddr = (char *)const_cast<void *>(Src) + SrcOffset; |
| 223 | void *DstAddr = (char *)Dst + DstOffset; |
| 224 | |
| 225 | if (SrcDevice == omp_get_initial_device() && |
| 226 | DstDevice == omp_get_initial_device()) { |
| 227 | DP("copy from host to host\n" ); |
| 228 | const void *P = memcpy(dest: DstAddr, src: SrcAddr, n: Length); |
| 229 | if (P == NULL) |
| 230 | Rc = OFFLOAD_FAIL; |
| 231 | } else if (SrcDevice == omp_get_initial_device()) { |
| 232 | DP("copy from host to device\n" ); |
| 233 | auto DstDeviceOrErr = PM->getDevice(DstDevice); |
| 234 | if (!DstDeviceOrErr) |
| 235 | FATAL_MESSAGE(DstDevice, "%s" , |
| 236 | toString(DstDeviceOrErr.takeError()).c_str()); |
| 237 | AsyncInfoTy AsyncInfo(*DstDeviceOrErr); |
| 238 | Rc = DstDeviceOrErr->submitData(DstAddr, SrcAddr, Length, AsyncInfo); |
| 239 | } else if (DstDevice == omp_get_initial_device()) { |
| 240 | DP("copy from device to host\n" ); |
| 241 | auto SrcDeviceOrErr = PM->getDevice(SrcDevice); |
| 242 | if (!SrcDeviceOrErr) |
| 243 | FATAL_MESSAGE(SrcDevice, "%s" , |
| 244 | toString(SrcDeviceOrErr.takeError()).c_str()); |
| 245 | AsyncInfoTy AsyncInfo(*SrcDeviceOrErr); |
| 246 | Rc = SrcDeviceOrErr->retrieveData(DstAddr, SrcAddr, Length, AsyncInfo); |
| 247 | } else { |
| 248 | DP("copy from device to device\n" ); |
| 249 | auto SrcDeviceOrErr = PM->getDevice(SrcDevice); |
| 250 | if (!SrcDeviceOrErr) |
| 251 | FATAL_MESSAGE(SrcDevice, "%s" , |
| 252 | toString(SrcDeviceOrErr.takeError()).c_str()); |
| 253 | AsyncInfoTy AsyncInfo(*SrcDeviceOrErr); |
| 254 | auto DstDeviceOrErr = PM->getDevice(DstDevice); |
| 255 | if (!DstDeviceOrErr) |
| 256 | FATAL_MESSAGE(DstDevice, "%s" , |
| 257 | toString(DstDeviceOrErr.takeError()).c_str()); |
| 258 | // First try to use D2D memcpy which is more efficient. If fails, fall back |
| 259 | // to inefficient way. |
| 260 | if (SrcDeviceOrErr->isDataExchangable(*DstDeviceOrErr)) { |
| 261 | AsyncInfoTy AsyncInfo(*SrcDeviceOrErr); |
| 262 | Rc = SrcDeviceOrErr->dataExchange(SrcAddr, *DstDeviceOrErr, DstAddr, |
| 263 | Length, AsyncInfo); |
| 264 | if (Rc == OFFLOAD_SUCCESS) |
| 265 | return OFFLOAD_SUCCESS; |
| 266 | } |
| 267 | |
| 268 | void *Buffer = malloc(size: Length); |
| 269 | { |
| 270 | AsyncInfoTy AsyncInfo(*SrcDeviceOrErr); |
| 271 | Rc = SrcDeviceOrErr->retrieveData(Buffer, SrcAddr, Length, AsyncInfo); |
| 272 | } |
| 273 | if (Rc == OFFLOAD_SUCCESS) { |
| 274 | AsyncInfoTy AsyncInfo(*DstDeviceOrErr); |
| 275 | Rc = DstDeviceOrErr->submitData(DstAddr, Buffer, Length, AsyncInfo); |
| 276 | } |
| 277 | free(ptr: Buffer); |
| 278 | } |
| 279 | |
| 280 | DP("omp_target_memcpy returns %d\n" , Rc); |
| 281 | return Rc; |
| 282 | } |
| 283 | |
| 284 | // The helper function that calls omp_target_memcpy or omp_target_memcpy_rect |
| 285 | static int libomp_target_memcpy_async_task(int32_t Gtid, kmp_task_t *Task) { |
| 286 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 287 | if (Task == nullptr) |
| 288 | return OFFLOAD_FAIL; |
| 289 | |
| 290 | TargetMemcpyArgsTy *Args = (TargetMemcpyArgsTy *)Task->shareds; |
| 291 | |
| 292 | if (Args == nullptr) |
| 293 | return OFFLOAD_FAIL; |
| 294 | |
| 295 | // Call blocked version |
| 296 | int Rc = OFFLOAD_SUCCESS; |
| 297 | if (Args->IsRectMemcpy) { |
| 298 | Rc = omp_target_memcpy_rect( |
| 299 | Args->Dst, Args->Src, Args->ElementSize, Args->NumDims, Args->Volume, |
| 300 | Args->DstOffsets, Args->SrcOffsets, Args->DstDimensions, |
| 301 | Args->SrcDimensions, Args->DstDevice, Args->SrcDevice); |
| 302 | |
| 303 | DP("omp_target_memcpy_rect returns %d\n" , Rc); |
| 304 | } else { |
| 305 | Rc = omp_target_memcpy(Args->Dst, Args->Src, Args->Length, Args->DstOffset, |
| 306 | Args->SrcOffset, Args->DstDevice, Args->SrcDevice); |
| 307 | |
| 308 | DP("omp_target_memcpy returns %d\n" , Rc); |
| 309 | } |
| 310 | |
| 311 | // Release the arguments object |
| 312 | delete Args; |
| 313 | |
| 314 | return Rc; |
| 315 | } |
| 316 | |
| 317 | static int libomp_target_memset_async_task(int32_t Gtid, kmp_task_t *Task) { |
| 318 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 319 | if (!Task) |
| 320 | return OFFLOAD_FAIL; |
| 321 | |
| 322 | auto *Args = reinterpret_cast<TargetMemsetArgsTy *>(Task->shareds); |
| 323 | if (!Args) |
| 324 | return OFFLOAD_FAIL; |
| 325 | |
| 326 | // call omp_target_memset() |
| 327 | omp_target_memset(Args->Ptr, Args->C, Args->N, Args->DeviceNum); |
| 328 | |
| 329 | delete Args; |
| 330 | |
| 331 | return OFFLOAD_SUCCESS; |
| 332 | } |
| 333 | |
| 334 | static inline void |
| 335 | convertDepObjVector(llvm::SmallVector<kmp_depend_info_t> &Vec, int DepObjCount, |
| 336 | omp_depend_t *DepObjList) { |
| 337 | for (int i = 0; i < DepObjCount; ++i) { |
| 338 | omp_depend_t DepObj = DepObjList[i]; |
| 339 | Vec.push_back(*((kmp_depend_info_t *)DepObj)); |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | template <class T> |
| 344 | static inline int |
| 345 | libomp_helper_task_creation(T *Args, int (*Fn)(int32_t, kmp_task_t *), |
| 346 | int DepObjCount, omp_depend_t *DepObjList) { |
| 347 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 348 | // Create global thread ID |
| 349 | int Gtid = __kmpc_global_thread_num(nullptr); |
| 350 | |
| 351 | // Setup the hidden helper flags |
| 352 | int32_t Flags = 0; |
| 353 | kmp_tasking_flags_t *InputFlags = (kmp_tasking_flags_t *)&Flags; |
| 354 | InputFlags->hidden_helper = 1; |
| 355 | |
| 356 | // Alloc the helper task |
| 357 | kmp_task_t *Task = __kmpc_omp_target_task_alloc( |
| 358 | nullptr, Gtid, Flags, sizeof(kmp_task_t), 0, Fn, -1); |
| 359 | if (!Task) { |
| 360 | delete Args; |
| 361 | return OFFLOAD_FAIL; |
| 362 | } |
| 363 | |
| 364 | // Setup the arguments for the helper task |
| 365 | Task->shareds = Args; |
| 366 | |
| 367 | // Convert types of depend objects |
| 368 | llvm::SmallVector<kmp_depend_info_t> DepObjs; |
| 369 | convertDepObjVector(DepObjs, DepObjCount, DepObjList); |
| 370 | |
| 371 | // Launch the helper task |
| 372 | int Rc = __kmpc_omp_task_with_deps(nullptr, Gtid, Task, DepObjCount, |
| 373 | DepObjs.data(), 0, nullptr); |
| 374 | |
| 375 | return Rc; |
| 376 | } |
| 377 | |
| 378 | EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes, |
| 379 | int DeviceNum) { |
| 380 | TIMESCOPE(); |
| 381 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 382 | DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n" , |
| 383 | DeviceNum, Ptr, NumBytes); |
| 384 | |
| 385 | // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation |
| 386 | // of unspecified behavior, see OpenMP spec). |
| 387 | if (!Ptr || NumBytes == 0) { |
| 388 | return Ptr; |
| 389 | } |
| 390 | |
| 391 | if (DeviceNum == omp_get_initial_device()) { |
| 392 | DP("filling memory on host via memset" ); |
| 393 | memset(s: Ptr, c: ByteVal, n: NumBytes); // ignore return value, memset() cannot fail |
| 394 | } else { |
| 395 | // TODO: replace the omp_target_memset() slow path with the fast path. |
| 396 | // That will require the ability to execute a kernel from within |
| 397 | // libomptarget.so (which we do not have at the moment). |
| 398 | |
| 399 | // This is a very slow path: create a filled array on the host and upload |
| 400 | // it to the GPU device. |
| 401 | int InitialDevice = omp_get_initial_device(); |
| 402 | void *Shadow = omp_target_alloc(NumBytes, InitialDevice); |
| 403 | if (Shadow) { |
| 404 | (void)memset(s: Shadow, c: ByteVal, n: NumBytes); |
| 405 | (void)omp_target_memcpy(Ptr, Shadow, NumBytes, 0, 0, DeviceNum, |
| 406 | InitialDevice); |
| 407 | (void)omp_target_free(Shadow, InitialDevice); |
| 408 | } else { |
| 409 | // If the omp_target_alloc has failed, let's just not do anything. |
| 410 | // omp_target_memset does not have any good way to fail, so we |
| 411 | // simply avoid a catastrophic failure of the process for now. |
| 412 | DP("omp_target_memset failed to fill memory due to error with " |
| 413 | "omp_target_alloc" ); |
| 414 | } |
| 415 | } |
| 416 | |
| 417 | DP("omp_target_memset returns %p\n" , Ptr); |
| 418 | return Ptr; |
| 419 | } |
| 420 | |
| 421 | EXTERN void *omp_target_memset_async(void *Ptr, int ByteVal, size_t NumBytes, |
| 422 | int DeviceNum, int DepObjCount, |
| 423 | omp_depend_t *DepObjList) { |
| 424 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 425 | DP("Call to omp_target_memset_async, device %d, device pointer %p, size %zu" , |
| 426 | DeviceNum, Ptr, NumBytes); |
| 427 | |
| 428 | // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation |
| 429 | // of unspecified behavior, see OpenMP spec). |
| 430 | if (!Ptr || NumBytes == 0) |
| 431 | return Ptr; |
| 432 | |
| 433 | // Create the task object to deal with the async invocation |
| 434 | auto *Args = new TargetMemsetArgsTy{Ptr, ByteVal, NumBytes, DeviceNum}; |
| 435 | |
| 436 | // omp_target_memset_async() cannot fail via a return code, so ignore the |
| 437 | // return code of the helper function |
| 438 | (void)libomp_helper_task_creation(Args, &libomp_target_memset_async_task, |
| 439 | DepObjCount, DepObjList); |
| 440 | |
| 441 | return Ptr; |
| 442 | } |
| 443 | |
| 444 | EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, |
| 445 | size_t DstOffset, size_t SrcOffset, |
| 446 | int DstDevice, int SrcDevice, |
| 447 | int DepObjCount, omp_depend_t *DepObjList) { |
| 448 | TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DstDevice) + |
| 449 | ";src_dev=" + std::to_string(val: SrcDevice) + |
| 450 | ";size=" + std::to_string(val: Length)); |
| 451 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 452 | DP("Call to omp_target_memcpy_async, dst device %d, src device %d, " |
| 453 | "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " |
| 454 | "src offset %zu, length %zu\n" , |
| 455 | DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset, |
| 456 | Length); |
| 457 | |
| 458 | // Check the source and dest address |
| 459 | if (Dst == nullptr || Src == nullptr) |
| 460 | return OFFLOAD_FAIL; |
| 461 | |
| 462 | // Create task object |
| 463 | TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy( |
| 464 | Dst, Src, Length, DstOffset, SrcOffset, DstDevice, SrcDevice); |
| 465 | |
| 466 | // Create and launch helper task |
| 467 | int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task, |
| 468 | DepObjCount, DepObjList); |
| 469 | |
| 470 | DP("omp_target_memcpy_async returns %d\n" , Rc); |
| 471 | return Rc; |
| 472 | } |
| 473 | |
| 474 | EXTERN int |
| 475 | omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize, |
| 476 | int NumDims, const size_t *Volume, |
| 477 | const size_t *DstOffsets, const size_t *SrcOffsets, |
| 478 | const size_t *DstDimensions, const size_t *SrcDimensions, |
| 479 | int DstDevice, int SrcDevice) { |
| 480 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 481 | DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, " |
| 482 | "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " |
| 483 | "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " |
| 484 | "volume " DPxMOD ", element size %zu, num_dims %d\n" , |
| 485 | DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets), |
| 486 | DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions), |
| 487 | DPxPTR(Volume), ElementSize, NumDims); |
| 488 | |
| 489 | if (!(Dst || Src)) { |
| 490 | DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n" , |
| 491 | INT_MAX); |
| 492 | return INT_MAX; |
| 493 | } |
| 494 | |
| 495 | if (!Dst || !Src || ElementSize < 1 || NumDims < 1 || !Volume || |
| 496 | !DstOffsets || !SrcOffsets || !DstDimensions || !SrcDimensions) { |
| 497 | REPORT("Call to omp_target_memcpy_rect with invalid arguments\n" ); |
| 498 | return OFFLOAD_FAIL; |
| 499 | } |
| 500 | |
| 501 | int Rc; |
| 502 | if (NumDims == 1) { |
| 503 | Rc = omp_target_memcpy(Dst, Src, ElementSize * Volume[0], |
| 504 | ElementSize * DstOffsets[0], |
| 505 | ElementSize * SrcOffsets[0], DstDevice, SrcDevice); |
| 506 | } else { |
| 507 | size_t DstSliceSize = ElementSize; |
| 508 | size_t SrcSliceSize = ElementSize; |
| 509 | for (int I = 1; I < NumDims; ++I) { |
| 510 | DstSliceSize *= DstDimensions[I]; |
| 511 | SrcSliceSize *= SrcDimensions[I]; |
| 512 | } |
| 513 | |
| 514 | size_t DstOff = DstOffsets[0] * DstSliceSize; |
| 515 | size_t SrcOff = SrcOffsets[0] * SrcSliceSize; |
| 516 | for (size_t I = 0; I < Volume[0]; ++I) { |
| 517 | Rc = omp_target_memcpy_rect( |
| 518 | (char *)Dst + DstOff + DstSliceSize * I, |
| 519 | (char *)const_cast<void *>(Src) + SrcOff + SrcSliceSize * I, |
| 520 | ElementSize, NumDims - 1, Volume + 1, DstOffsets + 1, SrcOffsets + 1, |
| 521 | DstDimensions + 1, SrcDimensions + 1, DstDevice, SrcDevice); |
| 522 | |
| 523 | if (Rc) { |
| 524 | DP("Recursive call to omp_target_memcpy_rect returns unsuccessfully\n" ); |
| 525 | return Rc; |
| 526 | } |
| 527 | } |
| 528 | } |
| 529 | |
| 530 | DP("omp_target_memcpy_rect returns %d\n" , Rc); |
| 531 | return Rc; |
| 532 | } |
| 533 | |
| 534 | EXTERN int omp_target_memcpy_rect_async( |
| 535 | void *Dst, const void *Src, size_t ElementSize, int NumDims, |
| 536 | const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets, |
| 537 | const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, |
| 538 | int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { |
| 539 | TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DstDevice) + |
| 540 | ";src_dev=" + std::to_string(val: SrcDevice) + |
| 541 | ";size=" + std::to_string(val: ElementSize) + |
| 542 | ";num_dims=" + std::to_string(val: NumDims)); |
| 543 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 544 | DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, " |
| 545 | "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " |
| 546 | "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " |
| 547 | "volume " DPxMOD ", element size %zu, num_dims %d\n" , |
| 548 | DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets), |
| 549 | DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions), |
| 550 | DPxPTR(Volume), ElementSize, NumDims); |
| 551 | |
| 552 | // Need to check this first to not return OFFLOAD_FAIL instead |
| 553 | if (!Dst && !Src) { |
| 554 | DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n" , |
| 555 | INT_MAX); |
| 556 | return INT_MAX; |
| 557 | } |
| 558 | |
| 559 | // Check the source and dest address |
| 560 | if (Dst == nullptr || Src == nullptr) |
| 561 | return OFFLOAD_FAIL; |
| 562 | |
| 563 | // Create task object |
| 564 | TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy( |
| 565 | Dst, Src, ElementSize, NumDims, Volume, DstOffsets, SrcOffsets, |
| 566 | DstDimensions, SrcDimensions, DstDevice, SrcDevice); |
| 567 | |
| 568 | // Create and launch helper task |
| 569 | int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task, |
| 570 | DepObjCount, DepObjList); |
| 571 | |
| 572 | DP("omp_target_memcpy_rect_async returns %d\n" , Rc); |
| 573 | return Rc; |
| 574 | } |
| 575 | |
| 576 | EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr, |
| 577 | size_t Size, size_t DeviceOffset, |
| 578 | int DeviceNum) { |
| 579 | TIMESCOPE(); |
| 580 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 581 | DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD ", " |
| 582 | "device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n" , |
| 583 | DPxPTR(HostPtr), DPxPTR(DevicePtr), Size, DeviceOffset, DeviceNum); |
| 584 | |
| 585 | if (!HostPtr || !DevicePtr || Size <= 0) { |
| 586 | REPORT("Call to omp_target_associate_ptr with invalid arguments\n" ); |
| 587 | return OFFLOAD_FAIL; |
| 588 | } |
| 589 | |
| 590 | if (DeviceNum == omp_get_initial_device()) { |
| 591 | REPORT("omp_target_associate_ptr: no association possible on the host\n" ); |
| 592 | return OFFLOAD_FAIL; |
| 593 | } |
| 594 | |
| 595 | auto DeviceOrErr = PM->getDevice(DeviceNum); |
| 596 | if (!DeviceOrErr) |
| 597 | FATAL_MESSAGE(DeviceNum, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
| 598 | |
| 599 | void *DeviceAddr = (void *)((uint64_t)DevicePtr + (uint64_t)DeviceOffset); |
| 600 | |
| 601 | OMPT_IF_BUILT(InterfaceRAII( |
| 602 | RegionInterface.getCallbacks<ompt_target_data_associate>(), DeviceNum, |
| 603 | const_cast<void *>(HostPtr), const_cast<void *>(DevicePtr), Size, |
| 604 | __builtin_return_address(0))); |
| 605 | |
| 606 | int Rc = DeviceOrErr->getMappingInfo().associatePtr( |
| 607 | const_cast<void *>(HostPtr), const_cast<void *>(DeviceAddr), Size); |
| 608 | DP("omp_target_associate_ptr returns %d\n" , Rc); |
| 609 | return Rc; |
| 610 | } |
| 611 | |
| 612 | EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) { |
| 613 | TIMESCOPE(); |
| 614 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 615 | DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", " |
| 616 | "device_num %d\n" , |
| 617 | DPxPTR(HostPtr), DeviceNum); |
| 618 | |
| 619 | if (!HostPtr) { |
| 620 | REPORT("Call to omp_target_associate_ptr with invalid host_ptr\n" ); |
| 621 | return OFFLOAD_FAIL; |
| 622 | } |
| 623 | |
| 624 | if (DeviceNum == omp_get_initial_device()) { |
| 625 | REPORT( |
| 626 | "omp_target_disassociate_ptr: no association possible on the host\n" ); |
| 627 | return OFFLOAD_FAIL; |
| 628 | } |
| 629 | |
| 630 | auto DeviceOrErr = PM->getDevice(DeviceNum); |
| 631 | if (!DeviceOrErr) |
| 632 | FATAL_MESSAGE(DeviceNum, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
| 633 | |
| 634 | OMPT_IF_BUILT(InterfaceRAII( |
| 635 | RegionInterface.getCallbacks<ompt_target_data_disassociate>(), DeviceNum, |
| 636 | const_cast<void *>(HostPtr), |
| 637 | /*DevicePtr=*/nullptr, /*Size=*/0, __builtin_return_address(0))); |
| 638 | |
| 639 | int Rc = DeviceOrErr->getMappingInfo().disassociatePtr( |
| 640 | const_cast<void *>(HostPtr)); |
| 641 | DP("omp_target_disassociate_ptr returns %d\n" , Rc); |
| 642 | return Rc; |
| 643 | } |
| 644 | |
| 645 | EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { |
| 646 | TIMESCOPE(); |
| 647 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
| 648 | DP("Call to omp_get_mapped_ptr with ptr " DPxMOD ", device_num %d.\n" , |
| 649 | DPxPTR(Ptr), DeviceNum); |
| 650 | |
| 651 | if (!Ptr) { |
| 652 | REPORT("Call to omp_get_mapped_ptr with nullptr.\n" ); |
| 653 | return nullptr; |
| 654 | } |
| 655 | |
| 656 | int NumDevices = omp_get_initial_device(); |
| 657 | if (DeviceNum == NumDevices) { |
| 658 | DP("Device %d is initial device, returning Ptr " DPxMOD ".\n" , DeviceNum, |
| 659 | DPxPTR(Ptr)); |
| 660 | return const_cast<void *>(Ptr); |
| 661 | } |
| 662 | |
| 663 | if (NumDevices <= DeviceNum) { |
| 664 | DP("DeviceNum %d is invalid, returning nullptr.\n" , DeviceNum); |
| 665 | return nullptr; |
| 666 | } |
| 667 | |
| 668 | auto DeviceOrErr = PM->getDevice(DeviceNum); |
| 669 | if (!DeviceOrErr) |
| 670 | FATAL_MESSAGE(DeviceNum, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
| 671 | |
| 672 | TargetPointerResultTy TPR = |
| 673 | DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1, |
| 674 | /*UpdateRefCount=*/false, |
| 675 | /*UseHoldRefCount=*/false); |
| 676 | if (!TPR.isPresent()) { |
| 677 | DP("Ptr " DPxMOD "is not present on device %d, returning nullptr.\n" , |
| 678 | DPxPTR(Ptr), DeviceNum); |
| 679 | return nullptr; |
| 680 | } |
| 681 | |
| 682 | DP("omp_get_mapped_ptr returns " DPxMOD ".\n" , DPxPTR(TPR.TargetPointer)); |
| 683 | |
| 684 | return TPR.TargetPointer; |
| 685 | } |
| 686 | |