1//===----------- api.cpp - Target independent OpenMP target RTL -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of OpenMP API interface functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PluginManager.h"
14#include "device.h"
15#include "omptarget.h"
16#include "rtl.h"
17
18#include "OpenMP/InternalTypes.h"
19#include "OpenMP/Mapping.h"
20#include "OpenMP/OMPT/Interface.h"
21#include "OpenMP/omp.h"
22#include "Shared/Profile.h"
23
24#include "llvm/ADT/SmallVector.h"
25
26#include <climits>
27#include <cstdlib>
28#include <cstring>
29#include <mutex>
30
31EXTERN void ompx_dump_mapping_tables() {
32 ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
33 auto ExclusiveDevicesAccessor = PM->getExclusiveDevicesAccessor();
34 for (auto &Device : PM->devices(ExclusiveDevicesAccessor))
35 dumpTargetPointerMappings(&Loc, Device, true);
36}
37
// The OMPT helper names are only pulled into scope when OMPT_SUPPORT is
// defined.
#if defined(OMPT_SUPPORT)
using namespace llvm::omp::target::ompt;
#endif // defined(OMPT_SUPPORT)
41
// Forward declarations of the explicit memory helpers that the entry points
// in this file forward to. Only the declarations are visible here. In every
// call made from this file, Name is the calling API function's __func__.

/// Backs omp_target_alloc and the llvm_omp_target_alloc_* entry points;
/// \p Kind is one of the TARGET_ALLOC_* values.
void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
                          const char *Name);

/// Backs omp_target_free and the llvm_omp_target_free_* entry points;
/// \p Kind is one of the TARGET_ALLOC_* values.
void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
                        const char *Name);

/// Backs llvm_omp_target_lock_mem.
void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
                         const char *Name);

/// Backs llvm_omp_target_unlock_mem.
void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name);
49
50// Implemented in libomp, they are called from within __tgt_* functions.
51extern "C" {
52int __kmpc_get_target_offload(void) __attribute__((weak));
53kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, int32_t gtid, int32_t flags,
54 size_t sizeof_kmp_task_t,
55 size_t sizeof_shareds,
56 kmp_routine_entry_t task_entry)
57 __attribute__((weak));
58
59kmp_task_t *
60__kmpc_omp_target_task_alloc(ident_t *loc_ref, int32_t gtid, int32_t flags,
61 size_t sizeof_kmp_task_t, size_t sizeof_shareds,
62 kmp_routine_entry_t task_entry, int64_t device_id)
63 __attribute__((weak));
64
65int32_t __kmpc_omp_task_with_deps(ident_t *loc_ref, int32_t gtid,
66 kmp_task_t *new_task, int32_t ndeps,
67 kmp_depend_info_t *dep_list,
68 int32_t ndeps_noalias,
69 kmp_depend_info_t *noalias_dep_list)
70 __attribute__((weak));
71}
72
73EXTERN int omp_get_num_devices(void) {
74 TIMESCOPE();
75 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
76 size_t NumDevices = PM->getNumDevices();
77
78 DP("Call to omp_get_num_devices returning %zd\n", NumDevices);
79
80 return NumDevices;
81}
82
83EXTERN int omp_get_device_num(void) {
84 TIMESCOPE();
85 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
86 int HostDevice = omp_get_initial_device();
87
88 DP("Call to omp_get_device_num returning %d\n", HostDevice);
89
90 return HostDevice;
91}
92
93EXTERN int omp_get_initial_device(void) {
94 TIMESCOPE();
95 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
96 int HostDevice = omp_get_num_devices();
97 DP("Call to omp_get_initial_device returning %d\n", HostDevice);
98 return HostDevice;
99}
100
101EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) {
102 TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DeviceNum) +
103 ";size=" + std::to_string(val: Size));
104 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
105 return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__);
106}
107
108EXTERN void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum) {
109 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
110 return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEVICE, __func__);
111}
112
113EXTERN void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum) {
114 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
115 return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_HOST, __func__);
116}
117
118EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) {
119 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
120 return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_SHARED, __func__);
121}
122
123EXTERN void omp_target_free(void *Ptr, int DeviceNum) {
124 TIMESCOPE();
125 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
126 return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEFAULT, __func__);
127}
128
129EXTERN void llvm_omp_target_free_device(void *Ptr, int DeviceNum) {
130 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
131 return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEVICE, __func__);
132}
133
134EXTERN void llvm_omp_target_free_host(void *Ptr, int DeviceNum) {
135 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
136 return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_HOST, __func__);
137}
138
139EXTERN void llvm_omp_target_free_shared(void *Ptre, int DeviceNum) {
140 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
141 return targetFreeExplicit(Ptre, DeviceNum, TARGET_ALLOC_SHARED, __func__);
142}
143
144EXTERN void *llvm_omp_target_dynamic_shared_alloc() {
145 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
146 return nullptr;
147}
148
149EXTERN void *llvm_omp_get_dynamic_shared() {
150 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
151 return nullptr;
152}
153
154EXTERN [[nodiscard]] void *llvm_omp_target_lock_mem(void *Ptr, size_t Size,
155 int DeviceNum) {
156 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
157 return targetLockExplicit(HostPtr: Ptr, Size, DeviceNum, Name: __func__);
158}
159
160EXTERN void llvm_omp_target_unlock_mem(void *Ptr, int DeviceNum) {
161 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
162 targetUnlockExplicit(HostPtr: Ptr, DeviceNum, Name: __func__);
163}
164
165EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
166 TIMESCOPE();
167 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
168 DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n",
169 DeviceNum, DPxPTR(Ptr));
170
171 if (!Ptr) {
172 DP("Call to omp_target_is_present with NULL ptr, returning false\n");
173 return false;
174 }
175
176 if (DeviceNum == omp_get_initial_device()) {
177 DP("Call to omp_target_is_present on host, returning true\n");
178 return true;
179 }
180
181 auto DeviceOrErr = PM->getDevice(DeviceNum);
182 if (!DeviceOrErr)
183 FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
184
185 // omp_target_is_present tests whether a host pointer refers to storage that
186 // is mapped to a given device. However, due to the lack of the storage size,
187 // only check 1 byte. Cannot set size 0 which checks whether the pointer (zero
188 // lengh array) is mapped instead of the referred storage.
189 TargetPointerResultTy TPR =
190 DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1,
191 /*UpdateRefCount=*/false,
192 /*UseHoldRefCount=*/false);
193 int Rc = TPR.isPresent();
194 DP("Call to omp_target_is_present returns %d\n", Rc);
195 return Rc;
196}
197
198EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
199 size_t DstOffset, size_t SrcOffset, int DstDevice,
200 int SrcDevice) {
201 TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DstDevice) +
202 ";src_dev=" + std::to_string(val: SrcDevice) +
203 ";size=" + std::to_string(val: Length));
204 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
205 DP("Call to omp_target_memcpy, dst device %d, src device %d, "
206 "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
207 "src offset %zu, length %zu\n",
208 DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset,
209 Length);
210
211 if (!Dst || !Src || Length <= 0) {
212 if (Length == 0) {
213 DP("Call to omp_target_memcpy with zero length, nothing to do\n");
214 return OFFLOAD_SUCCESS;
215 }
216
217 REPORT("Call to omp_target_memcpy with invalid arguments\n");
218 return OFFLOAD_FAIL;
219 }
220
221 int Rc = OFFLOAD_SUCCESS;
222 void *SrcAddr = (char *)const_cast<void *>(Src) + SrcOffset;
223 void *DstAddr = (char *)Dst + DstOffset;
224
225 if (SrcDevice == omp_get_initial_device() &&
226 DstDevice == omp_get_initial_device()) {
227 DP("copy from host to host\n");
228 const void *P = memcpy(dest: DstAddr, src: SrcAddr, n: Length);
229 if (P == NULL)
230 Rc = OFFLOAD_FAIL;
231 } else if (SrcDevice == omp_get_initial_device()) {
232 DP("copy from host to device\n");
233 auto DstDeviceOrErr = PM->getDevice(DstDevice);
234 if (!DstDeviceOrErr)
235 FATAL_MESSAGE(DstDevice, "%s",
236 toString(DstDeviceOrErr.takeError()).c_str());
237 AsyncInfoTy AsyncInfo(*DstDeviceOrErr);
238 Rc = DstDeviceOrErr->submitData(DstAddr, SrcAddr, Length, AsyncInfo);
239 } else if (DstDevice == omp_get_initial_device()) {
240 DP("copy from device to host\n");
241 auto SrcDeviceOrErr = PM->getDevice(SrcDevice);
242 if (!SrcDeviceOrErr)
243 FATAL_MESSAGE(SrcDevice, "%s",
244 toString(SrcDeviceOrErr.takeError()).c_str());
245 AsyncInfoTy AsyncInfo(*SrcDeviceOrErr);
246 Rc = SrcDeviceOrErr->retrieveData(DstAddr, SrcAddr, Length, AsyncInfo);
247 } else {
248 DP("copy from device to device\n");
249 auto SrcDeviceOrErr = PM->getDevice(SrcDevice);
250 if (!SrcDeviceOrErr)
251 FATAL_MESSAGE(SrcDevice, "%s",
252 toString(SrcDeviceOrErr.takeError()).c_str());
253 AsyncInfoTy AsyncInfo(*SrcDeviceOrErr);
254 auto DstDeviceOrErr = PM->getDevice(DstDevice);
255 if (!DstDeviceOrErr)
256 FATAL_MESSAGE(DstDevice, "%s",
257 toString(DstDeviceOrErr.takeError()).c_str());
258 // First try to use D2D memcpy which is more efficient. If fails, fall back
259 // to unefficient way.
260 if (SrcDeviceOrErr->isDataExchangable(*DstDeviceOrErr)) {
261 AsyncInfoTy AsyncInfo(*SrcDeviceOrErr);
262 Rc = SrcDeviceOrErr->dataExchange(SrcAddr, *DstDeviceOrErr, DstAddr,
263 Length, AsyncInfo);
264 if (Rc == OFFLOAD_SUCCESS)
265 return OFFLOAD_SUCCESS;
266 }
267
268 void *Buffer = malloc(size: Length);
269 {
270 AsyncInfoTy AsyncInfo(*SrcDeviceOrErr);
271 Rc = SrcDeviceOrErr->retrieveData(Buffer, SrcAddr, Length, AsyncInfo);
272 }
273 if (Rc == OFFLOAD_SUCCESS) {
274 AsyncInfoTy AsyncInfo(*DstDeviceOrErr);
275 Rc = DstDeviceOrErr->submitData(DstAddr, Buffer, Length, AsyncInfo);
276 }
277 free(ptr: Buffer);
278 }
279
280 DP("omp_target_memcpy returns %d\n", Rc);
281 return Rc;
282}
283
284// The helper function that calls omp_target_memcpy or omp_target_memcpy_rect
285static int libomp_target_memcpy_async_task(int32_t Gtid, kmp_task_t *Task) {
286 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
287 if (Task == nullptr)
288 return OFFLOAD_FAIL;
289
290 TargetMemcpyArgsTy *Args = (TargetMemcpyArgsTy *)Task->shareds;
291
292 if (Args == nullptr)
293 return OFFLOAD_FAIL;
294
295 // Call blocked version
296 int Rc = OFFLOAD_SUCCESS;
297 if (Args->IsRectMemcpy) {
298 Rc = omp_target_memcpy_rect(
299 Args->Dst, Args->Src, Args->ElementSize, Args->NumDims, Args->Volume,
300 Args->DstOffsets, Args->SrcOffsets, Args->DstDimensions,
301 Args->SrcDimensions, Args->DstDevice, Args->SrcDevice);
302
303 DP("omp_target_memcpy_rect returns %d\n", Rc);
304 } else {
305 Rc = omp_target_memcpy(Args->Dst, Args->Src, Args->Length, Args->DstOffset,
306 Args->SrcOffset, Args->DstDevice, Args->SrcDevice);
307
308 DP("omp_target_memcpy returns %d\n", Rc);
309 }
310
311 // Release the arguments object
312 delete Args;
313
314 return Rc;
315}
316
317static int libomp_target_memset_async_task(int32_t Gtid, kmp_task_t *Task) {
318 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
319 if (!Task)
320 return OFFLOAD_FAIL;
321
322 auto *Args = reinterpret_cast<TargetMemsetArgsTy *>(Task->shareds);
323 if (!Args)
324 return OFFLOAD_FAIL;
325
326 // call omp_target_memset()
327 omp_target_memset(Args->Ptr, Args->C, Args->N, Args->DeviceNum);
328
329 delete Args;
330
331 return OFFLOAD_SUCCESS;
332}
333
334static inline void
335convertDepObjVector(llvm::SmallVector<kmp_depend_info_t> &Vec, int DepObjCount,
336 omp_depend_t *DepObjList) {
337 for (int i = 0; i < DepObjCount; ++i) {
338 omp_depend_t DepObj = DepObjList[i];
339 Vec.push_back(*((kmp_depend_info_t *)DepObj));
340 }
341}
342
343template <class T>
344static inline int
345libomp_helper_task_creation(T *Args, int (*Fn)(int32_t, kmp_task_t *),
346 int DepObjCount, omp_depend_t *DepObjList) {
347 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
348 // Create global thread ID
349 int Gtid = __kmpc_global_thread_num(nullptr);
350
351 // Setup the hidden helper flags
352 int32_t Flags = 0;
353 kmp_tasking_flags_t *InputFlags = (kmp_tasking_flags_t *)&Flags;
354 InputFlags->hidden_helper = 1;
355
356 // Alloc the helper task
357 kmp_task_t *Task = __kmpc_omp_target_task_alloc(
358 nullptr, Gtid, Flags, sizeof(kmp_task_t), 0, Fn, -1);
359 if (!Task) {
360 delete Args;
361 return OFFLOAD_FAIL;
362 }
363
364 // Setup the arguments for the helper task
365 Task->shareds = Args;
366
367 // Convert types of depend objects
368 llvm::SmallVector<kmp_depend_info_t> DepObjs;
369 convertDepObjVector(DepObjs, DepObjCount, DepObjList);
370
371 // Launch the helper task
372 int Rc = __kmpc_omp_task_with_deps(nullptr, Gtid, Task, DepObjCount,
373 DepObjs.data(), 0, nullptr);
374
375 return Rc;
376}
377
378EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes,
379 int DeviceNum) {
380 TIMESCOPE();
381 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
382 DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n",
383 DeviceNum, Ptr, NumBytes);
384
385 // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation
386 // of unspecified behavior, see OpenMP spec).
387 if (!Ptr || NumBytes == 0) {
388 return Ptr;
389 }
390
391 if (DeviceNum == omp_get_initial_device()) {
392 DP("filling memory on host via memset");
393 memset(s: Ptr, c: ByteVal, n: NumBytes); // ignore return value, memset() cannot fail
394 } else {
395 // TODO: replace the omp_target_memset() slow path with the fast path.
396 // That will require the ability to execute a kernel from within
397 // libomptarget.so (which we do not have at the moment).
398
399 // This is a very slow path: create a filled array on the host and upload
400 // it to the GPU device.
401 int InitialDevice = omp_get_initial_device();
402 void *Shadow = omp_target_alloc(NumBytes, InitialDevice);
403 if (Shadow) {
404 (void)memset(s: Shadow, c: ByteVal, n: NumBytes);
405 (void)omp_target_memcpy(Ptr, Shadow, NumBytes, 0, 0, DeviceNum,
406 InitialDevice);
407 (void)omp_target_free(Shadow, InitialDevice);
408 } else {
409 // If the omp_target_alloc has failed, let's just not do anything.
410 // omp_target_memset does not have any good way to fail, so we
411 // simply avoid a catastrophic failure of the process for now.
412 DP("omp_target_memset failed to fill memory due to error with "
413 "omp_target_alloc");
414 }
415 }
416
417 DP("omp_target_memset returns %p\n", Ptr);
418 return Ptr;
419}
420
421EXTERN void *omp_target_memset_async(void *Ptr, int ByteVal, size_t NumBytes,
422 int DeviceNum, int DepObjCount,
423 omp_depend_t *DepObjList) {
424 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
425 DP("Call to omp_target_memset_async, device %d, device pointer %p, size %zu",
426 DeviceNum, Ptr, NumBytes);
427
428 // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation
429 // of unspecified behavior, see OpenMP spec).
430 if (!Ptr || NumBytes == 0)
431 return Ptr;
432
433 // Create the task object to deal with the async invocation
434 auto *Args = new TargetMemsetArgsTy{Ptr, ByteVal, NumBytes, DeviceNum};
435
436 // omp_target_memset_async() cannot fail via a return code, so ignore the
437 // return code of the helper function
438 (void)libomp_helper_task_creation(Args, &libomp_target_memset_async_task,
439 DepObjCount, DepObjList);
440
441 return Ptr;
442}
443
444EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length,
445 size_t DstOffset, size_t SrcOffset,
446 int DstDevice, int SrcDevice,
447 int DepObjCount, omp_depend_t *DepObjList) {
448 TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DstDevice) +
449 ";src_dev=" + std::to_string(val: SrcDevice) +
450 ";size=" + std::to_string(val: Length));
451 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
452 DP("Call to omp_target_memcpy_async, dst device %d, src device %d, "
453 "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
454 "src offset %zu, length %zu\n",
455 DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset,
456 Length);
457
458 // Check the source and dest address
459 if (Dst == nullptr || Src == nullptr)
460 return OFFLOAD_FAIL;
461
462 // Create task object
463 TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy(
464 Dst, Src, Length, DstOffset, SrcOffset, DstDevice, SrcDevice);
465
466 // Create and launch helper task
467 int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task,
468 DepObjCount, DepObjList);
469
470 DP("omp_target_memcpy_async returns %d\n", Rc);
471 return Rc;
472}
473
474EXTERN int
475omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
476 int NumDims, const size_t *Volume,
477 const size_t *DstOffsets, const size_t *SrcOffsets,
478 const size_t *DstDimensions, const size_t *SrcDimensions,
479 int DstDevice, int SrcDevice) {
480 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
481 DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, "
482 "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
483 "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
484 "volume " DPxMOD ", element size %zu, num_dims %d\n",
485 DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets),
486 DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions),
487 DPxPTR(Volume), ElementSize, NumDims);
488
489 if (!(Dst || Src)) {
490 DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n",
491 INT_MAX);
492 return INT_MAX;
493 }
494
495 if (!Dst || !Src || ElementSize < 1 || NumDims < 1 || !Volume ||
496 !DstOffsets || !SrcOffsets || !DstDimensions || !SrcDimensions) {
497 REPORT("Call to omp_target_memcpy_rect with invalid arguments\n");
498 return OFFLOAD_FAIL;
499 }
500
501 int Rc;
502 if (NumDims == 1) {
503 Rc = omp_target_memcpy(Dst, Src, ElementSize * Volume[0],
504 ElementSize * DstOffsets[0],
505 ElementSize * SrcOffsets[0], DstDevice, SrcDevice);
506 } else {
507 size_t DstSliceSize = ElementSize;
508 size_t SrcSliceSize = ElementSize;
509 for (int I = 1; I < NumDims; ++I) {
510 DstSliceSize *= DstDimensions[I];
511 SrcSliceSize *= SrcDimensions[I];
512 }
513
514 size_t DstOff = DstOffsets[0] * DstSliceSize;
515 size_t SrcOff = SrcOffsets[0] * SrcSliceSize;
516 for (size_t I = 0; I < Volume[0]; ++I) {
517 Rc = omp_target_memcpy_rect(
518 (char *)Dst + DstOff + DstSliceSize * I,
519 (char *)const_cast<void *>(Src) + SrcOff + SrcSliceSize * I,
520 ElementSize, NumDims - 1, Volume + 1, DstOffsets + 1, SrcOffsets + 1,
521 DstDimensions + 1, SrcDimensions + 1, DstDevice, SrcDevice);
522
523 if (Rc) {
524 DP("Recursive call to omp_target_memcpy_rect returns unsuccessfully\n");
525 return Rc;
526 }
527 }
528 }
529
530 DP("omp_target_memcpy_rect returns %d\n", Rc);
531 return Rc;
532}
533
534EXTERN int omp_target_memcpy_rect_async(
535 void *Dst, const void *Src, size_t ElementSize, int NumDims,
536 const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets,
537 const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice,
538 int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) {
539 TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DstDevice) +
540 ";src_dev=" + std::to_string(val: SrcDevice) +
541 ";size=" + std::to_string(val: ElementSize) +
542 ";num_dims=" + std::to_string(val: NumDims));
543 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
544 DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, "
545 "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
546 "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
547 "volume " DPxMOD ", element size %zu, num_dims %d\n",
548 DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets),
549 DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions),
550 DPxPTR(Volume), ElementSize, NumDims);
551
552 // Need to check this first to not return OFFLOAD_FAIL instead
553 if (!Dst && !Src) {
554 DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n",
555 INT_MAX);
556 return INT_MAX;
557 }
558
559 // Check the source and dest address
560 if (Dst == nullptr || Src == nullptr)
561 return OFFLOAD_FAIL;
562
563 // Create task object
564 TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy(
565 Dst, Src, ElementSize, NumDims, Volume, DstOffsets, SrcOffsets,
566 DstDimensions, SrcDimensions, DstDevice, SrcDevice);
567
568 // Create and launch helper task
569 int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task,
570 DepObjCount, DepObjList);
571
572 DP("omp_target_memcpy_rect_async returns %d\n", Rc);
573 return Rc;
574}
575
576EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
577 size_t Size, size_t DeviceOffset,
578 int DeviceNum) {
579 TIMESCOPE();
580 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
581 DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD ", "
582 "device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n",
583 DPxPTR(HostPtr), DPxPTR(DevicePtr), Size, DeviceOffset, DeviceNum);
584
585 if (!HostPtr || !DevicePtr || Size <= 0) {
586 REPORT("Call to omp_target_associate_ptr with invalid arguments\n");
587 return OFFLOAD_FAIL;
588 }
589
590 if (DeviceNum == omp_get_initial_device()) {
591 REPORT("omp_target_associate_ptr: no association possible on the host\n");
592 return OFFLOAD_FAIL;
593 }
594
595 auto DeviceOrErr = PM->getDevice(DeviceNum);
596 if (!DeviceOrErr)
597 FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
598
599 void *DeviceAddr = (void *)((uint64_t)DevicePtr + (uint64_t)DeviceOffset);
600 int Rc = DeviceOrErr->getMappingInfo().associatePtr(
601 const_cast<void *>(HostPtr), const_cast<void *>(DeviceAddr), Size);
602 DP("omp_target_associate_ptr returns %d\n", Rc);
603 return Rc;
604}
605
606EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) {
607 TIMESCOPE();
608 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
609 DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", "
610 "device_num %d\n",
611 DPxPTR(HostPtr), DeviceNum);
612
613 if (!HostPtr) {
614 REPORT("Call to omp_target_associate_ptr with invalid host_ptr\n");
615 return OFFLOAD_FAIL;
616 }
617
618 if (DeviceNum == omp_get_initial_device()) {
619 REPORT(
620 "omp_target_disassociate_ptr: no association possible on the host\n");
621 return OFFLOAD_FAIL;
622 }
623
624 auto DeviceOrErr = PM->getDevice(DeviceNum);
625 if (!DeviceOrErr)
626 FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
627
628 int Rc = DeviceOrErr->getMappingInfo().disassociatePtr(
629 const_cast<void *>(HostPtr));
630 DP("omp_target_disassociate_ptr returns %d\n", Rc);
631 return Rc;
632}
633
634EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) {
635 TIMESCOPE();
636 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
637 DP("Call to omp_get_mapped_ptr with ptr " DPxMOD ", device_num %d.\n",
638 DPxPTR(Ptr), DeviceNum);
639
640 if (!Ptr) {
641 REPORT("Call to omp_get_mapped_ptr with nullptr.\n");
642 return nullptr;
643 }
644
645 size_t NumDevices = omp_get_initial_device();
646 if (DeviceNum == NumDevices) {
647 DP("Device %d is initial device, returning Ptr " DPxMOD ".\n",
648 DeviceNum, DPxPTR(Ptr));
649 return const_cast<void *>(Ptr);
650 }
651
652 if (NumDevices <= DeviceNum) {
653 DP("DeviceNum %d is invalid, returning nullptr.\n", DeviceNum);
654 return nullptr;
655 }
656
657 auto DeviceOrErr = PM->getDevice(DeviceNum);
658 if (!DeviceOrErr)
659 FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
660
661 TargetPointerResultTy TPR =
662 DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1,
663 /*UpdateRefCount=*/false,
664 /*UseHoldRefCount=*/false);
665 if (!TPR.isPresent()) {
666 DP("Ptr " DPxMOD "is not present on device %d, returning nullptr.\n",
667 DPxPTR(Ptr), DeviceNum);
668 return nullptr;
669 }
670
671 DP("omp_get_mapped_ptr returns " DPxMOD ".\n", DPxPTR(TPR.TargetPointer));
672
673 return TPR.TargetPointer;
674}
675

// Source code of offload/src/OpenMP/API.cpp