1 | //===----------- api.cpp - Target independent OpenMP target RTL -----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Implementation of OpenMP API interface functions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "PluginManager.h" |
14 | #include "device.h" |
15 | #include "omptarget.h" |
16 | #include "rtl.h" |
17 | |
18 | #include "OpenMP/InternalTypes.h" |
19 | #include "OpenMP/Mapping.h" |
20 | #include "OpenMP/OMPT/Interface.h" |
21 | #include "OpenMP/omp.h" |
22 | #include "Shared/Profile.h" |
23 | |
24 | #include "llvm/ADT/SmallVector.h" |
25 | |
26 | #include <climits> |
27 | #include <cstdlib> |
28 | #include <cstring> |
29 | #include <mutex> |
30 | |
31 | EXTERN void ompx_dump_mapping_tables() { |
32 | ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;" }; |
33 | auto ExclusiveDevicesAccessor = PM->getExclusiveDevicesAccessor(); |
34 | for (auto &Device : PM->devices(ExclusiveDevicesAccessor)) |
35 | dumpTargetPointerMappings(&Loc, Device, true); |
36 | } |
37 | |
38 | #ifdef OMPT_SUPPORT |
39 | using namespace llvm::omp::target::ompt; |
40 | #endif |
41 | |
// Helpers behind the explicit allocation, free, lock and unlock entry points
// of this file. They are only declared here and are defined elsewhere. Every
// caller in this file passes __func__ as \p Name.

// Allocate \p Size bytes for device \p DeviceNum using allocation kind
// \p Kind (one of the TARGET_ALLOC_* values).
void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
                          const char *Name);

// Release \p DevicePtr on device \p DeviceNum; \p Kind is the TARGET_ALLOC_*
// value matching the entry point that is used to free it.
void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
                        const char *Name);

// Lock \p Size bytes of host memory starting at \p HostPtr for device
// \p DeviceNum.
void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
                         const char *Name);

// Undo a previous lock of \p HostPtr for device \p DeviceNum.
void targetUnlockExplicit(void *HostPtr, int DeviceNum, const char *Name);
49 | |
50 | // Implemented in libomp, they are called from within __tgt_* functions. |
51 | extern "C" { |
52 | int __kmpc_get_target_offload(void) __attribute__((weak)); |
53 | kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, int32_t gtid, int32_t flags, |
54 | size_t sizeof_kmp_task_t, |
55 | size_t sizeof_shareds, |
56 | kmp_routine_entry_t task_entry) |
57 | __attribute__((weak)); |
58 | |
59 | kmp_task_t * |
60 | __kmpc_omp_target_task_alloc(ident_t *loc_ref, int32_t gtid, int32_t flags, |
61 | size_t sizeof_kmp_task_t, size_t sizeof_shareds, |
62 | kmp_routine_entry_t task_entry, int64_t device_id) |
63 | __attribute__((weak)); |
64 | |
65 | int32_t __kmpc_omp_task_with_deps(ident_t *loc_ref, int32_t gtid, |
66 | kmp_task_t *new_task, int32_t ndeps, |
67 | kmp_depend_info_t *dep_list, |
68 | int32_t ndeps_noalias, |
69 | kmp_depend_info_t *noalias_dep_list) |
70 | __attribute__((weak)); |
71 | } |
72 | |
73 | EXTERN int omp_get_num_devices(void) { |
74 | TIMESCOPE(); |
75 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
76 | size_t NumDevices = PM->getNumDevices(); |
77 | |
78 | DP("Call to omp_get_num_devices returning %zd\n" , NumDevices); |
79 | |
80 | return NumDevices; |
81 | } |
82 | |
83 | EXTERN int omp_get_device_num(void) { |
84 | TIMESCOPE(); |
85 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
86 | int HostDevice = omp_get_initial_device(); |
87 | |
88 | DP("Call to omp_get_device_num returning %d\n" , HostDevice); |
89 | |
90 | return HostDevice; |
91 | } |
92 | |
93 | EXTERN int omp_get_initial_device(void) { |
94 | TIMESCOPE(); |
95 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
96 | int HostDevice = omp_get_num_devices(); |
97 | DP("Call to omp_get_initial_device returning %d\n" , HostDevice); |
98 | return HostDevice; |
99 | } |
100 | |
101 | EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) { |
102 | TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DeviceNum) + |
103 | ";size=" + std::to_string(val: Size)); |
104 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
105 | return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); |
106 | } |
107 | |
108 | EXTERN void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum) { |
109 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
110 | return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEVICE, __func__); |
111 | } |
112 | |
113 | EXTERN void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum) { |
114 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
115 | return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_HOST, __func__); |
116 | } |
117 | |
118 | EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) { |
119 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
120 | return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_SHARED, __func__); |
121 | } |
122 | |
123 | EXTERN void omp_target_free(void *Ptr, int DeviceNum) { |
124 | TIMESCOPE(); |
125 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
126 | return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); |
127 | } |
128 | |
129 | EXTERN void llvm_omp_target_free_device(void *Ptr, int DeviceNum) { |
130 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
131 | return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEVICE, __func__); |
132 | } |
133 | |
134 | EXTERN void llvm_omp_target_free_host(void *Ptr, int DeviceNum) { |
135 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
136 | return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_HOST, __func__); |
137 | } |
138 | |
139 | EXTERN void llvm_omp_target_free_shared(void *Ptre, int DeviceNum) { |
140 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
141 | return targetFreeExplicit(Ptre, DeviceNum, TARGET_ALLOC_SHARED, __func__); |
142 | } |
143 | |
144 | EXTERN void *llvm_omp_target_dynamic_shared_alloc() { |
145 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
146 | return nullptr; |
147 | } |
148 | |
149 | EXTERN void *llvm_omp_get_dynamic_shared() { |
150 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
151 | return nullptr; |
152 | } |
153 | |
154 | EXTERN [[nodiscard]] void *llvm_omp_target_lock_mem(void *Ptr, size_t Size, |
155 | int DeviceNum) { |
156 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
157 | return targetLockExplicit(HostPtr: Ptr, Size, DeviceNum, Name: __func__); |
158 | } |
159 | |
160 | EXTERN void llvm_omp_target_unlock_mem(void *Ptr, int DeviceNum) { |
161 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
162 | targetUnlockExplicit(HostPtr: Ptr, DeviceNum, Name: __func__); |
163 | } |
164 | |
165 | EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { |
166 | TIMESCOPE(); |
167 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
168 | DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n" , |
169 | DeviceNum, DPxPTR(Ptr)); |
170 | |
171 | if (!Ptr) { |
172 | DP("Call to omp_target_is_present with NULL ptr, returning false\n" ); |
173 | return false; |
174 | } |
175 | |
176 | if (DeviceNum == omp_get_initial_device()) { |
177 | DP("Call to omp_target_is_present on host, returning true\n" ); |
178 | return true; |
179 | } |
180 | |
181 | auto DeviceOrErr = PM->getDevice(DeviceNum); |
182 | if (!DeviceOrErr) |
183 | FATAL_MESSAGE(DeviceNum, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
184 | |
185 | // omp_target_is_present tests whether a host pointer refers to storage that |
186 | // is mapped to a given device. However, due to the lack of the storage size, |
187 | // only check 1 byte. Cannot set size 0 which checks whether the pointer (zero |
188 | // lengh array) is mapped instead of the referred storage. |
189 | TargetPointerResultTy TPR = |
190 | DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1, |
191 | /*UpdateRefCount=*/false, |
192 | /*UseHoldRefCount=*/false); |
193 | int Rc = TPR.isPresent(); |
194 | DP("Call to omp_target_is_present returns %d\n" , Rc); |
195 | return Rc; |
196 | } |
197 | |
198 | EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, |
199 | size_t DstOffset, size_t SrcOffset, int DstDevice, |
200 | int SrcDevice) { |
201 | TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DstDevice) + |
202 | ";src_dev=" + std::to_string(val: SrcDevice) + |
203 | ";size=" + std::to_string(val: Length)); |
204 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
205 | DP("Call to omp_target_memcpy, dst device %d, src device %d, " |
206 | "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " |
207 | "src offset %zu, length %zu\n" , |
208 | DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset, |
209 | Length); |
210 | |
211 | if (!Dst || !Src || Length <= 0) { |
212 | if (Length == 0) { |
213 | DP("Call to omp_target_memcpy with zero length, nothing to do\n" ); |
214 | return OFFLOAD_SUCCESS; |
215 | } |
216 | |
217 | REPORT("Call to omp_target_memcpy with invalid arguments\n" ); |
218 | return OFFLOAD_FAIL; |
219 | } |
220 | |
221 | int Rc = OFFLOAD_SUCCESS; |
222 | void *SrcAddr = (char *)const_cast<void *>(Src) + SrcOffset; |
223 | void *DstAddr = (char *)Dst + DstOffset; |
224 | |
225 | if (SrcDevice == omp_get_initial_device() && |
226 | DstDevice == omp_get_initial_device()) { |
227 | DP("copy from host to host\n" ); |
228 | const void *P = memcpy(dest: DstAddr, src: SrcAddr, n: Length); |
229 | if (P == NULL) |
230 | Rc = OFFLOAD_FAIL; |
231 | } else if (SrcDevice == omp_get_initial_device()) { |
232 | DP("copy from host to device\n" ); |
233 | auto DstDeviceOrErr = PM->getDevice(DstDevice); |
234 | if (!DstDeviceOrErr) |
235 | FATAL_MESSAGE(DstDevice, "%s" , |
236 | toString(DstDeviceOrErr.takeError()).c_str()); |
237 | AsyncInfoTy AsyncInfo(*DstDeviceOrErr); |
238 | Rc = DstDeviceOrErr->submitData(DstAddr, SrcAddr, Length, AsyncInfo); |
239 | } else if (DstDevice == omp_get_initial_device()) { |
240 | DP("copy from device to host\n" ); |
241 | auto SrcDeviceOrErr = PM->getDevice(SrcDevice); |
242 | if (!SrcDeviceOrErr) |
243 | FATAL_MESSAGE(SrcDevice, "%s" , |
244 | toString(SrcDeviceOrErr.takeError()).c_str()); |
245 | AsyncInfoTy AsyncInfo(*SrcDeviceOrErr); |
246 | Rc = SrcDeviceOrErr->retrieveData(DstAddr, SrcAddr, Length, AsyncInfo); |
247 | } else { |
248 | DP("copy from device to device\n" ); |
249 | auto SrcDeviceOrErr = PM->getDevice(SrcDevice); |
250 | if (!SrcDeviceOrErr) |
251 | FATAL_MESSAGE(SrcDevice, "%s" , |
252 | toString(SrcDeviceOrErr.takeError()).c_str()); |
253 | AsyncInfoTy AsyncInfo(*SrcDeviceOrErr); |
254 | auto DstDeviceOrErr = PM->getDevice(DstDevice); |
255 | if (!DstDeviceOrErr) |
256 | FATAL_MESSAGE(DstDevice, "%s" , |
257 | toString(DstDeviceOrErr.takeError()).c_str()); |
258 | // First try to use D2D memcpy which is more efficient. If fails, fall back |
259 | // to unefficient way. |
260 | if (SrcDeviceOrErr->isDataExchangable(*DstDeviceOrErr)) { |
261 | AsyncInfoTy AsyncInfo(*SrcDeviceOrErr); |
262 | Rc = SrcDeviceOrErr->dataExchange(SrcAddr, *DstDeviceOrErr, DstAddr, |
263 | Length, AsyncInfo); |
264 | if (Rc == OFFLOAD_SUCCESS) |
265 | return OFFLOAD_SUCCESS; |
266 | } |
267 | |
268 | void *Buffer = malloc(size: Length); |
269 | { |
270 | AsyncInfoTy AsyncInfo(*SrcDeviceOrErr); |
271 | Rc = SrcDeviceOrErr->retrieveData(Buffer, SrcAddr, Length, AsyncInfo); |
272 | } |
273 | if (Rc == OFFLOAD_SUCCESS) { |
274 | AsyncInfoTy AsyncInfo(*DstDeviceOrErr); |
275 | Rc = DstDeviceOrErr->submitData(DstAddr, Buffer, Length, AsyncInfo); |
276 | } |
277 | free(ptr: Buffer); |
278 | } |
279 | |
280 | DP("omp_target_memcpy returns %d\n" , Rc); |
281 | return Rc; |
282 | } |
283 | |
284 | // The helper function that calls omp_target_memcpy or omp_target_memcpy_rect |
285 | static int libomp_target_memcpy_async_task(int32_t Gtid, kmp_task_t *Task) { |
286 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
287 | if (Task == nullptr) |
288 | return OFFLOAD_FAIL; |
289 | |
290 | TargetMemcpyArgsTy *Args = (TargetMemcpyArgsTy *)Task->shareds; |
291 | |
292 | if (Args == nullptr) |
293 | return OFFLOAD_FAIL; |
294 | |
295 | // Call blocked version |
296 | int Rc = OFFLOAD_SUCCESS; |
297 | if (Args->IsRectMemcpy) { |
298 | Rc = omp_target_memcpy_rect( |
299 | Args->Dst, Args->Src, Args->ElementSize, Args->NumDims, Args->Volume, |
300 | Args->DstOffsets, Args->SrcOffsets, Args->DstDimensions, |
301 | Args->SrcDimensions, Args->DstDevice, Args->SrcDevice); |
302 | |
303 | DP("omp_target_memcpy_rect returns %d\n" , Rc); |
304 | } else { |
305 | Rc = omp_target_memcpy(Args->Dst, Args->Src, Args->Length, Args->DstOffset, |
306 | Args->SrcOffset, Args->DstDevice, Args->SrcDevice); |
307 | |
308 | DP("omp_target_memcpy returns %d\n" , Rc); |
309 | } |
310 | |
311 | // Release the arguments object |
312 | delete Args; |
313 | |
314 | return Rc; |
315 | } |
316 | |
317 | static int libomp_target_memset_async_task(int32_t Gtid, kmp_task_t *Task) { |
318 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
319 | if (!Task) |
320 | return OFFLOAD_FAIL; |
321 | |
322 | auto *Args = reinterpret_cast<TargetMemsetArgsTy *>(Task->shareds); |
323 | if (!Args) |
324 | return OFFLOAD_FAIL; |
325 | |
326 | // call omp_target_memset() |
327 | omp_target_memset(Args->Ptr, Args->C, Args->N, Args->DeviceNum); |
328 | |
329 | delete Args; |
330 | |
331 | return OFFLOAD_SUCCESS; |
332 | } |
333 | |
334 | static inline void |
335 | convertDepObjVector(llvm::SmallVector<kmp_depend_info_t> &Vec, int DepObjCount, |
336 | omp_depend_t *DepObjList) { |
337 | for (int i = 0; i < DepObjCount; ++i) { |
338 | omp_depend_t DepObj = DepObjList[i]; |
339 | Vec.push_back(*((kmp_depend_info_t *)DepObj)); |
340 | } |
341 | } |
342 | |
343 | template <class T> |
344 | static inline int |
345 | libomp_helper_task_creation(T *Args, int (*Fn)(int32_t, kmp_task_t *), |
346 | int DepObjCount, omp_depend_t *DepObjList) { |
347 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
348 | // Create global thread ID |
349 | int Gtid = __kmpc_global_thread_num(nullptr); |
350 | |
351 | // Setup the hidden helper flags |
352 | int32_t Flags = 0; |
353 | kmp_tasking_flags_t *InputFlags = (kmp_tasking_flags_t *)&Flags; |
354 | InputFlags->hidden_helper = 1; |
355 | |
356 | // Alloc the helper task |
357 | kmp_task_t *Task = __kmpc_omp_target_task_alloc( |
358 | nullptr, Gtid, Flags, sizeof(kmp_task_t), 0, Fn, -1); |
359 | if (!Task) { |
360 | delete Args; |
361 | return OFFLOAD_FAIL; |
362 | } |
363 | |
364 | // Setup the arguments for the helper task |
365 | Task->shareds = Args; |
366 | |
367 | // Convert types of depend objects |
368 | llvm::SmallVector<kmp_depend_info_t> DepObjs; |
369 | convertDepObjVector(DepObjs, DepObjCount, DepObjList); |
370 | |
371 | // Launch the helper task |
372 | int Rc = __kmpc_omp_task_with_deps(nullptr, Gtid, Task, DepObjCount, |
373 | DepObjs.data(), 0, nullptr); |
374 | |
375 | return Rc; |
376 | } |
377 | |
378 | EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes, |
379 | int DeviceNum) { |
380 | TIMESCOPE(); |
381 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
382 | DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n" , |
383 | DeviceNum, Ptr, NumBytes); |
384 | |
385 | // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation |
386 | // of unspecified behavior, see OpenMP spec). |
387 | if (!Ptr || NumBytes == 0) { |
388 | return Ptr; |
389 | } |
390 | |
391 | if (DeviceNum == omp_get_initial_device()) { |
392 | DP("filling memory on host via memset" ); |
393 | memset(s: Ptr, c: ByteVal, n: NumBytes); // ignore return value, memset() cannot fail |
394 | } else { |
395 | // TODO: replace the omp_target_memset() slow path with the fast path. |
396 | // That will require the ability to execute a kernel from within |
397 | // libomptarget.so (which we do not have at the moment). |
398 | |
399 | // This is a very slow path: create a filled array on the host and upload |
400 | // it to the GPU device. |
401 | int InitialDevice = omp_get_initial_device(); |
402 | void *Shadow = omp_target_alloc(NumBytes, InitialDevice); |
403 | if (Shadow) { |
404 | (void)memset(s: Shadow, c: ByteVal, n: NumBytes); |
405 | (void)omp_target_memcpy(Ptr, Shadow, NumBytes, 0, 0, DeviceNum, |
406 | InitialDevice); |
407 | (void)omp_target_free(Shadow, InitialDevice); |
408 | } else { |
409 | // If the omp_target_alloc has failed, let's just not do anything. |
410 | // omp_target_memset does not have any good way to fail, so we |
411 | // simply avoid a catastrophic failure of the process for now. |
412 | DP("omp_target_memset failed to fill memory due to error with " |
413 | "omp_target_alloc" ); |
414 | } |
415 | } |
416 | |
417 | DP("omp_target_memset returns %p\n" , Ptr); |
418 | return Ptr; |
419 | } |
420 | |
421 | EXTERN void *omp_target_memset_async(void *Ptr, int ByteVal, size_t NumBytes, |
422 | int DeviceNum, int DepObjCount, |
423 | omp_depend_t *DepObjList) { |
424 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
425 | DP("Call to omp_target_memset_async, device %d, device pointer %p, size %zu" , |
426 | DeviceNum, Ptr, NumBytes); |
427 | |
428 | // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation |
429 | // of unspecified behavior, see OpenMP spec). |
430 | if (!Ptr || NumBytes == 0) |
431 | return Ptr; |
432 | |
433 | // Create the task object to deal with the async invocation |
434 | auto *Args = new TargetMemsetArgsTy{Ptr, ByteVal, NumBytes, DeviceNum}; |
435 | |
436 | // omp_target_memset_async() cannot fail via a return code, so ignore the |
437 | // return code of the helper function |
438 | (void)libomp_helper_task_creation(Args, &libomp_target_memset_async_task, |
439 | DepObjCount, DepObjList); |
440 | |
441 | return Ptr; |
442 | } |
443 | |
444 | EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length, |
445 | size_t DstOffset, size_t SrcOffset, |
446 | int DstDevice, int SrcDevice, |
447 | int DepObjCount, omp_depend_t *DepObjList) { |
448 | TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DstDevice) + |
449 | ";src_dev=" + std::to_string(val: SrcDevice) + |
450 | ";size=" + std::to_string(val: Length)); |
451 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
452 | DP("Call to omp_target_memcpy_async, dst device %d, src device %d, " |
453 | "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " |
454 | "src offset %zu, length %zu\n" , |
455 | DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset, |
456 | Length); |
457 | |
458 | // Check the source and dest address |
459 | if (Dst == nullptr || Src == nullptr) |
460 | return OFFLOAD_FAIL; |
461 | |
462 | // Create task object |
463 | TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy( |
464 | Dst, Src, Length, DstOffset, SrcOffset, DstDevice, SrcDevice); |
465 | |
466 | // Create and launch helper task |
467 | int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task, |
468 | DepObjCount, DepObjList); |
469 | |
470 | DP("omp_target_memcpy_async returns %d\n" , Rc); |
471 | return Rc; |
472 | } |
473 | |
474 | EXTERN int |
475 | omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize, |
476 | int NumDims, const size_t *Volume, |
477 | const size_t *DstOffsets, const size_t *SrcOffsets, |
478 | const size_t *DstDimensions, const size_t *SrcDimensions, |
479 | int DstDevice, int SrcDevice) { |
480 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
481 | DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, " |
482 | "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " |
483 | "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " |
484 | "volume " DPxMOD ", element size %zu, num_dims %d\n" , |
485 | DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets), |
486 | DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions), |
487 | DPxPTR(Volume), ElementSize, NumDims); |
488 | |
489 | if (!(Dst || Src)) { |
490 | DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n" , |
491 | INT_MAX); |
492 | return INT_MAX; |
493 | } |
494 | |
495 | if (!Dst || !Src || ElementSize < 1 || NumDims < 1 || !Volume || |
496 | !DstOffsets || !SrcOffsets || !DstDimensions || !SrcDimensions) { |
497 | REPORT("Call to omp_target_memcpy_rect with invalid arguments\n" ); |
498 | return OFFLOAD_FAIL; |
499 | } |
500 | |
501 | int Rc; |
502 | if (NumDims == 1) { |
503 | Rc = omp_target_memcpy(Dst, Src, ElementSize * Volume[0], |
504 | ElementSize * DstOffsets[0], |
505 | ElementSize * SrcOffsets[0], DstDevice, SrcDevice); |
506 | } else { |
507 | size_t DstSliceSize = ElementSize; |
508 | size_t SrcSliceSize = ElementSize; |
509 | for (int I = 1; I < NumDims; ++I) { |
510 | DstSliceSize *= DstDimensions[I]; |
511 | SrcSliceSize *= SrcDimensions[I]; |
512 | } |
513 | |
514 | size_t DstOff = DstOffsets[0] * DstSliceSize; |
515 | size_t SrcOff = SrcOffsets[0] * SrcSliceSize; |
516 | for (size_t I = 0; I < Volume[0]; ++I) { |
517 | Rc = omp_target_memcpy_rect( |
518 | (char *)Dst + DstOff + DstSliceSize * I, |
519 | (char *)const_cast<void *>(Src) + SrcOff + SrcSliceSize * I, |
520 | ElementSize, NumDims - 1, Volume + 1, DstOffsets + 1, SrcOffsets + 1, |
521 | DstDimensions + 1, SrcDimensions + 1, DstDevice, SrcDevice); |
522 | |
523 | if (Rc) { |
524 | DP("Recursive call to omp_target_memcpy_rect returns unsuccessfully\n" ); |
525 | return Rc; |
526 | } |
527 | } |
528 | } |
529 | |
530 | DP("omp_target_memcpy_rect returns %d\n" , Rc); |
531 | return Rc; |
532 | } |
533 | |
534 | EXTERN int omp_target_memcpy_rect_async( |
535 | void *Dst, const void *Src, size_t ElementSize, int NumDims, |
536 | const size_t *Volume, const size_t *DstOffsets, const size_t *SrcOffsets, |
537 | const size_t *DstDimensions, const size_t *SrcDimensions, int DstDevice, |
538 | int SrcDevice, int DepObjCount, omp_depend_t *DepObjList) { |
539 | TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(val: DstDevice) + |
540 | ";src_dev=" + std::to_string(val: SrcDevice) + |
541 | ";size=" + std::to_string(val: ElementSize) + |
542 | ";num_dims=" + std::to_string(val: NumDims)); |
543 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
544 | DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, " |
545 | "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " |
546 | "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " |
547 | "volume " DPxMOD ", element size %zu, num_dims %d\n" , |
548 | DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets), |
549 | DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions), |
550 | DPxPTR(Volume), ElementSize, NumDims); |
551 | |
552 | // Need to check this first to not return OFFLOAD_FAIL instead |
553 | if (!Dst && !Src) { |
554 | DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n" , |
555 | INT_MAX); |
556 | return INT_MAX; |
557 | } |
558 | |
559 | // Check the source and dest address |
560 | if (Dst == nullptr || Src == nullptr) |
561 | return OFFLOAD_FAIL; |
562 | |
563 | // Create task object |
564 | TargetMemcpyArgsTy *Args = new TargetMemcpyArgsTy( |
565 | Dst, Src, ElementSize, NumDims, Volume, DstOffsets, SrcOffsets, |
566 | DstDimensions, SrcDimensions, DstDevice, SrcDevice); |
567 | |
568 | // Create and launch helper task |
569 | int Rc = libomp_helper_task_creation(Args, &libomp_target_memcpy_async_task, |
570 | DepObjCount, DepObjList); |
571 | |
572 | DP("omp_target_memcpy_rect_async returns %d\n" , Rc); |
573 | return Rc; |
574 | } |
575 | |
576 | EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr, |
577 | size_t Size, size_t DeviceOffset, |
578 | int DeviceNum) { |
579 | TIMESCOPE(); |
580 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
581 | DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD ", " |
582 | "device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n" , |
583 | DPxPTR(HostPtr), DPxPTR(DevicePtr), Size, DeviceOffset, DeviceNum); |
584 | |
585 | if (!HostPtr || !DevicePtr || Size <= 0) { |
586 | REPORT("Call to omp_target_associate_ptr with invalid arguments\n" ); |
587 | return OFFLOAD_FAIL; |
588 | } |
589 | |
590 | if (DeviceNum == omp_get_initial_device()) { |
591 | REPORT("omp_target_associate_ptr: no association possible on the host\n" ); |
592 | return OFFLOAD_FAIL; |
593 | } |
594 | |
595 | auto DeviceOrErr = PM->getDevice(DeviceNum); |
596 | if (!DeviceOrErr) |
597 | FATAL_MESSAGE(DeviceNum, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
598 | |
599 | void *DeviceAddr = (void *)((uint64_t)DevicePtr + (uint64_t)DeviceOffset); |
600 | int Rc = DeviceOrErr->getMappingInfo().associatePtr( |
601 | const_cast<void *>(HostPtr), const_cast<void *>(DeviceAddr), Size); |
602 | DP("omp_target_associate_ptr returns %d\n" , Rc); |
603 | return Rc; |
604 | } |
605 | |
606 | EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) { |
607 | TIMESCOPE(); |
608 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
609 | DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", " |
610 | "device_num %d\n" , |
611 | DPxPTR(HostPtr), DeviceNum); |
612 | |
613 | if (!HostPtr) { |
614 | REPORT("Call to omp_target_associate_ptr with invalid host_ptr\n" ); |
615 | return OFFLOAD_FAIL; |
616 | } |
617 | |
618 | if (DeviceNum == omp_get_initial_device()) { |
619 | REPORT( |
620 | "omp_target_disassociate_ptr: no association possible on the host\n" ); |
621 | return OFFLOAD_FAIL; |
622 | } |
623 | |
624 | auto DeviceOrErr = PM->getDevice(DeviceNum); |
625 | if (!DeviceOrErr) |
626 | FATAL_MESSAGE(DeviceNum, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
627 | |
628 | int Rc = DeviceOrErr->getMappingInfo().disassociatePtr( |
629 | const_cast<void *>(HostPtr)); |
630 | DP("omp_target_disassociate_ptr returns %d\n" , Rc); |
631 | return Rc; |
632 | } |
633 | |
634 | EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { |
635 | TIMESCOPE(); |
636 | OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); |
637 | DP("Call to omp_get_mapped_ptr with ptr " DPxMOD ", device_num %d.\n" , |
638 | DPxPTR(Ptr), DeviceNum); |
639 | |
640 | if (!Ptr) { |
641 | REPORT("Call to omp_get_mapped_ptr with nullptr.\n" ); |
642 | return nullptr; |
643 | } |
644 | |
645 | size_t NumDevices = omp_get_initial_device(); |
646 | if (DeviceNum == NumDevices) { |
647 | DP("Device %d is initial device, returning Ptr " DPxMOD ".\n" , |
648 | DeviceNum, DPxPTR(Ptr)); |
649 | return const_cast<void *>(Ptr); |
650 | } |
651 | |
652 | if (NumDevices <= DeviceNum) { |
653 | DP("DeviceNum %d is invalid, returning nullptr.\n" , DeviceNum); |
654 | return nullptr; |
655 | } |
656 | |
657 | auto DeviceOrErr = PM->getDevice(DeviceNum); |
658 | if (!DeviceOrErr) |
659 | FATAL_MESSAGE(DeviceNum, "%s" , toString(DeviceOrErr.takeError()).c_str()); |
660 | |
661 | TargetPointerResultTy TPR = |
662 | DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1, |
663 | /*UpdateRefCount=*/false, |
664 | /*UseHoldRefCount=*/false); |
665 | if (!TPR.isPresent()) { |
666 | DP("Ptr " DPxMOD "is not present on device %d, returning nullptr.\n" , |
667 | DPxPTR(Ptr), DeviceNum); |
668 | return nullptr; |
669 | } |
670 | |
671 | DP("omp_get_mapped_ptr returns " DPxMOD ".\n" , DPxPTR(TPR.TargetPointer)); |
672 | |
673 | return TPR.TargetPointer; |
674 | } |
675 | |