//===- PluginInterface.cpp - Target independent plugin device interface ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "PluginInterface.h"

#include "Shared/APITypes.h"
#include "Shared/Debug.h"
#include "Shared/Environment.h"
#include "Shared/PluginAPI.h"

#include "GlobalHandler.h"
#include "JIT.h"
#include "Utils/ELF.h"
#include "omptarget.h"

#ifdef OMPT_SUPPORT
#include "OpenMP/OMPT/Callback.h"
#include "omp-tools.h"
#endif

#include "llvm/BinaryFormat/Magic.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"

#include <cstdint>
#include <cstring>
#include <limits>

using namespace llvm;
using namespace omp;
using namespace target;
using namespace plugin;

GenericPluginTy *PluginTy::SpecificPlugin = nullptr;

// TODO: Fix any thread safety issues for multi-threaded kernel recording.
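// RecordReplayTy implements the kernel record-replay mechanism: while
// recording it saves the device image, the values of all mapped globals, the
// pre-allocated device memory, and the launch parameters of a kernel so that
// the kernel can later be replayed in isolation against identical inputs. All
// device allocations are served from a single bump-allocated region so that
// device pointers are reproducible across record and replay runs.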
struct RecordReplayTy {

  // Describes the state of the record replay mechanism.
  enum RRStatusTy { RRDeactivated = 0, RRRecording, RRReplaying };

private:
  // Memory pointers for recording, replaying memory.
  void *MemoryStart = nullptr;
  void *MemoryPtr = nullptr;
  size_t MemorySize = 0;
  size_t TotalSize = 0;
  GenericDeviceTy *Device = nullptr;
  std::mutex AllocationLock;

  RRStatusTy Status = RRDeactivated;
  bool ReplaySaveOutput = false;
  bool UsedVAMap = false;
  uintptr_t MemoryOffset = 0;

  // A list of all globals mapped to the device.
  struct GlobalEntry {
    const char *Name;
    uint64_t Size;
    void *Addr;
  };
  llvm::SmallVector<GlobalEntry> GlobalEntries{};

  void *suggestAddress(uint64_t MaxMemoryAllocation) {
    // Get a valid pointer address for this system.
    void *Addr =
        Device->allocate(1024, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT);
    Device->free(Addr);
    // Align the address to MaxMemoryAllocation.
    Addr = (void *)alignPtr(Addr, MaxMemoryAllocation);
    return Addr;
  }

  Error preAllocateVAMemory(uint64_t MaxMemoryAllocation, void *VAddr) {
    size_t ASize = MaxMemoryAllocation;

    if (!VAddr && isRecording())
      VAddr = suggestAddress(MaxMemoryAllocation);

    DP("Request %" PRIu64 " bytes allocated at %p\n", MaxMemoryAllocation,
       VAddr);

    if (auto Err = Device->memoryVAMap(&MemoryStart, VAddr, &ASize))
      return Err;

    if (isReplaying() && VAddr != MemoryStart) {
      return Plugin::error("Record-Replay cannot assign the "
                           "requested recorded address (%p, %p)",
                           VAddr, MemoryStart);
    }

    INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device->getDeviceId(),
         "Allocated %" PRIu64 " bytes at %p for replay.\n", ASize, MemoryStart);

    MemoryPtr = MemoryStart;
    MemorySize = 0;
    TotalSize = ASize;
    UsedVAMap = true;
    return Plugin::success();
  }

  Error preAllocateHeuristic(uint64_t MaxMemoryAllocation,
                             uint64_t RequiredMemoryAllocation, void *VAddr) {
    const size_t MAX_MEMORY_ALLOCATION = MaxMemoryAllocation;
    constexpr size_t STEP = 1024 * 1024 * 1024ULL;
    MemoryStart = nullptr;
    for (TotalSize = MAX_MEMORY_ALLOCATION; TotalSize > 0; TotalSize -= STEP) {
      MemoryStart =
          Device->allocate(TotalSize, /*HstPtr=*/nullptr, TARGET_ALLOC_DEFAULT);
      if (MemoryStart)
        break;
    }
    if (!MemoryStart)
      return Plugin::error("Allocating record/replay memory");

    if (VAddr && VAddr != MemoryStart)
      MemoryOffset = uintptr_t(VAddr) - uintptr_t(MemoryStart);

    MemoryPtr = MemoryStart;
    MemorySize = 0;
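    // Example (illustrative): if replay requested VAddr 0x70000000 but the
    // allocator returned MemoryStart 0x50000000, MemoryOffset is 0x20000000.
    // When the pool is large enough we simply "waste" that many bytes so
    // subsequent allocations line up with the recorded addresses.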
    // Check if we need adjustment.
    if (MemoryOffset > 0 &&
        TotalSize >= RequiredMemoryAllocation + MemoryOffset) {
      // If we are off but "before" the required address and with enough space,
      // we just "allocate" the offset to match the required address.
      MemoryPtr = (char *)MemoryPtr + MemoryOffset;
      MemorySize += MemoryOffset;
      MemoryOffset = 0;
      assert(MemoryPtr == VAddr && "Expected offset adjustment to work");
    } else if (MemoryOffset) {
      // If we are off and in a situation we cannot just "waste" memory to force
      // a match, we hope adjusting the arguments is sufficient.
      REPORT(
          "WARNING Failed to allocate replay memory at required location %p, "
          "got %p, trying to offset argument pointers by %" PRIi64 "\n",
          VAddr, MemoryStart, MemoryOffset);
    }

    INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device->getDeviceId(),
         "Allocated %" PRIu64 " bytes at %p for replay.\n", TotalSize,
         MemoryStart);

    return Plugin::success();
  }

  Error preallocateDeviceMemory(uint64_t DeviceMemorySize, void *ReqVAddr) {
    if (Device->supportVAManagement()) {
      auto Err = preAllocateVAMemory(DeviceMemorySize, ReqVAddr);
      if (!Err)
        return Plugin::success();
      REPORT("WARNING VA mapping failed, falling back to the heuristic "
             "allocation (Error: %s)\n",
             toString(std::move(Err)).data());
    }

    uint64_t DevMemSize;
    if (Device->getDeviceMemorySize(DevMemSize))
      return Plugin::error("Cannot determine device memory size");

    return preAllocateHeuristic(DevMemSize, DeviceMemorySize, ReqVAddr);
  }

  void dumpDeviceMemory(StringRef Filename) {
    ErrorOr<std::unique_ptr<WritableMemoryBuffer>> DeviceMemoryMB =
        WritableMemoryBuffer::getNewUninitMemBuffer(MemorySize);
    if (!DeviceMemoryMB)
      report_fatal_error("Error creating MemoryBuffer for device memory");

    auto Err = Device->dataRetrieve(DeviceMemoryMB.get()->getBufferStart(),
                                    MemoryStart, MemorySize, nullptr);
    if (Err)
      report_fatal_error("Error retrieving data for target pointer");

    StringRef DeviceMemory(DeviceMemoryMB.get()->getBufferStart(), MemorySize);
    std::error_code EC;
    raw_fd_ostream OS(Filename, EC);
    if (EC)
      report_fatal_error("Error dumping memory to file " + Filename + ": " +
                         EC.message());
    OS << DeviceMemory;
    OS.close();
  }

public:
  bool isRecording() const { return Status == RRStatusTy::RRRecording; }
  bool isReplaying() const { return Status == RRStatusTy::RRReplaying; }
  bool isRecordingOrReplaying() const {
    return (Status != RRStatusTy::RRDeactivated);
  }
  void setStatus(RRStatusTy Status) { this->Status = Status; }
  bool isSaveOutputEnabled() const { return ReplaySaveOutput; }
  void addEntry(const char *Name, uint64_t Size, void *Addr) {
    GlobalEntries.emplace_back(GlobalEntry{Name, Size, Addr});
  }

  void saveImage(const char *Name, const DeviceImageTy &Image) {
    SmallString<128> ImageName = {Name, ".image"};
    std::error_code EC;
    raw_fd_ostream OS(ImageName, EC);
    if (EC)
      report_fatal_error("Error saving image: " + StringRef(EC.message()));
    if (const auto *TgtImageBitcode = Image.getTgtImageBitcode()) {
      size_t Size =
          getPtrDiff(TgtImageBitcode->ImageEnd, TgtImageBitcode->ImageStart);
      MemoryBufferRef MBR = MemoryBufferRef(
          StringRef((const char *)TgtImageBitcode->ImageStart, Size), "");
      OS << MBR.getBuffer();
    } else {
      OS << Image.getMemoryBuffer().getBuffer();
    }
    OS.close();
  }

  void dumpGlobals(StringRef Filename, DeviceImageTy &Image) {
    int32_t Size = 0;

    for (auto &OffloadEntry : GlobalEntries) {
      if (!OffloadEntry.Size)
        continue;
      // Get the total size of the string and entry including the null byte.
      Size += std::strlen(OffloadEntry.Name) + 1 + sizeof(uint32_t) +
              OffloadEntry.Size;
    }
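    // Serialized layout, per global: the entry name including its null
    // terminator, followed by a uint32_t byte size, followed by the payload
    // bytes read back from the device.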

    ErrorOr<std::unique_ptr<WritableMemoryBuffer>> GlobalsMB =
        WritableMemoryBuffer::getNewUninitMemBuffer(Size);
    if (!GlobalsMB)
      report_fatal_error("Error creating MemoryBuffer for globals memory");

    void *BufferPtr = GlobalsMB.get()->getBufferStart();
    for (auto &OffloadEntry : GlobalEntries) {
      if (!OffloadEntry.Size)
        continue;

      int32_t NameLength = std::strlen(OffloadEntry.Name) + 1;
      memcpy(BufferPtr, OffloadEntry.Name, NameLength);
      BufferPtr = advanceVoidPtr(BufferPtr, NameLength);

      *((uint32_t *)(BufferPtr)) = OffloadEntry.Size;
      BufferPtr = advanceVoidPtr(BufferPtr, sizeof(uint32_t));

      if (auto Err = Device->dataRetrieve(BufferPtr, OffloadEntry.Addr,
                                          OffloadEntry.Size, nullptr))
        report_fatal_error("Error retrieving data for global");
      BufferPtr = advanceVoidPtr(BufferPtr, OffloadEntry.Size);
    }
    assert(BufferPtr == GlobalsMB->get()->getBufferEnd() &&
           "Buffer over/under-filled.");
    assert(Size == getPtrDiff(BufferPtr, GlobalsMB->get()->getBufferStart()) &&
           "Buffer size mismatch");

    StringRef GlobalsMemory(GlobalsMB.get()->getBufferStart(), Size);
    std::error_code EC;
    raw_fd_ostream OS(Filename, EC);
    OS << GlobalsMemory;
    OS.close();
  }

  void saveKernelDescr(const char *Name, void **ArgPtrs, int32_t NumArgs,
                       uint64_t NumTeamsClause, uint32_t ThreadLimitClause,
                       uint64_t LoopTripCount) {
    json::Object JsonKernelInfo;
    JsonKernelInfo["Name"] = Name;
    JsonKernelInfo["NumArgs"] = NumArgs;
    JsonKernelInfo["NumTeamsClause"] = NumTeamsClause;
    JsonKernelInfo["ThreadLimitClause"] = ThreadLimitClause;
    JsonKernelInfo["LoopTripCount"] = LoopTripCount;
    JsonKernelInfo["DeviceMemorySize"] = MemorySize;
    JsonKernelInfo["DeviceId"] = Device->getDeviceId();
    JsonKernelInfo["BumpAllocVAStart"] = (intptr_t)MemoryStart;

    json::Array JsonArgPtrs;
    for (int I = 0; I < NumArgs; ++I)
      JsonArgPtrs.push_back((intptr_t)ArgPtrs[I]);
    JsonKernelInfo["ArgPtrs"] = json::Value(std::move(JsonArgPtrs));

    json::Array JsonArgOffsets;
    for (int I = 0; I < NumArgs; ++I)
      JsonArgOffsets.push_back(0);
    JsonKernelInfo["ArgOffsets"] = json::Value(std::move(JsonArgOffsets));
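    // The resulting <Name>.json file looks like (illustrative values):
    //   {"Name": "__omp_offloading_...", "NumArgs": 2, "NumTeamsClause": 4,
    //    "ThreadLimitClause": 256, "LoopTripCount": 1024,
    //    "DeviceMemorySize": 65536, "DeviceId": 0, "BumpAllocVAStart": ...,
    //    "ArgPtrs": [...], "ArgOffsets": [0, 0]}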

    SmallString<128> JsonFilename = {Name, ".json"};
    std::error_code EC;
    raw_fd_ostream JsonOS(JsonFilename.str(), EC);
    if (EC)
      report_fatal_error("Error saving kernel json file: " +
                         StringRef(EC.message()));
    JsonOS << json::Value(std::move(JsonKernelInfo));
    JsonOS.close();
  }

  void saveKernelInput(const char *Name, DeviceImageTy &Image) {
    SmallString<128> GlobalsFilename = {Name, ".globals"};
    dumpGlobals(GlobalsFilename, Image);

    SmallString<128> MemoryFilename = {Name, ".memory"};
    dumpDeviceMemory(MemoryFilename);
  }

  void saveKernelOutputInfo(const char *Name) {
    SmallString<128> OutputFilename = {
        Name, (isRecording() ? ".original.output" : ".replay.output")};
    dumpDeviceMemory(OutputFilename);
  }

  void *alloc(uint64_t Size) {
    assert(MemoryStart && "Expected memory has been pre-allocated");
    void *Alloc = nullptr;
    constexpr int Alignment = 16;
    // Assumes alignment is a power of 2.
    int64_t AlignedSize = (Size + (Alignment - 1)) & (~(Alignment - 1));
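    // Example (illustrative): Size = 20 rounds up to AlignedSize = 32.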
    std::lock_guard<std::mutex> LG(AllocationLock);
    Alloc = MemoryPtr;
    MemoryPtr = (char *)MemoryPtr + AlignedSize;
    MemorySize += AlignedSize;
    DP("Memory Allocator return " DPxMOD "\n", DPxPTR(Alloc));
    return Alloc;
  }

  Error init(GenericDeviceTy *Device, uint64_t MemSize, void *VAddr,
             RRStatusTy Status, bool SaveOutput, uint64_t &ReqPtrArgOffset) {
    this->Device = Device;
    this->Status = Status;
    this->ReplaySaveOutput = SaveOutput;

    if (auto Err = preallocateDeviceMemory(MemSize, VAddr))
      return Err;

    INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device->getDeviceId(),
         "Record-Replay initialized with starting address %p, memory size "
         "%lu, and status %s\n",
         MemoryStart, TotalSize,
         Status == RRStatusTy::RRRecording ? "Recording" : "Replaying");

    // Tell the user to offset pointer arguments as the memory allocation does
    // not match.
    ReqPtrArgOffset = MemoryOffset;
    return Plugin::success();
  }

  void deinit() {
    if (UsedVAMap) {
      if (auto Err = Device->memoryVAUnMap(MemoryStart, TotalSize))
        report_fatal_error("Error on releasing virtual memory space");
    } else {
      Device->free(MemoryStart);
    }
  }
};

static RecordReplayTy RecordReplay;

// Extract the mapping of host function pointers to device function pointers
// from the entry table. Functions marked as 'indirect' in OpenMP will have
// offloading entries generated for them which map the host's function pointer
// to a global containing the corresponding function pointer on the device.
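// The table built below is an array of {host pointer, device pointer} pairs,
// sorted by host pointer so that lookups (e.g., a binary search on the
// device) can be efficient.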
static Expected<std::pair<void *, uint64_t>>
setupIndirectCallTable(GenericPluginTy &Plugin, GenericDeviceTy &Device,
                       DeviceImageTy &Image) {
  GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();

  llvm::ArrayRef<__tgt_offload_entry> Entries(Image.getTgtImage()->EntriesBegin,
                                              Image.getTgtImage()->EntriesEnd);
  llvm::SmallVector<std::pair<void *, void *>> IndirectCallTable;
  for (const auto &Entry : Entries) {
    if (Entry.size == 0 || !(Entry.flags & OMP_DECLARE_TARGET_INDIRECT))
      continue;

    assert(Entry.size == sizeof(void *) && "Global not a function pointer?");
    auto &[HstPtr, DevPtr] = IndirectCallTable.emplace_back();

    GlobalTy DeviceGlobal(Entry.name, Entry.size);
    if (auto Err =
            Handler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal))
      return std::move(Err);

    HstPtr = Entry.addr;
    if (auto Err = Device.dataRetrieve(&DevPtr, DeviceGlobal.getPtr(),
                                       Entry.size, nullptr))
      return std::move(Err);
  }

  // If we do not have any indirect globals we exit early.
  if (IndirectCallTable.empty())
    return std::pair{nullptr, 0};

  // Sort the array to allow for more efficient lookup of device pointers.
  llvm::sort(IndirectCallTable,
             [](const auto &x, const auto &y) { return x.first < y.first; });

  uint64_t TableSize =
      IndirectCallTable.size() * sizeof(std::pair<void *, void *>);
  void *DevicePtr = Device.allocate(TableSize, nullptr, TARGET_ALLOC_DEVICE);
  if (auto Err = Device.dataSubmit(DevicePtr, IndirectCallTable.data(),
                                   TableSize, nullptr))
    return std::move(Err);
  return std::pair<void *, uint64_t>(DevicePtr, IndirectCallTable.size());
}

AsyncInfoWrapperTy::AsyncInfoWrapperTy(GenericDeviceTy &Device,
                                       __tgt_async_info *AsyncInfoPtr)
    : Device(Device),
      AsyncInfoPtr(AsyncInfoPtr ? AsyncInfoPtr : &LocalAsyncInfo) {}

void AsyncInfoWrapperTy::finalize(Error &Err) {
  assert(AsyncInfoPtr && "AsyncInfoWrapperTy already finalized");

  // If we used a local async info object we want synchronous behavior. In that
  // case, and assuming the current status code is correct, we will synchronize
  // explicitly when the object is deleted. Update the error with the result of
  // the synchronize operation.
  if (AsyncInfoPtr == &LocalAsyncInfo && LocalAsyncInfo.Queue && !Err)
    Err = Device.synchronize(&LocalAsyncInfo);

  // Invalidate the wrapper object.
  AsyncInfoPtr = nullptr;
}
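
// Canonical usage of the wrapper (see, e.g., GenericDeviceTy::dataSubmit
// further below):
//   AsyncInfoWrapperTy AsyncInfoWrapper(Device, AsyncInfo);
//   auto Err = doOperationImpl(..., AsyncInfoWrapper);
//   AsyncInfoWrapper.finalize(Err);
//   return Err;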

Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
                            DeviceImageTy &Image) {

  ImagePtr = &Image;

  // Retrieve kernel environment object for the kernel.
  GlobalTy KernelEnv(std::string(Name) + "_kernel_environment",
                     sizeof(KernelEnvironment), &KernelEnvironment);
  GenericGlobalHandlerTy &GHandler = GenericDevice.Plugin.getGlobalHandler();
  if (auto Err =
          GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) {
    [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
    DP("Failed to read kernel environment for '%s': %s\n"
       "Using default SPMD (2) execution mode\n",
       Name, ErrStr.data());
    assert(KernelEnvironment.Configuration.ReductionDataSize == 0 &&
           "Default initialization failed.");
    IsBareKernel = true;
  }

  // Max = Config.Max > 0 ? min(Config.Max, Device.Max) : Device.Max;
  MaxNumThreads = KernelEnvironment.Configuration.MaxThreads > 0
                      ? std::min(KernelEnvironment.Configuration.MaxThreads,
                                 int32_t(GenericDevice.getThreadLimit()))
                      : GenericDevice.getThreadLimit();

  // Pref = Config.Pref > 0 ? max(Config.Pref, Device.Pref) : Device.Pref;
  PreferredNumThreads =
      KernelEnvironment.Configuration.MinThreads > 0
          ? std::max(KernelEnvironment.Configuration.MinThreads,
                     int32_t(GenericDevice.getDefaultNumThreads()))
          : GenericDevice.getDefaultNumThreads();

  return initImpl(GenericDevice, Image);
}

Expected<KernelLaunchEnvironmentTy *>
GenericKernelTy::getKernelLaunchEnvironment(
    GenericDeviceTy &GenericDevice, uint32_t Version,
    AsyncInfoWrapperTy &AsyncInfoWrapper) const {
  // Ctor/Dtor have no arguments, replaying uses the original kernel launch
  // environment. Older versions of the compiler do not generate a kernel
  // launch environment.
  if (isCtorOrDtor() || RecordReplay.isReplaying() ||
      Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
    return nullptr;

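  // Kernels without reductions need no dedicated launch environment. Return a
  // non-null sentinel (all bits set) instead of allocating one; prepareArgs
  // still reserves the implicit first argument slot for it.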
  if (!KernelEnvironment.Configuration.ReductionDataSize ||
      !KernelEnvironment.Configuration.ReductionBufferLength)
    return reinterpret_cast<KernelLaunchEnvironmentTy *>(~0);

  // TODO: Check if the kernel needs a launch environment.
  auto AllocOrErr = GenericDevice.dataAlloc(sizeof(KernelLaunchEnvironmentTy),
                                            /*HostPtr=*/nullptr,
                                            TargetAllocTy::TARGET_ALLOC_DEVICE);
  if (!AllocOrErr)
    return AllocOrErr.takeError();

  // Remember to free the memory later.
  AsyncInfoWrapper.freeAllocationAfterSynchronization(*AllocOrErr);

  // Use the KLE in the __tgt_async_info to ensure a stable address for the
  // async data transfer.
  auto &LocalKLE = (*AsyncInfoWrapper).KernelLaunchEnvironment;
  LocalKLE = KernelLaunchEnvironment;
  {
    auto AllocOrErr = GenericDevice.dataAlloc(
        KernelEnvironment.Configuration.ReductionDataSize *
            KernelEnvironment.Configuration.ReductionBufferLength,
        /*HostPtr=*/nullptr, TargetAllocTy::TARGET_ALLOC_DEVICE);
    if (!AllocOrErr)
      return AllocOrErr.takeError();
    LocalKLE.ReductionBuffer = *AllocOrErr;
    // Remember to free the memory later.
    AsyncInfoWrapper.freeAllocationAfterSynchronization(*AllocOrErr);
  }

  INFO(OMP_INFOTYPE_DATA_TRANSFER, GenericDevice.getDeviceId(),
       "Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD
       ", Size=%" PRId64 ", Name=KernelLaunchEnv\n",
       DPxPTR(&LocalKLE), DPxPTR(*AllocOrErr),
       sizeof(KernelLaunchEnvironmentTy));

  auto Err = GenericDevice.dataSubmit(*AllocOrErr, &LocalKLE,
                                      sizeof(KernelLaunchEnvironmentTy),
                                      AsyncInfoWrapper);
  if (Err)
    return Err;
  return static_cast<KernelLaunchEnvironmentTy *>(*AllocOrErr);
}

Error GenericKernelTy::printLaunchInfo(GenericDeviceTy &GenericDevice,
                                       KernelArgsTy &KernelArgs,
                                       uint32_t NumThreads,
                                       uint64_t NumBlocks) const {
  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
       "Launching kernel %s with %" PRIu64
       " blocks and %u threads in %s mode\n",
       getName(), NumBlocks, NumThreads, getExecutionModeName());
  return printLaunchInfoDetails(GenericDevice, KernelArgs, NumThreads,
                                NumBlocks);
}

Error GenericKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
                                              KernelArgsTy &KernelArgs,
                                              uint32_t NumThreads,
                                              uint64_t NumBlocks) const {
  return Plugin::success();
}

Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
                              ptrdiff_t *ArgOffsets, KernelArgsTy &KernelArgs,
                              AsyncInfoWrapperTy &AsyncInfoWrapper) const {
  llvm::SmallVector<void *, 16> Args;
  llvm::SmallVector<void *, 16> Ptrs;

  auto KernelLaunchEnvOrErr = getKernelLaunchEnvironment(
      GenericDevice, KernelArgs.Version, AsyncInfoWrapper);
  if (!KernelLaunchEnvOrErr)
    return KernelLaunchEnvOrErr.takeError();

  void *KernelArgsPtr =
      prepareArgs(GenericDevice, ArgPtrs, ArgOffsets, KernelArgs.NumArgs, Args,
                  Ptrs, *KernelLaunchEnvOrErr);

  uint32_t NumThreads = getNumThreads(GenericDevice, KernelArgs.ThreadLimit);
  uint64_t NumBlocks =
      getNumBlocks(GenericDevice, KernelArgs.NumTeams, KernelArgs.Tripcount,
                   NumThreads, KernelArgs.ThreadLimit[0] > 0);

  // Record the kernel description after we modified the argument count and num
  // blocks/threads.
  if (RecordReplay.isRecording()) {
    RecordReplay.saveImage(getName(), getImage());
    RecordReplay.saveKernelInput(getName(), getImage());
    RecordReplay.saveKernelDescr(getName(), Ptrs.data(), KernelArgs.NumArgs,
                                 NumBlocks, NumThreads, KernelArgs.Tripcount);
  }

  if (auto Err =
          printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
    return Err;

  return launchImpl(GenericDevice, NumThreads, NumBlocks, KernelArgs,
                    KernelArgsPtr, AsyncInfoWrapper);
}

void *GenericKernelTy::prepareArgs(
    GenericDeviceTy &GenericDevice, void **ArgPtrs, ptrdiff_t *ArgOffsets,
    uint32_t &NumArgs, llvm::SmallVectorImpl<void *> &Args,
    llvm::SmallVectorImpl<void *> &Ptrs,
    KernelLaunchEnvironmentTy *KernelLaunchEnvironment) const {
  if (isCtorOrDtor())
    return nullptr;

  uint32_t KLEOffset = !!KernelLaunchEnvironment;
  NumArgs += KLEOffset;

  if (NumArgs == 0)
    return nullptr;

  Args.resize(NumArgs);
  Ptrs.resize(NumArgs);

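  // Slot 0 (if present) carries the kernel launch environment; the user
  // arguments follow, each adjusted by its offset.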
  if (KernelLaunchEnvironment) {
    Ptrs[0] = KernelLaunchEnvironment;
    Args[0] = &Ptrs[0];
  }

  for (uint32_t I = KLEOffset; I < NumArgs; ++I) {
    Ptrs[I] =
        (void *)((intptr_t)ArgPtrs[I - KLEOffset] + ArgOffsets[I - KLEOffset]);
    Args[I] = &Ptrs[I];
  }
  return &Args[0];
}

uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
                                        uint32_t ThreadLimitClause[3]) const {
  assert(ThreadLimitClause[1] == 0 && ThreadLimitClause[2] == 0 &&
         "Multi dimensional launch not supported yet.");

  if (IsBareKernel && ThreadLimitClause[0] > 0)
    return ThreadLimitClause[0];

  if (ThreadLimitClause[0] > 0 && isGenericMode())
    ThreadLimitClause[0] += GenericDevice.getWarpSize();

  return std::min(MaxNumThreads, (ThreadLimitClause[0] > 0)
                                     ? ThreadLimitClause[0]
                                     : PreferredNumThreads);
}

uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
                                       uint32_t NumTeamsClause[3],
                                       uint64_t LoopTripCount,
                                       uint32_t &NumThreads,
                                       bool IsNumThreadsFromUser) const {
  assert(NumTeamsClause[1] == 0 && NumTeamsClause[2] == 0 &&
         "Multi dimensional launch not supported yet.");

  if (IsBareKernel && NumTeamsClause[0] > 0)
    return NumTeamsClause[0];

  if (NumTeamsClause[0] > 0) {
    // TODO: We need to honor any value and consequently allow more than the
    // block limit. For this we might need to start multiple kernels or let the
    // blocks start again until the requested number has been started.
    return std::min(NumTeamsClause[0], GenericDevice.getBlockLimit());
  }

  uint64_t DefaultNumBlocks = GenericDevice.getDefaultNumBlocks();
  uint64_t TripCountNumBlocks = std::numeric_limits<uint64_t>::max();
  if (LoopTripCount > 0) {
    if (isSPMDMode()) {
      // We have a combined construct, i.e. `target teams distribute
      // parallel for [simd]`. We launch as many teams as needed so that each
      // thread executes one iteration of the loop, rounded up to the nearest
      // integer. However, if that results in too few teams, we artificially
      // reduce the thread count per team to increase the outer parallelism.
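      // Example (illustrative): with LoopTripCount = 1,000,000 and
      // NumThreads = 256, the first case below computes
      // TripCountNumBlocks = ceil(1,000,000 / 256) = 3907 teams.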
      auto MinThreads = GenericDevice.getMinThreadsForLowTripCountLoop();
      MinThreads = std::min(MinThreads, NumThreads);

      // Honor the thread_limit clause; only lower the number of threads.
      [[maybe_unused]] auto OldNumThreads = NumThreads;
      if (LoopTripCount >= DefaultNumBlocks * NumThreads ||
          IsNumThreadsFromUser) {
        // Enough parallelism for teams and threads.
        TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
        assert((IsNumThreadsFromUser ||
                TripCountNumBlocks >= DefaultNumBlocks) &&
               "Expected sufficient outer parallelism.");
      } else if (LoopTripCount >= DefaultNumBlocks * MinThreads) {
        // Enough parallelism for teams, limit threads.

        // This case is hard; for now, we force "full warps":
        // First, compute a thread count assuming DefaultNumBlocks.
        auto NumThreadsDefaultBlocks =
            (LoopTripCount + DefaultNumBlocks - 1) / DefaultNumBlocks;
        // Now get a power of two that is larger or equal.
        auto NumThreadsDefaultBlocksP2 =
            llvm::PowerOf2Ceil(NumThreadsDefaultBlocks);
        // Do not increase a thread limit given by the user.
        NumThreads = std::min(NumThreads, uint32_t(NumThreadsDefaultBlocksP2));
        assert(NumThreads >= MinThreads &&
               "Expected sufficient inner parallelism.");
        TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
      } else {
        // Not enough parallelism for teams and threads, limit both.
        NumThreads = std::min(NumThreads, MinThreads);
        TripCountNumBlocks = ((LoopTripCount - 1) / NumThreads) + 1;
      }

      assert(NumThreads * TripCountNumBlocks >= LoopTripCount &&
             "Expected sufficient parallelism");
      assert(OldNumThreads >= NumThreads &&
             "Number of threads cannot be increased!");
    } else {
      assert((isGenericMode() || isGenericSPMDMode()) &&
             "Unexpected execution mode!");
      // If we reach this point, then we have a non-combined construct, i.e.
      // `teams distribute` with a nested `parallel for` and each team is
      // assigned one iteration of the `distribute` loop. E.g.:
      //
      // #pragma omp target teams distribute
      // for(...loop_tripcount...) {
      //   #pragma omp parallel for
      //   for(...) {}
      // }
      //
      // Threads within a team will execute the iterations of the `parallel`
      // loop.
      TripCountNumBlocks = LoopTripCount;
    }
  }
  // If the loops are long running, we would rather reuse blocks than spawn too
  // many.
  uint32_t PreferredNumBlocks = std::min(TripCountNumBlocks, DefaultNumBlocks);
  return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit());
}

GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
                                 int32_t NumDevices,
                                 const llvm::omp::GV &OMPGridValues)
    : Plugin(Plugin), MemoryManager(nullptr), OMP_TeamLimit("OMP_TEAM_LIMIT"),
      OMP_NumTeams("OMP_NUM_TEAMS"),
      OMP_TeamsThreadLimit("OMP_TEAMS_THREAD_LIMIT"),
      OMPX_DebugKind("LIBOMPTARGET_DEVICE_RTL_DEBUG"),
      OMPX_SharedMemorySize("LIBOMPTARGET_SHARED_MEMORY_SIZE"),
      // Do not initialize the following two envars since they depend on the
      // device initialization. These cannot be consulted until the device is
      // initialized correctly. We initialize them in GenericDeviceTy::init().
      OMPX_TargetStackSize(), OMPX_TargetHeapSize(),
      // By default, the initial number of streams and events is 1.
      OMPX_InitialNumStreams("LIBOMPTARGET_NUM_INITIAL_STREAMS", 1),
      OMPX_InitialNumEvents("LIBOMPTARGET_NUM_INITIAL_EVENTS", 1),
      DeviceId(DeviceId), GridValues(OMPGridValues),
      PeerAccesses(NumDevices, PeerAccessState::PENDING), PeerAccessesLock(),
      PinnedAllocs(*this), RPCServer(nullptr) {
#ifdef OMPT_SUPPORT
  OmptInitialized.store(false);
  // Bind the callbacks to this device's member functions.
#define bindOmptCallback(Name, Type, Code)                                     \
  if (ompt::Initialized && ompt::lookupCallbackByCode) {                       \
    ompt::lookupCallbackByCode((ompt_callbacks_t)(Code),                       \
                               ((ompt_callback_t *)&(Name##_fn)));             \
    DP("OMPT: class bound %s=%p\n", #Name, ((void *)(uint64_t)Name##_fn));     \
  }

  FOREACH_OMPT_DEVICE_EVENT(bindOmptCallback);
#undef bindOmptCallback

#endif
}

Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
  if (auto Err = initImpl(Plugin))
    return Err;

#ifdef OMPT_SUPPORT
  if (ompt::Initialized) {
    bool ExpectedStatus = false;
    if (OmptInitialized.compare_exchange_strong(ExpectedStatus, true))
      performOmptCallback(device_initialize, /*device_num=*/DeviceId +
                                                 Plugin.getDeviceIdStartIndex(),
                          /*type=*/getComputeUnitKind().c_str(),
                          /*device=*/reinterpret_cast<ompt_device_t *>(this),
                          /*lookup=*/ompt::lookupCallbackByName,
                          /*documentation=*/nullptr);
  }
#endif

  // Read and reinitialize the envars that depend on the device initialization.
  // Notice these two envars may change the stack size and heap size of the
  // device, so they need the device properly initialized.
  auto StackSizeEnvarOrErr = UInt64Envar::create(
      "LIBOMPTARGET_STACK_SIZE",
      [this](uint64_t &V) -> Error { return getDeviceStackSize(V); },
      [this](uint64_t V) -> Error { return setDeviceStackSize(V); });
  if (!StackSizeEnvarOrErr)
    return StackSizeEnvarOrErr.takeError();
  OMPX_TargetStackSize = std::move(*StackSizeEnvarOrErr);

  auto HeapSizeEnvarOrErr = UInt64Envar::create(
      "LIBOMPTARGET_HEAP_SIZE",
      [this](uint64_t &V) -> Error { return getDeviceHeapSize(V); },
      [this](uint64_t V) -> Error { return setDeviceHeapSize(V); });
  if (!HeapSizeEnvarOrErr)
    return HeapSizeEnvarOrErr.takeError();
  OMPX_TargetHeapSize = std::move(*HeapSizeEnvarOrErr);

  // Update the maximum number of teams and threads after the device
  // initialization sets the corresponding hardware limit.
  if (OMP_NumTeams > 0)
    GridValues.GV_Max_Teams =
        std::min(GridValues.GV_Max_Teams, uint32_t(OMP_NumTeams));

  if (OMP_TeamsThreadLimit > 0)
    GridValues.GV_Max_WG_Size =
        std::min(GridValues.GV_Max_WG_Size, uint32_t(OMP_TeamsThreadLimit));

  // Enable the memory manager if required.
  auto [ThresholdMM, EnableMM] = MemoryManagerTy::getSizeThresholdFromEnv();
  if (EnableMM)
    MemoryManager = new MemoryManagerTy(*this, ThresholdMM);

  return Plugin::success();
}

Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
  for (DeviceImageTy *Image : LoadedImages)
    if (auto Err = callGlobalDestructors(Plugin, *Image))
      return Err;

  if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
    GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
    for (auto *Image : LoadedImages) {
      DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
      GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
                             sizeof(DeviceMemoryPoolTrackingTy),
                             &ImageDeviceMemoryPoolTracking);
      if (auto Err =
              GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) {
        consumeError(std::move(Err));
        continue;
      }
      DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
    }

    // TODO: Write this by default into a file.
    printf("\n\n|-----------------------\n"
           "| Device memory tracker:\n"
           "|-----------------------\n"
           "| #Allocations: %lu\n"
           "| Bytes allocated: %lu\n"
           "| Minimal allocation: %lu\n"
           "| Maximal allocation: %lu\n"
           "|-----------------------\n\n\n",
           DeviceMemoryPoolTracking.NumAllocations,
           DeviceMemoryPoolTracking.AllocationTotal,
           DeviceMemoryPoolTracking.AllocationMin,
           DeviceMemoryPoolTracking.AllocationMax);
  }

  // Delete the memory manager before deinitializing the device. Otherwise,
  // we may delete device allocations after the device is deinitialized.
  if (MemoryManager)
    delete MemoryManager;
  MemoryManager = nullptr;

  if (RecordReplay.isRecordingOrReplaying())
    RecordReplay.deinit();

  if (RPCServer)
    if (auto Err = RPCServer->deinitDevice(*this))
      return Err;

#ifdef OMPT_SUPPORT
  if (ompt::Initialized) {
    bool ExpectedStatus = true;
    if (OmptInitialized.compare_exchange_strong(ExpectedStatus, false))
      performOmptCallback(device_finalize,
                          /*device_num=*/DeviceId +
                              Plugin.getDeviceIdStartIndex());
  }
#endif

  return deinitImpl();
}

Expected<DeviceImageTy *>
GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
                            const __tgt_device_image *InputTgtImage) {
  assert(InputTgtImage && "Expected non-null target image");
  DP("Load data from image " DPxMOD "\n", DPxPTR(InputTgtImage->ImageStart));

  auto PostJITImageOrErr = Plugin.getJIT().process(*InputTgtImage, *this);
  if (!PostJITImageOrErr) {
    auto Err = PostJITImageOrErr.takeError();
    REPORT("Failure to jit IR image %p on device %d: %s\n", InputTgtImage,
           DeviceId, toString(std::move(Err)).data());
    return nullptr;
  }

  // Load the binary and allocate the image object. Use the next available id
  // for the image id, which is the number of previously loaded images.
  auto ImageOrErr =
      loadBinaryImpl(PostJITImageOrErr.get(), LoadedImages.size());
  if (!ImageOrErr)
    return ImageOrErr.takeError();

  DeviceImageTy *Image = *ImageOrErr;
  assert(Image != nullptr && "Invalid image");
  if (InputTgtImage != PostJITImageOrErr.get())
    Image->setTgtImageBitcode(InputTgtImage);

  // Add the image to the list.
  LoadedImages.push_back(Image);

  // Set up the device environment if needed.
  if (auto Err = setupDeviceEnvironment(Plugin, *Image))
    return std::move(Err);

  // Set up the global device memory pool if needed.
  if (!RecordReplay.isReplaying() && shouldSetupDeviceMemoryPool()) {
    uint64_t HeapSize;
    auto SizeOrErr = getDeviceHeapSize(HeapSize);
    if (SizeOrErr) {
      REPORT("No global device memory pool due to error: %s\n",
             toString(std::move(SizeOrErr)).data());
    } else if (auto Err = setupDeviceMemoryPool(Plugin, *Image, HeapSize))
      return std::move(Err);
  }

  if (auto Err = setupRPCServer(Plugin, *Image))
    return std::move(Err);

#ifdef OMPT_SUPPORT
  if (ompt::Initialized) {
    size_t Bytes =
        getPtrDiff(InputTgtImage->ImageEnd, InputTgtImage->ImageStart);
    performOmptCallback(
        device_load, /*device_num=*/DeviceId + Plugin.getDeviceIdStartIndex(),
        /*FileName=*/nullptr, /*FileOffset=*/0, /*VmaInFile=*/nullptr,
        /*ImgSize=*/Bytes, /*HostAddr=*/InputTgtImage->ImageStart,
        /*DeviceAddr=*/nullptr, /* FIXME: ModuleId */ 0);
  }
#endif

  // Call any global constructors present on the device.
  if (auto Err = callGlobalConstructors(Plugin, *Image))
    return std::move(Err);

  // Return the pointer to the table of entries.
  return Image;
}

Error GenericDeviceTy::setupDeviceEnvironment(GenericPluginTy &Plugin,
                                              DeviceImageTy &Image) {
  // There are some plugins that do not need this step.
  if (!shouldSetupDeviceEnvironment())
    return Plugin::success();

  // Obtain a table mapping host function pointers to device function pointers.
  auto CallTablePairOrErr = setupIndirectCallTable(Plugin, *this, Image);
  if (!CallTablePairOrErr)
    return CallTablePairOrErr.takeError();

  DeviceEnvironmentTy DeviceEnvironment;
  DeviceEnvironment.DeviceDebugKind = OMPX_DebugKind;
  DeviceEnvironment.NumDevices = Plugin.getNumDevices();
  // TODO: The device ID used here is not the real device ID used by OpenMP.
  DeviceEnvironment.DeviceNum = DeviceId;
  DeviceEnvironment.DynamicMemSize = OMPX_SharedMemorySize;
  DeviceEnvironment.ClockFrequency = getClockFrequency();
  DeviceEnvironment.IndirectCallTable =
      reinterpret_cast<uintptr_t>(CallTablePairOrErr->first);
  DeviceEnvironment.IndirectCallTableSize = CallTablePairOrErr->second;
  DeviceEnvironment.HardwareParallelism = getHardwareParallelism();

  // Create the metainfo of the device environment global.
  GlobalTy DevEnvGlobal("__omp_rtl_device_environment",
                        sizeof(DeviceEnvironmentTy), &DeviceEnvironment);

  // Write device environment values to the device.
  GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
  if (auto Err = GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal)) {
    DP("Missing symbol %s, continue execution anyway.\n",
       DevEnvGlobal.getName().data());
    consumeError(std::move(Err));
  }
  return Plugin::success();
}

Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin,
                                             DeviceImageTy &Image,
                                             uint64_t PoolSize) {
  // Free the old pool, if any.
  if (DeviceMemoryPool.Ptr) {
    if (auto Err = dataDelete(DeviceMemoryPool.Ptr,
                              TargetAllocTy::TARGET_ALLOC_DEVICE))
      return Err;
  }

  DeviceMemoryPool.Size = PoolSize;
  auto AllocOrErr = dataAlloc(PoolSize, /*HostPtr=*/nullptr,
                              TargetAllocTy::TARGET_ALLOC_DEVICE);
  if (AllocOrErr) {
    DeviceMemoryPool.Ptr = *AllocOrErr;
  } else {
    auto Err = AllocOrErr.takeError();
    REPORT("Failure to allocate device memory for global memory pool: %s\n",
           toString(std::move(Err)).data());
    DeviceMemoryPool.Ptr = nullptr;
    DeviceMemoryPool.Size = 0;
  }

  // Skip the pool tracker if the image does not contain the tracker symbol.
  GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
  if (!GHandler.isSymbolInImage(*this, Image,
                                "__omp_rtl_device_memory_pool_tracker")) {
    DP("Skip the memory pool as there is no tracker symbol in the image.\n");
    return Error::success();
  }

  GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
                         sizeof(DeviceMemoryPoolTrackingTy),
                         &DeviceMemoryPoolTracking);
  if (auto Err = GHandler.writeGlobalToDevice(*this, Image, TrackerGlobal))
    return Err;

  // Create the metainfo of the device memory pool global.
  GlobalTy DevEnvGlobal("__omp_rtl_device_memory_pool",
                        sizeof(DeviceMemoryPoolTy), &DeviceMemoryPool);

  // Write the device memory pool values to the device.
  return GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal);
}

Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin,
                                      DeviceImageTy &Image) {
  // The plugin either does not need an RPC server or it is unavailable.
  if (!shouldSetupRPCServer())
    return Plugin::success();

  // Check if this device needs to run an RPC server.
  RPCServerTy &Server = Plugin.getRPCServer();
  auto UsingOrErr =
      Server.isDeviceUsingRPC(*this, Plugin.getGlobalHandler(), Image);
  if (!UsingOrErr)
    return UsingOrErr.takeError();

  if (!UsingOrErr.get())
    return Plugin::success();

  if (auto Err = Server.initDevice(*this, Plugin.getGlobalHandler(), Image))
    return Err;

  RPCServer = &Server;
  DP("Running an RPC server on device %d\n", getDeviceId());
  return Plugin::success();
}

Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr,
                                         size_t Size, bool ExternallyLocked) {
  // Insert the new entry into the map.
  auto Res = Allocs.insert({HstPtr, DevAccessiblePtr, Size, ExternallyLocked});
  if (!Res.second)
    return Plugin::error("Cannot insert locked buffer entry");

  // Check whether the next entry overlaps with the inserted entry.
  auto It = std::next(Res.first);
  if (It == Allocs.end())
    return Plugin::success();

  const EntryTy *NextEntry = &(*It);
  if (intersects(NextEntry->HstPtr, NextEntry->Size, HstPtr, Size))
    return Plugin::error("Partial overlapping not allowed in locked buffers");

  return Plugin::success();
}

Error PinnedAllocationMapTy::eraseEntry(const EntryTy &Entry) {
  // Erase the existing entry. Notice this requires an additional map lookup,
  // but this should not be a performance issue. Using iterators would make
  // the code more difficult to read.
  size_t Erased = Allocs.erase({Entry.HstPtr});
  if (!Erased)
    return Plugin::error("Cannot erase locked buffer entry");
  return Plugin::success();
}

Error PinnedAllocationMapTy::registerEntryUse(const EntryTy &Entry,
                                              void *HstPtr, size_t Size) {
  if (!contains(Entry.HstPtr, Entry.Size, HstPtr, Size))
    return Plugin::error("Partial overlapping not allowed in locked buffers");

  ++Entry.References;
  return Plugin::success();
}

Expected<bool> PinnedAllocationMapTy::unregisterEntryUse(const EntryTy &Entry) {
  if (Entry.References == 0)
    return Plugin::error("Invalid number of references");

  // Return whether this was the last user.
  return (--Entry.References == 0);
}

Error PinnedAllocationMapTy::registerHostBuffer(void *HstPtr,
                                                void *DevAccessiblePtr,
                                                size_t Size) {
  assert(HstPtr && "Invalid pointer");
  assert(DevAccessiblePtr && "Invalid pointer");
  assert(Size && "Invalid size");

  std::lock_guard<std::shared_mutex> Lock(Mutex);

  // No pinned allocation should intersect.
  const EntryTy *Entry = findIntersecting(HstPtr);
  if (Entry)
    return Plugin::error("Cannot insert entry due to an existing one");

  // Now insert the new entry.
  return insertEntry(HstPtr, DevAccessiblePtr, Size);
}

Error PinnedAllocationMapTy::unregisterHostBuffer(void *HstPtr) {
  assert(HstPtr && "Invalid pointer");

  std::lock_guard<std::shared_mutex> Lock(Mutex);

  const EntryTy *Entry = findIntersecting(HstPtr);
  if (!Entry)
    return Plugin::error("Cannot find locked buffer");

  // The address in the entry should be the same we are unregistering.
  if (Entry->HstPtr != HstPtr)
    return Plugin::error("Unexpected host pointer in locked buffer entry");

  // Unregister from the entry.
  auto LastUseOrErr = unregisterEntryUse(*Entry);
  if (!LastUseOrErr)
    return LastUseOrErr.takeError();

  // There should be no other references to the pinned allocation.
  if (!(*LastUseOrErr))
    return Plugin::error("The locked buffer is still being used");

  // Erase the entry from the map.
  return eraseEntry(*Entry);
}

Expected<void *> PinnedAllocationMapTy::lockHostBuffer(void *HstPtr,
                                                       size_t Size) {
  assert(HstPtr && "Invalid pointer");
  assert(Size && "Invalid size");

  std::lock_guard<std::shared_mutex> Lock(Mutex);

  const EntryTy *Entry = findIntersecting(HstPtr);

  if (Entry) {
    // An already registered intersecting buffer was found. Register a new use.
    if (auto Err = registerEntryUse(*Entry, HstPtr, Size))
      return std::move(Err);

    // Return the device accessible pointer with the correct offset.
    return advanceVoidPtr(Entry->DevAccessiblePtr,
                          getPtrDiff(HstPtr, Entry->HstPtr));
  }

  // No intersecting registered allocation found in the map. First, lock the
  // host buffer and retrieve the device accessible pointer.
  auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size);
  if (!DevAccessiblePtrOrErr)
    return DevAccessiblePtrOrErr.takeError();

  // Now insert the new entry into the map.
  if (auto Err = insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size))
    return std::move(Err);

  // Return the device accessible pointer.
  return *DevAccessiblePtrOrErr;
}

Error PinnedAllocationMapTy::unlockHostBuffer(void *HstPtr) {
  assert(HstPtr && "Invalid pointer");

  std::lock_guard<std::shared_mutex> Lock(Mutex);

  const EntryTy *Entry = findIntersecting(HstPtr);
  if (!Entry)
    return Plugin::error("Cannot find locked buffer");

  // Unregister this use of the locked buffer.
  auto LastUseOrErr = unregisterEntryUse(*Entry);
  if (!LastUseOrErr)
    return LastUseOrErr.takeError();

  // No need to do anything if there are others using the allocation.
  if (!(*LastUseOrErr))
    return Plugin::success();

  // This was the last user of the allocation. Unlock the original locked buffer
  // if it was locked by the plugin. Do not unlock it if it was locked by an
  // external entity. Unlock the buffer using the host pointer of the entry.
  if (!Entry->ExternallyLocked)
    if (auto Err = Device.dataUnlockImpl(Entry->HstPtr))
      return Err;

  // Erase the entry from the map.
  return eraseEntry(*Entry);
}

Error PinnedAllocationMapTy::lockMappedHostBuffer(void *HstPtr, size_t Size) {
  assert(HstPtr && "Invalid pointer");
  assert(Size && "Invalid size");

  std::lock_guard<std::shared_mutex> Lock(Mutex);

  // If previously registered, just register a new user on the entry.
  const EntryTy *Entry = findIntersecting(HstPtr);
  if (Entry)
    return registerEntryUse(*Entry, HstPtr, Size);

  size_t BaseSize;
  void *BaseHstPtr, *BaseDevAccessiblePtr;

  // Check if it was externally pinned by a vendor-specific API.
  auto IsPinnedOrErr = Device.isPinnedPtrImpl(HstPtr, BaseHstPtr,
                                              BaseDevAccessiblePtr, BaseSize);
  if (!IsPinnedOrErr)
    return IsPinnedOrErr.takeError();

  // If pinned, just insert the entry representing the whole pinned buffer.
  if (*IsPinnedOrErr)
    return insertEntry(BaseHstPtr, BaseDevAccessiblePtr, BaseSize,
                       /*ExternallyLocked=*/true);

  // Not externally pinned. Do nothing if locking of mapped buffers is disabled.
  if (!LockMappedBuffers)
    return Plugin::success();

  // Otherwise, lock the buffer and insert the new entry.
  auto DevAccessiblePtrOrErr = Device.dataLockImpl(HstPtr, Size);
  if (!DevAccessiblePtrOrErr) {
    // Errors may be tolerated.
    if (!IgnoreLockMappedFailures)
      return DevAccessiblePtrOrErr.takeError();

    consumeError(DevAccessiblePtrOrErr.takeError());
    return Plugin::success();
  }

  return insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size);
}

Error PinnedAllocationMapTy::unlockUnmappedHostBuffer(void *HstPtr) {
  assert(HstPtr && "Invalid pointer");

  std::lock_guard<std::shared_mutex> Lock(Mutex);

  // Check whether there is any intersecting entry.
  const EntryTy *Entry = findIntersecting(HstPtr);

  // No entry and automatic locking of mapped buffers is disabled, so there is
  // nothing to do.
  if (!Entry && !LockMappedBuffers)
    return Plugin::success();

  // No entry, automatic locking is enabled, but the locking may have failed, so
  // do nothing.
  if (!Entry && IgnoreLockMappedFailures)
    return Plugin::success();

  // No entry, but automatic locking is enabled, so this is an error.
  if (!Entry)
    return Plugin::error("Locked buffer not found");

  // There is an entry, so unregister a user and check whether it was the last
  // one.
  auto LastUseOrErr = unregisterEntryUse(*Entry);
  if (!LastUseOrErr)
    return LastUseOrErr.takeError();

  // If it is not the last one, there is nothing to do.
  if (!(*LastUseOrErr))
    return Plugin::success();

  // Otherwise, if it was the last and the buffer was locked by the plugin,
  // unlock it.
  if (!Entry->ExternallyLocked)
    if (auto Err = Device.dataUnlockImpl(Entry->HstPtr))
      return Err;

  // Finally erase the entry from the map.
  return eraseEntry(*Entry);
}

Error GenericDeviceTy::synchronize(__tgt_async_info *AsyncInfo) {
  if (!AsyncInfo || !AsyncInfo->Queue)
    return Plugin::error("Invalid async info queue");

  if (auto Err = synchronizeImpl(*AsyncInfo))
    return Err;

  for (auto *Ptr : AsyncInfo->AssociatedAllocations)
    if (auto Err = dataDelete(Ptr, TargetAllocTy::TARGET_ALLOC_DEVICE))
      return Err;
  AsyncInfo->AssociatedAllocations.clear();

  return Plugin::success();
}

Error GenericDeviceTy::queryAsync(__tgt_async_info *AsyncInfo) {
  if (!AsyncInfo || !AsyncInfo->Queue)
    return Plugin::error("Invalid async info queue");

  return queryAsyncImpl(*AsyncInfo);
}

Error GenericDeviceTy::memoryVAMap(void **Addr, void *VAddr, size_t *RSize) {
  return Plugin::error("Device does not support VA management");
}

Error GenericDeviceTy::memoryVAUnMap(void *VAddr, size_t Size) {
  return Plugin::error("Device does not support VA management");
}

Error GenericDeviceTy::getDeviceMemorySize(uint64_t &DSize) {
  return Plugin::error(
      "Missing getDeviceMemorySize implementation (required by RR-heuristic)");
}

Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr,
                                            TargetAllocTy Kind) {
  void *Alloc = nullptr;

  if (RecordReplay.isRecordingOrReplaying())
    return RecordReplay.alloc(Size);

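  // TARGET_ALLOC_DEFAULT, TARGET_ALLOC_DEVICE, and
  // TARGET_ALLOC_DEVICE_NON_BLOCKING go through the memory manager when it is
  // enabled; TARGET_ALLOC_HOST and TARGET_ALLOC_SHARED always use the plugin
  // allocator directly.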
1313 switch (Kind) {
1314 case TARGET_ALLOC_DEFAULT:
1315 case TARGET_ALLOC_DEVICE_NON_BLOCKING:
1316 case TARGET_ALLOC_DEVICE:
1317 if (MemoryManager) {
1318 Alloc = MemoryManager->allocate(Size, HostPtr);
1319 if (!Alloc)
1320 return Plugin::error("Failed to allocate from memory manager");
1321 break;
1322 }
1323 [[fallthrough]];
1324 case TARGET_ALLOC_HOST:
1325 case TARGET_ALLOC_SHARED:
1326 Alloc = allocate(Size, HostPtr, Kind);
1327 if (!Alloc)
1328 return Plugin::error("Failed to allocate from device allocator");
1329 }
1330
1331 // Report error if the memory manager or the device allocator did not return
1332 // any memory buffer.
1333 if (!Alloc)
1334 return Plugin::error("Invalid target data allocation kind or requested "
1335 "allocator not implemented yet");
1336
1337 // Register allocated buffer as pinned memory if the type is host memory.
1338 if (Kind == TARGET_ALLOC_HOST)
1339 if (auto Err = PinnedAllocs.registerHostBuffer(Alloc, Alloc, Size))
1340 return std::move(Err);
1341
1342 return Alloc;
1343}
1344
1345Error GenericDeviceTy::dataDelete(void *TgtPtr, TargetAllocTy Kind) {
1346 // Free is a noop when recording or replaying.
1347 if (RecordReplay.isRecordingOrReplaying())
1348 return Plugin::success();
1349
1350 int Res;
1351 if (MemoryManager)
1352 Res = MemoryManager->free(TgtPtr);
1353 else
1354 Res = free(TgtPtr, Kind);
1355
1356 if (Res)
1357 return Plugin::error("Failure to deallocate device pointer %p", TgtPtr);
1358
1359 // Unregister deallocated pinned memory buffer if the type is host memory.
1360 if (Kind == TARGET_ALLOC_HOST)
1361 if (auto Err = PinnedAllocs.unregisterHostBuffer(TgtPtr))
1362 return Err;
1363
1364 return Plugin::success();
1365}
1366
1367Error GenericDeviceTy::dataSubmit(void *TgtPtr, const void *HstPtr,
1368 int64_t Size, __tgt_async_info *AsyncInfo) {
1369 AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
1370
1371 auto Err = dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper);
1372 AsyncInfoWrapper.finalize(Err);
1373 return Err;
1374}
1375
1376Error GenericDeviceTy::dataRetrieve(void *HstPtr, const void *TgtPtr,
1377 int64_t Size, __tgt_async_info *AsyncInfo) {
1378 AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
1379
1380 auto Err = dataRetrieveImpl(HstPtr, TgtPtr, Size, AsyncInfoWrapper);
1381 AsyncInfoWrapper.finalize(Err);
1382 return Err;
1383}
1384
1385Error GenericDeviceTy::dataExchange(const void *SrcPtr, GenericDeviceTy &DstDev,
1386 void *DstPtr, int64_t Size,
1387 __tgt_async_info *AsyncInfo) {
1388 AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
1389
1390 auto Err = dataExchangeImpl(SrcPtr, DstDev, DstPtr, Size, AsyncInfoWrapper);
1391 AsyncInfoWrapper.finalize(Err);
1392 return Err;
1393}
1394
1395Error GenericDeviceTy::launchKernel(void *EntryPtr, void **ArgPtrs,
1396 ptrdiff_t *ArgOffsets,
1397 KernelArgsTy &KernelArgs,
1398 __tgt_async_info *AsyncInfo) {
1399 AsyncInfoWrapperTy AsyncInfoWrapper(
1400 *this, RecordReplay.isRecordingOrReplaying() ? nullptr : AsyncInfo);
1401
1402 GenericKernelTy &GenericKernel =
1403 *reinterpret_cast<GenericKernelTy *>(EntryPtr);
1404
1405 auto Err = GenericKernel.launch(*this, ArgPtrs, ArgOffsets, KernelArgs,
1406 AsyncInfoWrapper);
1407
1408 // 'finalize' here to guarantee next record-replay actions are in-sync
1409 AsyncInfoWrapper.finalize(Err);
1410
1411 if (RecordReplay.isRecordingOrReplaying() &&
1412 RecordReplay.isSaveOutputEnabled())
1413 RecordReplay.saveKernelOutputInfo(GenericKernel.getName());
1414
1415 return Err;
1416}
1417
1418Error GenericDeviceTy::initAsyncInfo(__tgt_async_info **AsyncInfoPtr) {
1419 assert(AsyncInfoPtr && "Invalid async info");
1420
1421 *AsyncInfoPtr = new __tgt_async_info();
1422
1423 AsyncInfoWrapperTy AsyncInfoWrapper(*this, *AsyncInfoPtr);
1424
1425 auto Err = initAsyncInfoImpl(AsyncInfoWrapper);
1426 AsyncInfoWrapper.finalize(Err);
1427 return Err;
1428}
1429
1430Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
1431 assert(DeviceInfo && "Invalid device info");
1432
1433 return initDeviceInfoImpl(DeviceInfo);
1434}
1435
1436Error GenericDeviceTy::printInfo() {
1437 InfoQueueTy InfoQueue;
1438
1439 // Get the vendor-specific info entries describing the device properties.
1440 if (auto Err = obtainInfoImpl(InfoQueue))
1441 return Err;
1442
1443 // Print all info entries.
1444 InfoQueue.print();
1445
1446 return Plugin::success();
1447}
1448
1449Error GenericDeviceTy::createEvent(void **EventPtrStorage) {
1450 return createEventImpl(EventPtrStorage);
1451}
1452
1453Error GenericDeviceTy::destroyEvent(void *EventPtr) {
1454 return destroyEventImpl(EventPtr);
1455}
1456
1457Error GenericDeviceTy::recordEvent(void *EventPtr,
1458 __tgt_async_info *AsyncInfo) {
1459 AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
1460
1461 auto Err = recordEventImpl(EventPtr, AsyncInfoWrapper);
1462 AsyncInfoWrapper.finalize(Err);
1463 return Err;
1464}
1465
1466Error GenericDeviceTy::waitEvent(void *EventPtr, __tgt_async_info *AsyncInfo) {
1467 AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo);
1468
1469 auto Err = waitEventImpl(EventPtr, AsyncInfoWrapper);
1470 AsyncInfoWrapper.finalize(Err);
1471 return Err;
1472}
1473
1474Error GenericDeviceTy::syncEvent(void *EventPtr) {
1475 return syncEventImpl(EventPtr);
1476}
1477
1478bool GenericDeviceTy::useAutoZeroCopy() { return useAutoZeroCopyImpl(); }
1479
1480Error GenericPluginTy::init() {
1481 auto NumDevicesOrErr = initImpl();
1482 if (!NumDevicesOrErr)
1483 return NumDevicesOrErr.takeError();
1484
1485 NumDevices = *NumDevicesOrErr;
1486 if (NumDevices == 0)
1487 return Plugin::success();
1488
1489 assert(Devices.size() == 0 && "Plugin already initialized");
1490 Devices.resize(NumDevices, nullptr);
1491
1492 GlobalHandler = createGlobalHandler();
1493 assert(GlobalHandler && "Invalid global handler");
1494
1495 RPCServer = new RPCServerTy(*this);
1496 assert(RPCServer && "Invalid RPC server");
1497
1498 return Plugin::success();
1499}
1500
1501Error GenericPluginTy::deinit() {
1502 // Deinitialize all active devices.
1503 for (int32_t DeviceId = 0; DeviceId < NumDevices; ++DeviceId) {
1504 if (Devices[DeviceId]) {
1505 if (auto Err = deinitDevice(DeviceId))
1506 return Err;
1507 }
1508 assert(!Devices[DeviceId] && "Device was not deinitialized");
1509 }
1510
1511 // There is no global handler if no device is available.
1512 if (GlobalHandler)
1513 delete GlobalHandler;
1514
1515 if (RPCServer)
1516 delete RPCServer;
1517
1518 // Perform last deinitializations on the plugin.
1519 return deinitImpl();
1520}
1521
1522Error GenericPluginTy::initDevice(int32_t DeviceId) {
1523 assert(!Devices[DeviceId] && "Device already initialized");
1524
1525 // Create the device and save the reference.
1526 GenericDeviceTy *Device = createDevice(*this, DeviceId, NumDevices);
1527 assert(Device && "Invalid device");
1528
1529 // Save the device reference into the list.
1530 Devices[DeviceId] = Device;
1531
1532 // Initialize the device and its resources.
1533 return Device->init(*this);
1534}
1535
1536Error GenericPluginTy::deinitDevice(int32_t DeviceId) {
1537 // The device may be already deinitialized.
1538 if (Devices[DeviceId] == nullptr)
1539 return Plugin::success();
1540
1541 // Deinitialize the device and release its resources.
1542 if (auto Err = Devices[DeviceId]->deinit(*this))
1543 return Err;
1544
1545 // Delete the device and invalidate its reference.
1546 delete Devices[DeviceId];
1547 Devices[DeviceId] = nullptr;
1548
1549 return Plugin::success();
1550}
1551
1552Expected<bool> GenericPluginTy::checkELFImage(StringRef Image) const {
1553 // First check if this image is a regular ELF file.
1554 if (!utils::elf::isELF(Image))
1555 return false;
1556
1557 // Check if this image is an ELF with a matching machine value.
1558 auto MachineOrErr = utils::elf::checkMachine(Image, getMagicElfBits());
1559 if (!MachineOrErr)
1560 return MachineOrErr.takeError();
1561
1562 if (!*MachineOrErr)
1563 return false;
1564
1565 // Perform plugin-dependent checks for the specific architecture if needed.
1566 return isELFCompatible(Image);
1567}
1568
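// Dispatch on the image's file magic: ELF images are checked via
// checkELFImage, LLVM bitcode is validated by the JIT engine, and anything
// else is rejected.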
1569int32_t GenericPluginTy::is_valid_binary(__tgt_device_image *Image) {
1570 StringRef Buffer(reinterpret_cast<const char *>(Image->ImageStart),
1571 target::getPtrDiff(Image->ImageEnd, Image->ImageStart));
1572
1573 auto HandleError = [&](Error Err) -> bool {
1574 [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
1575 DP("Failure to check validity of image %p: %s", Image, ErrStr.c_str());
1576 return false;
1577 };
1578 switch (identify_magic(Buffer)) {
1579 case file_magic::elf:
1580 case file_magic::elf_relocatable:
1581 case file_magic::elf_executable:
1582 case file_magic::elf_shared_object:
1583 case file_magic::elf_core: {
1584 auto MatchOrErr = checkELFImage(Buffer);
1585 if (Error Err = MatchOrErr.takeError())
1586 return HandleError(std::move(Err));
1587 return *MatchOrErr;
1588 }
1589 case file_magic::bitcode: {
1590 auto MatchOrErr = getJIT().checkBitcodeImage(Buffer);
1591 if (Error Err = MatchOrErr.takeError())
1592 return HandleError(std::move(Err));
1593 return *MatchOrErr;
1594 }
1595 default:
1596 return false;
1597 }
1598}
1599
1600int32_t GenericPluginTy::init_device(int32_t DeviceId) {
1601 auto Err = initDevice(DeviceId);
1602 if (Err) {
1603 REPORT("Failure to initialize device %d: %s\n", DeviceId,
1604 toString(std::move(Err)).data());
1605 return OFFLOAD_FAIL;
1606 }
1607
1608 return OFFLOAD_SUCCESS;
1609}
1610
1611int32_t GenericPluginTy::number_of_devices() { return getNumDevices(); }
1612
1613int64_t GenericPluginTy::init_requires(int64_t RequiresFlags) {
1614 setRequiresFlag(RequiresFlags);
1615 return OFFLOAD_SUCCESS;
1616}
1617
1618int32_t GenericPluginTy::is_data_exchangable(int32_t SrcDeviceId,
1619 int32_t DstDeviceId) {
1620 return isDataExchangable(SrcDeviceId, DstDeviceId);
1621}
1622
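// Set up record-replay for the device. Note the asymmetric error handling:
// a failure while recording merely deactivates the mechanism, whereas a
// failure while replaying is fatal.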
1623int32_t GenericPluginTy::initialize_record_replay(int32_t DeviceId,
1624 int64_t MemorySize,
1625 void *VAddr, bool isRecord,
1626 bool SaveOutput,
1627 uint64_t &ReqPtrArgOffset) {
1628 GenericDeviceTy &Device = getDevice(DeviceId);
1629 RecordReplayTy::RRStatusTy Status =
1630 isRecord ? RecordReplayTy::RRStatusTy::RRRecording
1631 : RecordReplayTy::RRStatusTy::RRReplaying;
1632
1633 if (auto Err = RecordReplay.init(&Device, MemorySize, VAddr, Status,
1634 SaveOutput, ReqPtrArgOffset)) {
1635 REPORT("WARNING RR did not intialize RR-properly with %lu bytes"
1636 "(Error: %s)\n",
1637 MemorySize, toString(std::move(Err)).data());
1638 RecordReplay.setStatus(RecordReplayTy::RRStatusTy::RRDeactivated);
1639
1640 if (!isRecord) {
1641 return OFFLOAD_FAIL;
1642 }
1643 }
1644 return OFFLOAD_SUCCESS;
1645}
1646
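// Load a target image onto the device. On success, the opaque handle stored
// in *Binary is the address of the created DeviceImageTy.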
1647int32_t GenericPluginTy::load_binary(int32_t DeviceId,
1648 __tgt_device_image *TgtImage,
1649 __tgt_device_binary *Binary) {
1650 GenericDeviceTy &Device = getDevice(DeviceId);
1651
1652 auto ImageOrErr = Device.loadBinary(*this, TgtImage);
1653 if (!ImageOrErr) {
1654 auto Err = ImageOrErr.takeError();
1655 REPORT("Failure to load binary image %p on device %d: %s\n", TgtImage,
1656 DeviceId, toString(std::move(Err)).data());
1657 return OFFLOAD_FAIL;
1658 }
1659
1660 DeviceImageTy *Image = *ImageOrErr;
1661 assert(Image != nullptr && "Invalid Image");
1662
1663 *Binary = __tgt_device_binary{reinterpret_cast<uint64_t>(Image)};
1664
1665 return OFFLOAD_SUCCESS;
1666}
1667
1668void *GenericPluginTy::data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr,
1669 int32_t Kind) {
1670 auto AllocOrErr =
1671 getDevice(DeviceId).dataAlloc(Size, HostPtr, (TargetAllocTy)Kind);
1672 if (!AllocOrErr) {
1673 auto Err = AllocOrErr.takeError();
1674 REPORT("Failure to allocate device memory: %s\n",
1675 toString(std::move(Err)).data());
1676 return nullptr;
1677 }
1678 assert(*AllocOrErr && "Null pointer upon successful allocation");
1679
1680 return *AllocOrErr;
1681}
1682
1683int32_t GenericPluginTy::data_delete(int32_t DeviceId, void *TgtPtr,
1684 int32_t Kind) {
1685 auto Err =
1686 getDevice(DeviceId).dataDelete(TgtPtr, static_cast<TargetAllocTy>(Kind));
1687 if (Err) {
1688 REPORT("Failure to deallocate device pointer %p: %s\n", TgtPtr,
1689 toString(std::move(Err)).data());
1690 return OFFLOAD_FAIL;
1691 }
1692
1693 return OFFLOAD_SUCCESS;
1694}
1695
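// Pin (lock) a host memory region for direct device access. A null locked
// pointer from an otherwise successful lock is treated as a failure.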
1696int32_t GenericPluginTy::data_lock(int32_t DeviceId, void *Ptr, int64_t Size,
1697 void **LockedPtr) {
1698 auto LockedPtrOrErr = getDevice(DeviceId).dataLock(Ptr, Size);
1699 if (!LockedPtrOrErr) {
1700 auto Err = LockedPtrOrErr.takeError();
1701 REPORT("Failure to lock memory %p: %s\n", Ptr,
1702 toString(std::move(Err)).data());
1703 return OFFLOAD_FAIL;
1704 }
1705
1706 if (!(*LockedPtrOrErr)) {
1707 REPORT("Failure to lock memory %p: obtained a null locked pointer\n", Ptr);
1708 return OFFLOAD_FAIL;
1709 }
1710 *LockedPtr = *LockedPtrOrErr;
1711
1712 return OFFLOAD_SUCCESS;
1713}
1714
1715int32_t GenericPluginTy::data_unlock(int32_t DeviceId, void *Ptr) {
1716 auto Err = getDevice(DeviceId).dataUnlock(Ptr);
1717 if (Err) {
1718 REPORT("Failure to unlock memory %p: %s\n", Ptr,
1719 toString(std::move(Err)).data());
1720 return OFFLOAD_FAIL;
1721 }
1722
1723 return OFFLOAD_SUCCESS;
1724}
1725
1726int32_t GenericPluginTy::data_notify_mapped(int32_t DeviceId, void *HstPtr,
1727 int64_t Size) {
1728 auto Err = getDevice(DeviceId).notifyDataMapped(HstPtr, Size);
1729 if (Err) {
1730 REPORT("Failure to notify data mapped %p: %s\n", HstPtr,
1731 toString(std::move(Err)).data());
1732 return OFFLOAD_FAIL;
1733 }
1734
1735 return OFFLOAD_SUCCESS;
1736}
1737
1738int32_t GenericPluginTy::data_notify_unmapped(int32_t DeviceId, void *HstPtr) {
1739 auto Err = getDevice(DeviceId).notifyDataUnmapped(HstPtr);
1740 if (Err) {
1741 REPORT("Failure to notify data unmapped %p: %s\n", HstPtr,
1742 toString(std::move(Err)).data());
1743 return OFFLOAD_FAIL;
1744 }
1745
1746 return OFFLOAD_SUCCESS;
1747}
1748
1749int32_t GenericPluginTy::data_submit(int32_t DeviceId, void *TgtPtr,
1750 void *HstPtr, int64_t Size) {
1751 return data_submit_async(DeviceId, TgtPtr, HstPtr, Size,
1752 /*AsyncInfoPtr=*/nullptr);
1753}
1754
1755int32_t GenericPluginTy::data_submit_async(int32_t DeviceId, void *TgtPtr,
1756 void *HstPtr, int64_t Size,
1757 __tgt_async_info *AsyncInfoPtr) {
1758 auto Err = getDevice(DeviceId).dataSubmit(TgtPtr, HstPtr, Size, AsyncInfoPtr);
1759 if (Err) {
1760 REPORT("Failure to copy data from host to device. Pointers: host "
1761 "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
1762 DPxPTR(HstPtr), DPxPTR(TgtPtr), Size,
1763 toString(std::move(Err)).data());
1764 return OFFLOAD_FAIL;
1765 }
1766
1767 return OFFLOAD_SUCCESS;
1768}
1769
1770int32_t GenericPluginTy::data_retrieve(int32_t DeviceId, void *HstPtr,
1771 void *TgtPtr, int64_t Size) {
1772 return data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size,
1773 /*AsyncInfoPtr=*/nullptr);
1774}
1775
1776int32_t GenericPluginTy::data_retrieve_async(int32_t DeviceId, void *HstPtr,
1777 void *TgtPtr, int64_t Size,
1778 __tgt_async_info *AsyncInfoPtr) {
1779 auto Err =
1780 getDevice(DeviceId).dataRetrieve(HstPtr, TgtPtr, Size, AsyncInfoPtr);
1781 if (Err) {
1782 REPORT("Faliure to copy data from device to host. Pointers: host "
1783 "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
1784 DPxPTR(HstPtr), DPxPTR(TgtPtr), Size,
1785 toString(std::move(Err)).data());
1786 return OFFLOAD_FAIL;
1787 }
1788
1789 return OFFLOAD_SUCCESS;
1790}
1791
1792int32_t GenericPluginTy::data_exchange(int32_t SrcDeviceId, void *SrcPtr,
1793 int32_t DstDeviceId, void *DstPtr,
1794 int64_t Size) {
1795 return data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr, Size,
1796 /*AsyncInfoPtr=*/nullptr);
1797}
1798
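// Device-to-device copy. The exchange is issued from the source device,
// which receives the destination device as an argument.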
1799int32_t GenericPluginTy::data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
1800 int DstDeviceId, void *DstPtr,
1801 int64_t Size,
1802 __tgt_async_info *AsyncInfo) {
1803 GenericDeviceTy &SrcDevice = getDevice(SrcDeviceId);
1804 GenericDeviceTy &DstDevice = getDevice(DstDeviceId);
1805 auto Err = SrcDevice.dataExchange(SrcPtr, DstDevice, DstPtr, Size, AsyncInfo);
1806 if (Err) {
1807 REPORT("Failure to copy data from device (%d) to device (%d). Pointers: "
1808 "host = " DPxMOD ", device = " DPxMOD ", size = %" PRId64 ": %s\n",
1809 SrcDeviceId, DstDeviceId, DPxPTR(SrcPtr), DPxPTR(DstPtr), Size,
1810 toString(std::move(Err)).data());
1811 return OFFLOAD_FAIL;
1812 }
1813
1814 return OFFLOAD_SUCCESS;
1815}
1816
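// Launch a kernel. TgtEntryPtr is the host-side kernel handle produced by
// get_function below, not a device code address.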
1817int32_t GenericPluginTy::launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
1818 void **TgtArgs, ptrdiff_t *TgtOffsets,
1819 KernelArgsTy *KernelArgs,
1820 __tgt_async_info *AsyncInfoPtr) {
1821 auto Err = getDevice(DeviceId).launchKernel(TgtEntryPtr, TgtArgs, TgtOffsets,
1822 *KernelArgs, AsyncInfoPtr);
1823 if (Err) {
1824 REPORT("Failure to run target region " DPxMOD " in device %d: %s\n",
1825 DPxPTR(TgtEntryPtr), DeviceId, toString(std::move(Err)).data());
1826 return OFFLOAD_FAIL;
1827 }
1828
1829 return OFFLOAD_SUCCESS;
1830}
1831
1832int32_t GenericPluginTy::synchronize(int32_t DeviceId,
1833 __tgt_async_info *AsyncInfoPtr) {
1834 auto Err = getDevice(DeviceId).synchronize(AsyncInfoPtr);
1835 if (Err) {
1836 REPORT("Failure to synchronize stream %p: %s\n", AsyncInfoPtr->Queue,
1837 toString(std::move(Err)).data());
1838 return OFFLOAD_FAIL;
1839 }
1840
1841 return OFFLOAD_SUCCESS;
1842}
1843
1844int32_t GenericPluginTy::query_async(int32_t DeviceId,
1845 __tgt_async_info *AsyncInfoPtr) {
1846 auto Err = getDevice(DeviceId).queryAsync(AsyncInfoPtr);
1847 if (Err) {
1848 REPORT("Failure to query stream %p: %s\n", AsyncInfoPtr->Queue,
1849 toString(std::move(Err)).data());
1850 return OFFLOAD_FAIL;
1851 }
1852
1853 return OFFLOAD_SUCCESS;
1854}
1855
1856void GenericPluginTy::print_device_info(int32_t DeviceId) {
1857 if (auto Err = getDevice(DeviceId).printInfo())
1858 REPORT("Failure to print device %d info: %s\n", DeviceId,
1859 toString(std::move(Err)).data());
1860}
1861
1862int32_t GenericPluginTy::create_event(int32_t DeviceId, void **EventPtr) {
1863 auto Err = getDevice(DeviceId).createEvent(EventPtr);
1864 if (Err) {
1865 REPORT("Failure to create event: %s\n", toString(std::move(Err)).data());
1866 return OFFLOAD_FAIL;
1867 }
1868
1869 return OFFLOAD_SUCCESS;
1870}
1871
1872int32_t GenericPluginTy::record_event(int32_t DeviceId, void *EventPtr,
1873 __tgt_async_info *AsyncInfoPtr) {
1874 auto Err = getDevice(DeviceId).recordEvent(EventPtr, AsyncInfoPtr);
1875 if (Err) {
1876 REPORT("Failure to record event %p: %s\n", EventPtr,
1877 toString(std::move(Err)).data());
1878 return OFFLOAD_FAIL;
1879 }
1880
1881 return OFFLOAD_SUCCESS;
1882}
1883
1884int32_t GenericPluginTy::wait_event(int32_t DeviceId, void *EventPtr,
1885 __tgt_async_info *AsyncInfoPtr) {
1886 auto Err = getDevice(DeviceId).waitEvent(EventPtr, AsyncInfoPtr);
1887 if (Err) {
1888 REPORT("Failure to wait event %p: %s\n", EventPtr,
1889 toString(std::move(Err)).data());
1890 return OFFLOAD_FAIL;
1891 }
1892
1893 return OFFLOAD_SUCCESS;
1894}
1895
1896int32_t GenericPluginTy::sync_event(int32_t DeviceId, void *EventPtr) {
1897 auto Err = getDevice(DeviceId).syncEvent(EventPtr);
1898 if (Err) {
1899 REPORT("Failure to synchronize event %p: %s\n", EventPtr,
1900 toString(std::move(Err)).data());
1901 return OFFLOAD_FAIL;
1902 }
1903
1904 return OFFLOAD_SUCCESS;
1905}
1906
1907int32_t GenericPluginTy::destroy_event(int32_t DeviceId, void *EventPtr) {
1908 auto Err = getDevice(DeviceId).destroyEvent(EventPtr);
1909 if (Err) {
1910 REPORT("Failure to destroy event %p: %s\n", EventPtr,
1911 toString(std::move(Err)).data());
1912 return OFFLOAD_FAIL;
1913 }
1914
1915 return OFFLOAD_SUCCESS;
1916}
1917
1918void GenericPluginTy::set_info_flag(uint32_t NewInfoLevel) {
1919 std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
1920 InfoLevel.store(NewInfoLevel);
1921}
1922
1923int32_t GenericPluginTy::init_async_info(int32_t DeviceId,
1924 __tgt_async_info **AsyncInfoPtr) {
1925 assert(AsyncInfoPtr && "Invalid async info");
1926
1927 auto Err = getDevice(DeviceId).initAsyncInfo(AsyncInfoPtr);
1928 if (Err) {
1929 REPORT("Failure to initialize async info at " DPxMOD " on device %d: %s\n",
1930 DPxPTR(*AsyncInfoPtr), DeviceId, toString(std::move(Err)).data());
1931 return OFFLOAD_FAIL;
1932 }
1933
1934 return OFFLOAD_SUCCESS;
1935}
1936
1937int32_t GenericPluginTy::init_device_info(int32_t DeviceId,
1938 __tgt_device_info *DeviceInfo,
1939 const char **ErrStr) {
1940 *ErrStr = "";
1941
1942 auto Err = getDevice(DeviceId).initDeviceInfo(DeviceInfo);
1943 if (Err) {
1944 REPORT("Failure to initialize device info at " DPxMOD " on device %d: %s\n",
1945 DPxPTR(DeviceInfo), DeviceId, toString(std::move(Err)).data());
1946 return OFFLOAD_FAIL;
1947 }
1948
1949 return OFFLOAD_SUCCESS;
1950}
1951
1952int32_t GenericPluginTy::set_device_offset(int32_t DeviceIdOffset) {
1953 setDeviceIdStartIndex(DeviceIdOffset);
1954
1955 return OFFLOAD_SUCCESS;
1956}
1957
1958int32_t GenericPluginTy::use_auto_zero_copy(int32_t DeviceId) {
1959 // Automatic zero-copy only applies to programs that did
1960 // not request unified_shared_memory and are deployed on an
1961 // APU with XNACK enabled.
1962 if (getRequiresFlags() & OMP_REQ_UNIFIED_SHARED_MEMORY)
1963 return false;
1964 return getDevice(DeviceId).useAutoZeroCopy();
1965}
1966
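// Look up the device address of a global in a loaded image. While recording,
// the global is also registered with the record-replay machinery.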
1967int32_t GenericPluginTy::get_global(__tgt_device_binary Binary, uint64_t Size,
1968 const char *Name, void **DevicePtr) {
1969 assert(Binary.handle && "Invalid device binary handle");
1970 DeviceImageTy &Image = *reinterpret_cast<DeviceImageTy *>(Binary.handle);
1971
1972 GenericDeviceTy &Device = Image.getDevice();
1973
1974 GlobalTy DeviceGlobal(Name, Size);
1975 GenericGlobalHandlerTy &GHandler = getGlobalHandler();
1976 if (auto Err =
1977 GHandler.getGlobalMetadataFromDevice(Device, Image, DeviceGlobal)) {
1978 REPORT("Failure to look up global address: %s\n",
1979 toString(std::move(Err)).data());
1980 return OFFLOAD_FAIL;
1981 }
1982
  *DevicePtr = DeviceGlobal.getPtr();
  assert(*DevicePtr && "Invalid device global address");
1985
1986 // Save the loaded globals if we are recording.
1987 if (RecordReplay.isRecording())
1988 RecordReplay.addEntry(Name, Size, *DevicePtr);
1989
1990 return OFFLOAD_SUCCESS;
1991}
1992
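// Construct and initialize the kernel named Name from a loaded image. The
// handle returned in *KernelPtr is a host-side GenericKernelTy pointer.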
1993int32_t GenericPluginTy::get_function(__tgt_device_binary Binary,
1994 const char *Name, void **KernelPtr) {
1995 assert(Binary.handle && "Invalid device binary handle");
1996 DeviceImageTy &Image = *reinterpret_cast<DeviceImageTy *>(Binary.handle);
1997
1998 GenericDeviceTy &Device = Image.getDevice();
1999
2000 auto KernelOrErr = Device.constructKernel(Name);
2001 if (Error Err = KernelOrErr.takeError()) {
2002 REPORT("Failure to look up kernel: %s\n", toString(std::move(Err)).data());
2003 return OFFLOAD_FAIL;
2004 }
2005
2006 GenericKernelTy &Kernel = *KernelOrErr;
2007 if (auto Err = Kernel.init(Device, Image)) {
2008 REPORT("Failure to init kernel: %s\n", toString(std::move(Err)).data());
2009 return OFFLOAD_FAIL;
2010 }
2011
2012 // Note that this is not the kernel's device address.
2013 *KernelPtr = &Kernel;
2014 return OFFLOAD_SUCCESS;
2015}
2016
2017bool llvm::omp::target::plugin::libomptargetSupportsRPC() {
2018#ifdef LIBOMPTARGET_RPC_SUPPORT
2019 return true;
2020#else
2021 return false;
2022#endif
2023}
2024
/// Exposed library API functions, basically wrappers around the
/// GenericDeviceTy functionality with the same name. All non-async functions
/// are redirected to the async versions right away with a NULL AsyncInfoPtr.
2028#ifdef __cplusplus
2029extern "C" {
2030#endif
2031
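// As an illustration only (this sketch is not part of the plugin API), a
// host runtime would typically drive these entry points roughly as follows,
// given some valid __tgt_device_image Image:
//
//   if (__tgt_rtl_init_plugin() == OFFLOAD_SUCCESS &&
//       __tgt_rtl_is_valid_binary(&Image) &&
//       __tgt_rtl_init_device(/*DeviceId=*/0) == OFFLOAD_SUCCESS) {
//     __tgt_device_binary Binary;
//     __tgt_rtl_load_binary(/*DeviceId=*/0, &Image, &Binary);
//   }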
2032int32_t __tgt_rtl_init_plugin() {
2033 auto Err = PluginTy::initIfNeeded();
2034 if (Err) {
2035 [[maybe_unused]] std::string ErrStr = toString(std::move(Err));
2036 DP("Failed to init plugin: %s", ErrStr.c_str());
2037 return OFFLOAD_FAIL;
2038 }
2039
2040 return OFFLOAD_SUCCESS;
2041}
2042
2043int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
2044 if (!PluginTy::isActive())
2045 return false;
2046
2047 return PluginTy::get().is_valid_binary(Image);
2048}
2049
2050int32_t __tgt_rtl_init_device(int32_t DeviceId) {
2051 return PluginTy::get().init_device(DeviceId);
2052}
2053
2054int32_t __tgt_rtl_number_of_devices() {
2055 return PluginTy::get().number_of_devices();
2056}
2057
2058int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
2059 return PluginTy::get().init_requires(RequiresFlags);
2060}
2061
2062int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDeviceId,
2063 int32_t DstDeviceId) {
2064 return PluginTy::get().is_data_exchangable(SrcDeviceId, DstDeviceId);
2065}
2066
2067int32_t __tgt_rtl_initialize_record_replay(int32_t DeviceId, int64_t MemorySize,
2068 void *VAddr, bool isRecord,
2069 bool SaveOutput,
2070 uint64_t &ReqPtrArgOffset) {
2071 return PluginTy::get().initialize_record_replay(
2072 DeviceId, MemorySize, VAddr, isRecord, SaveOutput, ReqPtrArgOffset);
2073}
2074
2075int32_t __tgt_rtl_load_binary(int32_t DeviceId, __tgt_device_image *TgtImage,
2076 __tgt_device_binary *Binary) {
2077 return PluginTy::get().load_binary(DeviceId, TgtImage, Binary);
2078}
2079
2080void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr,
2081 int32_t Kind) {
2082 return PluginTy::get().data_alloc(DeviceId, Size, HostPtr, Kind);
2083}
2084
2085int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind) {
2086 return PluginTy::get().data_delete(DeviceId, TgtPtr, Kind);
2087}
2088
2089int32_t __tgt_rtl_data_lock(int32_t DeviceId, void *Ptr, int64_t Size,
2090 void **LockedPtr) {
2091 return PluginTy::get().data_lock(DeviceId, Ptr, Size, LockedPtr);
2092}
2093
2094int32_t __tgt_rtl_data_unlock(int32_t DeviceId, void *Ptr) {
2095 return PluginTy::get().data_unlock(DeviceId, Ptr);
2096}
2097
2098int32_t __tgt_rtl_data_notify_mapped(int32_t DeviceId, void *HstPtr,
2099 int64_t Size) {
2100 return PluginTy::get().data_notify_mapped(DeviceId, HstPtr, Size);
2101}
2102
2103int32_t __tgt_rtl_data_notify_unmapped(int32_t DeviceId, void *HstPtr) {
2104 return PluginTy::get().data_notify_unmapped(DeviceId, HstPtr);
2105}
2106
2107int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
2108 int64_t Size) {
2109 return PluginTy::get().data_submit(DeviceId, TgtPtr, HstPtr, Size);
2110}
2111
2112int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr,
2113 void *HstPtr, int64_t Size,
2114 __tgt_async_info *AsyncInfoPtr) {
2115 return PluginTy::get().data_submit_async(DeviceId, TgtPtr, HstPtr, Size,
2116 AsyncInfoPtr);
2117}
2118
2119int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
2120 int64_t Size) {
2121 return PluginTy::get().data_retrieve(DeviceId, HstPtr, TgtPtr, Size);
2122}
2123
2124int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr,
2125 void *TgtPtr, int64_t Size,
2126 __tgt_async_info *AsyncInfoPtr) {
2127 return PluginTy::get().data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size,
2128 AsyncInfoPtr);
2129}
2130
2131int32_t __tgt_rtl_data_exchange(int32_t SrcDeviceId, void *SrcPtr,
2132 int32_t DstDeviceId, void *DstPtr,
2133 int64_t Size) {
2134 return PluginTy::get().data_exchange(SrcDeviceId, SrcPtr, DstDeviceId, DstPtr,
2135 Size);
2136}
2137
2138int32_t __tgt_rtl_data_exchange_async(int32_t SrcDeviceId, void *SrcPtr,
2139 int DstDeviceId, void *DstPtr,
2140 int64_t Size,
2141 __tgt_async_info *AsyncInfo) {
2142 return PluginTy::get().data_exchange_async(SrcDeviceId, SrcPtr, DstDeviceId,
2143 DstPtr, Size, AsyncInfo);
2144}
2145
2146int32_t __tgt_rtl_launch_kernel(int32_t DeviceId, void *TgtEntryPtr,
2147 void **TgtArgs, ptrdiff_t *TgtOffsets,
2148 KernelArgsTy *KernelArgs,
2149 __tgt_async_info *AsyncInfoPtr) {
2150 return PluginTy::get().launch_kernel(DeviceId, TgtEntryPtr, TgtArgs,
2151 TgtOffsets, KernelArgs, AsyncInfoPtr);
2152}
2153
2154int32_t __tgt_rtl_synchronize(int32_t DeviceId,
2155 __tgt_async_info *AsyncInfoPtr) {
2156 return PluginTy::get().synchronize(DeviceId, AsyncInfoPtr);
2157}
2158
2159int32_t __tgt_rtl_query_async(int32_t DeviceId,
2160 __tgt_async_info *AsyncInfoPtr) {
2161 return PluginTy::get().query_async(DeviceId, AsyncInfoPtr);
2162}
2163
2164void __tgt_rtl_print_device_info(int32_t DeviceId) {
2165 PluginTy::get().print_device_info(DeviceId);
2166}
2167
2168int32_t __tgt_rtl_create_event(int32_t DeviceId, void **EventPtr) {
2169 return PluginTy::get().create_event(DeviceId, EventPtr);
2170}
2171
2172int32_t __tgt_rtl_record_event(int32_t DeviceId, void *EventPtr,
2173 __tgt_async_info *AsyncInfoPtr) {
2174 return PluginTy::get().record_event(DeviceId, EventPtr, AsyncInfoPtr);
2175}
2176
2177int32_t __tgt_rtl_wait_event(int32_t DeviceId, void *EventPtr,
2178 __tgt_async_info *AsyncInfoPtr) {
2179 return PluginTy::get().wait_event(DeviceId, EventPtr, AsyncInfoPtr);
2180}
2181
2182int32_t __tgt_rtl_sync_event(int32_t DeviceId, void *EventPtr) {
2183 return PluginTy::get().sync_event(DeviceId, EventPtr);
2184}
2185
2186int32_t __tgt_rtl_destroy_event(int32_t DeviceId, void *EventPtr) {
2187 return PluginTy::get().destroy_event(DeviceId, EventPtr);
2188}
2189
2190void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
2191 return PluginTy::get().set_info_flag(NewInfoLevel);
2192}
2193
2194int32_t __tgt_rtl_init_async_info(int32_t DeviceId,
2195 __tgt_async_info **AsyncInfoPtr) {
2196 return PluginTy::get().init_async_info(DeviceId, AsyncInfoPtr);
2197}
2198
2199int32_t __tgt_rtl_init_device_info(int32_t DeviceId,
2200 __tgt_device_info *DeviceInfo,
2201 const char **ErrStr) {
2202 return PluginTy::get().init_device_info(DeviceId, DeviceInfo, ErrStr);
2203}
2204
2205int32_t __tgt_rtl_set_device_offset(int32_t DeviceIdOffset) {
2206 return PluginTy::get().set_device_offset(DeviceIdOffset);
2207}
2208
2209int32_t __tgt_rtl_use_auto_zero_copy(int32_t DeviceId) {
2210 return PluginTy::get().use_auto_zero_copy(DeviceId);
2211}
2212
2213int32_t __tgt_rtl_get_global(__tgt_device_binary Binary, uint64_t Size,
2214 const char *Name, void **DevicePtr) {
2215 return PluginTy::get().get_global(Binary, Size, Name, DevicePtr);
2216}
2217
2218int32_t __tgt_rtl_get_function(__tgt_device_binary Binary, const char *Name,
2219 void **KernelPtr) {
2220 return PluginTy::get().get_function(Binary, Name, KernelPtr);
2221}
2222
2223#ifdef __cplusplus
2224}
2225#endif
2226
