1//===-- PluginManager.cpp - Plugin loading and communication API ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Functionality for handling plugins.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PluginManager.h"
14#include "OffloadPolicy.h"
15#include "Shared/Debug.h"
16#include "Shared/Profile.h"
17#include "device.h"
18
19#include "llvm/Support/Error.h"
20#include "llvm/Support/ErrorHandling.h"
21#include <memory>
22
23using namespace llvm;
24using namespace llvm::sys;
25
/// Process-wide plugin manager handle. It is defined as null here and nothing
/// in this file assigns it.
/// NOTE(review): presumably set by the runtime's start-up code elsewhere;
/// confirm before relying on it being non-null.
PluginManager *PM = nullptr;

// Every plugin exports this method to create an instance of the plugin type.
// PLUGIN_TARGET turns a target name into the declaration of its extern "C"
// factory function, createPlugin_<Name>(); Shared/Targets.def supplies the
// names.
#define PLUGIN_TARGET(Name) extern "C" GenericPluginTy *createPlugin_##Name();
#include "Shared/Targets.def"
31
32void PluginManager::init() {
33 TIMESCOPE();
34 if (OffloadPolicy::isOffloadDisabled()) {
35 DP("Offload is disabled. Skipping plugin initialization\n");
36 return;
37 }
38
39 DP("Loading RTLs...\n");
40
41 // Attempt to create an instance of each supported plugin.
42#define PLUGIN_TARGET(Name) \
43 do { \
44 Plugins.emplace_back( \
45 std::unique_ptr<GenericPluginTy>(createPlugin_##Name())); \
46 } while (false);
47#include "Shared/Targets.def"
48
49 DP("RTLs loaded!\n");
50}
51
52void PluginManager::deinit() {
53 TIMESCOPE();
54 DP("Unloading RTLs...\n");
55
56 for (auto &Plugin : Plugins) {
57 if (!Plugin->is_initialized())
58 continue;
59
60 if (auto Err = Plugin->deinit()) {
61 [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
62 DP("Failed to deinit plugin: %s\n", InfoMsg.c_str());
63 }
64 Plugin.release();
65 }
66
67 DP("RTLs unloaded!\n");
68}
69
70bool PluginManager::initializePlugin(GenericPluginTy &Plugin) {
71 if (Plugin.is_initialized())
72 return true;
73
74 if (auto Err = Plugin.init()) {
75 [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
76 DP("Failed to init plugin: %s\n", InfoMsg.c_str());
77 return false;
78 }
79
80 DP("Registered plugin %s with %d visible device(s)\n", Plugin.getName(),
81 Plugin.number_of_devices());
82 return true;
83}
84
85bool PluginManager::initializeDevice(GenericPluginTy &Plugin,
86 int32_t DeviceId) {
87 if (Plugin.is_device_initialized(DeviceId)) {
88 auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
89 (*ExclusiveDevicesAccessor)[PM->DeviceIds[std::make_pair(&Plugin,
90 DeviceId)]]
91 ->setHasPendingImages(true);
92 return true;
93 }
94
95 // Initialize the device information for the RTL we are about to use.
96 auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
97
98 int32_t UserId = ExclusiveDevicesAccessor->size();
99
100 // Set the device identifier offset in the plugin.
101#ifdef OMPT_SUPPORT
102 Plugin.set_device_identifier(UserId, DeviceId);
103#endif
104
105 auto Device = std::make_unique<DeviceTy>(&Plugin, UserId, DeviceId);
106 if (auto Err = Device->init()) {
107 [[maybe_unused]] std::string InfoMsg = toString(std::move(Err));
108 DP("Failed to init device %d: %s\n", DeviceId, InfoMsg.c_str());
109 return false;
110 }
111
112 ExclusiveDevicesAccessor->push_back(std::move(Device));
113
114 // We need to map between the plugin's device identifier and the one
115 // that OpenMP will use.
116 PM->DeviceIds[std::make_pair(&Plugin, DeviceId)] = UserId;
117
118 return true;
119}
120
121void PluginManager::initializeAllDevices() {
122 for (auto &Plugin : plugins()) {
123 if (!initializePlugin(Plugin))
124 continue;
125
126 for (int32_t DeviceId = 0; DeviceId < Plugin.number_of_devices();
127 ++DeviceId) {
128 initializeDevice(Plugin, DeviceId);
129 }
130 }
131}
132
133// Returns a pointer to the binary descriptor, upgrading from a legacy format if
134// necessary.
135__tgt_bin_desc *PluginManager::upgradeLegacyEntries(__tgt_bin_desc *Desc) {
136 struct LegacyEntryTy {
137 void *Address;
138 char *SymbolName;
139 size_t Size;
140 int32_t Flags;
141 int32_t Data;
142 };
143
144 if (UpgradedDescriptors.contains(Desc))
145 return &UpgradedDescriptors[Desc];
146
147 if (Desc->HostEntriesBegin == Desc->HostEntriesEnd ||
148 Desc->HostEntriesBegin->Reserved == 0)
149 return Desc;
150
151 // The new format mandates that each entry starts with eight bytes of zeroes.
152 // This allows us to detect the old format as this is a null pointer.
153 llvm::SmallVector<llvm::offloading::EntryTy, 0> &NewEntries =
154 LegacyEntries.emplace_back();
155 for (LegacyEntryTy &Entry : llvm::make_range(
156 reinterpret_cast<LegacyEntryTy *>(Desc->HostEntriesBegin),
157 reinterpret_cast<LegacyEntryTy *>(Desc->HostEntriesEnd))) {
158 llvm::offloading::EntryTy &NewEntry = NewEntries.emplace_back();
159
160 NewEntry.Address = Entry.Address;
161 NewEntry.Flags = Entry.Flags;
162 NewEntry.Data = Entry.Data;
163 NewEntry.Size = Entry.Size;
164 NewEntry.SymbolName = Entry.SymbolName;
165 NewEntry.Kind = object::OffloadKind::OFK_OpenMP;
166 }
167
168 // Create a new image struct so we can update the entries list.
169 llvm::SmallVector<__tgt_device_image, 0> &NewImages =
170 LegacyImages.emplace_back();
171 for (int32_t Image = 0; Image < Desc->NumDeviceImages; ++Image)
172 NewImages.emplace_back(
173 __tgt_device_image{Desc->DeviceImages[Image].ImageStart,
174 Desc->DeviceImages[Image].ImageEnd,
175 NewEntries.begin(), NewEntries.end()});
176
177 // Create the new binary descriptor containing the newly created memory.
178 __tgt_bin_desc &NewDesc = UpgradedDescriptors[Desc];
179 NewDesc.DeviceImages = NewImages.begin();
180 NewDesc.NumDeviceImages = Desc->NumDeviceImages;
181 NewDesc.HostEntriesBegin = NewEntries.begin();
182 NewDesc.HostEntriesEnd = NewEntries.end();
183
184 return &NewDesc;
185}
186
187void PluginManager::registerLib(__tgt_bin_desc *Desc) {
188 PM->RTLsMtx.lock();
189
190 // Upgrade the entries from the legacy implementation if necessary.
191 Desc = upgradeLegacyEntries(Desc);
192
193 // Add in all the OpenMP requirements associated with this binary.
194 for (llvm::offloading::EntryTy &Entry :
195 llvm::make_range(Desc->HostEntriesBegin, Desc->HostEntriesEnd))
196 if (Entry.Kind == object::OffloadKind::OFK_OpenMP &&
197 Entry.Flags == OMP_REGISTER_REQUIRES)
198 PM->addRequirements(Entry.Data);
199
200 // Extract the executable image and extra information if available.
201 for (int32_t i = 0; i < Desc->NumDeviceImages; ++i)
202 PM->addDeviceImage(*Desc, Desc->DeviceImages[i]);
203
204 // Register the images with the RTLs that understand them, if any.
205 llvm::DenseMap<GenericPluginTy *, llvm::DenseSet<int32_t>> UsedDevices;
206 for (int32_t i = 0; i < Desc->NumDeviceImages; ++i) {
207 // Obtain the image and information that was previously extracted.
208 __tgt_device_image *Img = &Desc->DeviceImages[i];
209
210 GenericPluginTy *FoundRTL = nullptr;
211
212 // Scan the RTLs that have associated images until we find one that supports
213 // the current image.
214 for (auto &R : plugins()) {
215 if (!R.is_plugin_compatible(Img))
216 continue;
217
218 if (!initializePlugin(R))
219 continue;
220
221 if (!R.number_of_devices()) {
222 DP("Skipping plugin %s with no visible devices\n", R.getName());
223 continue;
224 }
225
226 for (int32_t DeviceId = 0; DeviceId < R.number_of_devices(); ++DeviceId) {
227 // We only want a single matching image to be registered for each binary
228 // descriptor. This prevents multiple of the same image from being
229 // registered for the same device in the case that they are mutually
230 // compatible, such as sm_80 and sm_89.
231 if (UsedDevices[&R].contains(DeviceId)) {
232 DP("Image " DPxMOD
233 " is a duplicate, not loaded on RTL %s device %d!\n",
234 DPxPTR(Img->ImageStart), R.getName(), DeviceId);
235 continue;
236 }
237
238 if (!R.is_device_compatible(DeviceId, Img))
239 continue;
240
241 DP("Image " DPxMOD " is compatible with RTL %s device %d!\n",
242 DPxPTR(Img->ImageStart), R.getName(), DeviceId);
243
244 if (!initializeDevice(R, DeviceId))
245 continue;
246
247 // Initialize (if necessary) translation table for this library.
248 PM->TrlTblMtx.lock();
249 if (!PM->HostEntriesBeginToTransTable.count(Desc->HostEntriesBegin)) {
250 PM->HostEntriesBeginRegistrationOrder.push_back(
251 Desc->HostEntriesBegin);
252 TranslationTable &TT =
253 (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin];
254 TT.HostTable.EntriesBegin = Desc->HostEntriesBegin;
255 TT.HostTable.EntriesEnd = Desc->HostEntriesEnd;
256 }
257
258 // Retrieve translation table for this library.
259 TranslationTable &TT =
260 (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin];
261
262 DP("Registering image " DPxMOD " with RTL %s!\n",
263 DPxPTR(Img->ImageStart), R.getName());
264
265 auto UserId = PM->DeviceIds[std::make_pair(&R, DeviceId)];
266 if (TT.TargetsTable.size() < static_cast<size_t>(UserId + 1)) {
267 TT.DeviceTables.resize(UserId + 1, {});
268 TT.TargetsImages.resize(UserId + 1, nullptr);
269 TT.TargetsEntries.resize(UserId + 1, {});
270 TT.TargetsTable.resize(UserId + 1, nullptr);
271 }
272
273 // Register the image for this target type and invalidate the table.
274 TT.TargetsImages[UserId] = Img;
275 TT.TargetsTable[UserId] = nullptr;
276
277 UsedDevices[&R].insert(DeviceId);
278 PM->UsedImages.insert(Img);
279 FoundRTL = &R;
280
281 PM->TrlTblMtx.unlock();
282 }
283 }
284 if (!FoundRTL)
285 DP("No RTL found for image " DPxMOD "!\n", DPxPTR(Img->ImageStart));
286 }
287 PM->RTLsMtx.unlock();
288
289 bool UseAutoZeroCopy = false;
290
291 auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
292 // APUs are homogeneous set of GPUs. Check the first device for
293 // configuring Auto Zero-Copy.
294 if (ExclusiveDevicesAccessor->size() > 0) {
295 auto &Device = *(*ExclusiveDevicesAccessor)[0];
296 UseAutoZeroCopy = Device.useAutoZeroCopy();
297 }
298
299 if (UseAutoZeroCopy)
300 addRequirements(OMPX_REQ_AUTO_ZERO_COPY);
301
302 DP("Done registering entries!\n");
303}
304
// Temporary forward declaration, old style CTor/DTor handling is going away.
// NOTE(review): no call to target() is visible in this part of the file;
// confirm whether the declaration is still required.
int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
           KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo);
308
309void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
310 DP("Unloading target library!\n");
311
312 Desc = upgradeLegacyEntries(Desc);
313
314 PM->RTLsMtx.lock();
315 // Find which RTL understands each image, if any.
316 for (DeviceImageTy &DI : PM->deviceImages()) {
317 // Obtain the image and information that was previously extracted.
318 __tgt_device_image *Img = &DI.getExecutableImage();
319
320 GenericPluginTy *FoundRTL = NULL;
321
322 // Scan the RTLs that have associated images until we find one that supports
323 // the current image. We only need to scan RTLs that are already being used.
324 for (auto &R : plugins()) {
325 if (R.is_initialized())
326 continue;
327
328 // Ensure that we do not use any unused images associated with this RTL.
329 if (!UsedImages.contains(Img))
330 continue;
331
332 FoundRTL = &R;
333
334 DP("Unregistered image " DPxMOD " from RTL\n", DPxPTR(Img->ImageStart));
335
336 break;
337 }
338
339 // if no RTL was found proceed to unregister the next image
340 if (!FoundRTL) {
341 DP("No RTLs in use support the image " DPxMOD "!\n",
342 DPxPTR(Img->ImageStart));
343 }
344 }
345 PM->RTLsMtx.unlock();
346 DP("Done unregistering images!\n");
347
348 // Remove entries from PM->HostPtrToTableMap
349 PM->TblMapMtx.lock();
350 for (llvm::offloading::EntryTy *Cur = Desc->HostEntriesBegin;
351 Cur < Desc->HostEntriesEnd; ++Cur) {
352 if (Cur->Kind == object::OffloadKind::OFK_OpenMP)
353 PM->HostPtrToTableMap.erase(Cur->Address);
354 }
355
356 // Remove translation table for this descriptor.
357 auto TransTable =
358 PM->HostEntriesBeginToTransTable.find(Desc->HostEntriesBegin);
359 if (TransTable != PM->HostEntriesBeginToTransTable.end()) {
360 DP("Removing translation table for descriptor " DPxMOD "\n",
361 DPxPTR(Desc->HostEntriesBegin));
362 PM->HostEntriesBeginToTransTable.erase(TransTable);
363 } else {
364 DP("Translation table for descriptor " DPxMOD " cannot be found, probably "
365 "it has been already removed.\n",
366 DPxPTR(Desc->HostEntriesBegin));
367 }
368
369 PM->TblMapMtx.unlock();
370
371 DP("Done unregistering library!\n");
372}
373
374/// Map global data and execute pending ctors
375static int loadImagesOntoDevice(DeviceTy &Device) {
376 /*
377 * Map global data
378 */
379 int32_t DeviceId = Device.DeviceID;
380 int Rc = OFFLOAD_SUCCESS;
381 {
382 std::lock_guard<decltype(PM->TrlTblMtx)> LG(PM->TrlTblMtx);
383 for (auto *HostEntriesBegin : PM->HostEntriesBeginRegistrationOrder) {
384 TranslationTable *TransTable =
385 &PM->HostEntriesBeginToTransTable[HostEntriesBegin];
386 DP("Trans table %p : %p\n", TransTable->HostTable.EntriesBegin,
387 TransTable->HostTable.EntriesEnd);
388 if (TransTable->HostTable.EntriesBegin ==
389 TransTable->HostTable.EntriesEnd) {
390 // No host entry so no need to proceed
391 continue;
392 }
393
394 if (TransTable->TargetsTable[DeviceId] != 0) {
395 // Library entries have already been processed
396 continue;
397 }
398
399 // 1) get image.
400 assert(TransTable->TargetsImages.size() > (size_t)DeviceId &&
401 "Not expecting a device ID outside the table's bounds!");
402 __tgt_device_image *Img = TransTable->TargetsImages[DeviceId];
403 if (!Img) {
404 REPORT("No image loaded for device id %d.\n", DeviceId);
405 Rc = OFFLOAD_FAIL;
406 break;
407 }
408
409 // 2) Load the image onto the given device.
410 auto BinaryOrErr = Device.loadBinary(Img);
411 if (llvm::Error Err = BinaryOrErr.takeError()) {
412 REPORT("Failed to load image %s\n",
413 llvm::toString(std::move(Err)).c_str());
414 Rc = OFFLOAD_FAIL;
415 break;
416 }
417
418 // 3) Create the translation table.
419 llvm::SmallVector<llvm::offloading::EntryTy> &DeviceEntries =
420 TransTable->TargetsEntries[DeviceId];
421 for (llvm::offloading::EntryTy &Entry :
422 llvm::make_range(Img->EntriesBegin, Img->EntriesEnd)) {
423 if (Entry.Kind != object::OffloadKind::OFK_OpenMP)
424 continue;
425
426 __tgt_device_binary &Binary = *BinaryOrErr;
427
428 llvm::offloading::EntryTy DeviceEntry = Entry;
429 if (Entry.Size) {
430 if (Device.RTL->get_global(Binary, Entry.Size, Entry.SymbolName,
431 &DeviceEntry.Address) != OFFLOAD_SUCCESS)
432 REPORT("Failed to load symbol %s\n", Entry.SymbolName);
433
434 // If unified memory is active, the corresponding global is a device
435 // reference to the host global. We need to initialize the pointer on
436 // the device to point to the memory on the host.
437 if ((PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
438 (PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY)) {
439 if (Device.RTL->data_submit(DeviceId, DeviceEntry.Address,
440 Entry.Address,
441 Entry.Size) != OFFLOAD_SUCCESS)
442 REPORT("Failed to write symbol for USM %s\n", Entry.SymbolName);
443 }
444 } else if (Entry.Address) {
445 if (Device.RTL->get_function(Binary, Entry.SymbolName,
446 &DeviceEntry.Address) != OFFLOAD_SUCCESS)
447 REPORT("Failed to load kernel %s\n", Entry.SymbolName);
448 }
449 DP("Entry point " DPxMOD " maps to%s %s (" DPxMOD ")\n",
450 DPxPTR(Entry.Address), (Entry.Size) ? " global" : "",
451 Entry.SymbolName, DPxPTR(DeviceEntry.Address));
452
453 DeviceEntries.emplace_back(DeviceEntry);
454 }
455
456 // Set the storage for the table and get a pointer to it.
457 __tgt_target_table DeviceTable{&DeviceEntries[0],
458 &DeviceEntries[0] + DeviceEntries.size()};
459 TransTable->DeviceTables[DeviceId] = DeviceTable;
460 __tgt_target_table *TargetTable = TransTable->TargetsTable[DeviceId] =
461 &TransTable->DeviceTables[DeviceId];
462
463 MappingInfoTy::HDTTMapAccessorTy HDTTMap =
464 Device.getMappingInfo().HostDataToTargetMap.getExclusiveAccessor();
465
466 __tgt_target_table *HostTable = &TransTable->HostTable;
467 for (llvm::offloading::EntryTy *
468 CurrDeviceEntry = TargetTable->EntriesBegin,
469 *CurrHostEntry = HostTable->EntriesBegin,
470 *EntryDeviceEnd = TargetTable->EntriesEnd;
471 CurrDeviceEntry != EntryDeviceEnd;
472 CurrDeviceEntry++, CurrHostEntry++) {
473 if (CurrDeviceEntry->Size == 0 ||
474 CurrDeviceEntry->Kind != object::OffloadKind::OFK_OpenMP)
475 continue;
476
477 assert(CurrDeviceEntry->Size == CurrHostEntry->Size &&
478 "data size mismatch");
479
480 // Fortran may use multiple weak declarations for the same symbol,
481 // therefore we must allow for multiple weak symbols to be loaded from
482 // the fat binary. Treat these mappings as any other "regular"
483 // mapping. Add entry to map.
484 if (Device.getMappingInfo().getTgtPtrBegin(
485 HDTTMap, CurrHostEntry->Address, CurrHostEntry->Size))
486 continue;
487
488 void *CurrDeviceEntryAddr = CurrDeviceEntry->Address;
489
490 // For indirect mapping, follow the indirection and map the actual
491 // target.
492 if (CurrDeviceEntry->Flags & OMP_DECLARE_TARGET_INDIRECT) {
493 AsyncInfoTy AsyncInfo(Device);
494 void *DevPtr;
495 Device.retrieveData(&DevPtr, CurrDeviceEntryAddr, sizeof(void *),
496 AsyncInfo, /*Entry=*/nullptr, &HDTTMap);
497 if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS)
498 return OFFLOAD_FAIL;
499 CurrDeviceEntryAddr = DevPtr;
500 }
501
502 DP("Add mapping from host " DPxMOD " to device " DPxMOD " with size %zu"
503 ", name \"%s\"\n",
504 DPxPTR(CurrHostEntry->Address), DPxPTR(CurrDeviceEntry->Address),
505 CurrDeviceEntry->Size, CurrDeviceEntry->SymbolName);
506 HDTTMap->emplace(new HostDataToTargetTy(
507 (uintptr_t)CurrHostEntry->Address /*HstPtrBase*/,
508 (uintptr_t)CurrHostEntry->Address /*HstPtrBegin*/,
509 (uintptr_t)CurrHostEntry->Address +
510 CurrHostEntry->Size /*HstPtrEnd*/,
511 (uintptr_t)CurrDeviceEntryAddr /*TgtAllocBegin*/,
512 (uintptr_t)CurrDeviceEntryAddr /*TgtPtrBegin*/,
513 false /*UseHoldRefCount*/, CurrHostEntry->SymbolName,
514 true /*IsRefCountINF*/));
515
516 // Notify about the new mapping.
517 if (Device.notifyDataMapped(CurrHostEntry->Address,
518 CurrHostEntry->Size))
519 return OFFLOAD_FAIL;
520 }
521 }
522 Device.setHasPendingImages(false);
523 }
524
525 if (Rc != OFFLOAD_SUCCESS)
526 return Rc;
527
528 static Int32Envar DumpOffloadEntries =
529 Int32Envar("OMPTARGET_DUMP_OFFLOAD_ENTRIES", -1);
530 if (DumpOffloadEntries.get() == DeviceId)
531 Device.dumpOffloadEntries();
532
533 return OFFLOAD_SUCCESS;
534}
535
536Expected<DeviceTy &> PluginManager::getDevice(uint32_t DeviceNo) {
537 DeviceTy *DevicePtr;
538 {
539 auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
540 if (DeviceNo >= ExclusiveDevicesAccessor->size())
541 return error::createOffloadError(
542 error::ErrorCode::INVALID_VALUE,
543 "device number '%i' out of range, only %i devices available",
544 DeviceNo, ExclusiveDevicesAccessor->size());
545
546 DevicePtr = &*(*ExclusiveDevicesAccessor)[DeviceNo];
547 }
548
549 // Check whether global data has been mapped for this device
550 if (DevicePtr->hasPendingImages())
551 if (loadImagesOntoDevice(*DevicePtr) != OFFLOAD_SUCCESS)
552 return error::createOffloadError(error::ErrorCode::BACKEND_FAILURE,
553 "failed to load images on device '%i'",
554 DeviceNo);
555 return *DevicePtr;
556}
557

// Source: offload/libomptarget/PluginManager.cpp