//===-------- interface.cpp - Target independent OpenMP target RTL --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of the interface to be used by Clang during the codegen of a
// target region.
//
//===----------------------------------------------------------------------===//

#include "OpenMP/OMPT/Interface.h"
#include "OffloadPolicy.h"
#include "OpenMP/OMPT/Callback.h"
#include "OpenMP/omp.h"
#include "PluginManager.h"
#include "omptarget.h"
#include "private.h"

#include "Shared/EnvironmentVar.h"
#include "Shared/Profile.h"

#include "Utils/ExponentialBackoff.h"

#include "llvm/Frontend/OpenMP/OMPConstants.h"

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

#ifdef OMPT_SUPPORT
using namespace llvm::omp::target::ompt;
#endif

// If offload is enabled, ensure that device DeviceID has been initialized.
//
// The returned bool indicates whether execution should fall back to the host.
// There are three possible results:
// - Return false if the target device is ready for offload.
// - Return true without reporting a runtime error if offload is
//   disabled, perhaps because the initial device was specified.
// - Report a runtime error and return true.
//
// If DeviceID == OFFLOAD_DEVICE_DEFAULT, set DeviceID to the default device.
// This step might be skipped if offload is disabled.
bool checkDevice(int64_t &DeviceID, ident_t *Loc) {
  if (OffloadPolicy::get(*PM).Kind == OffloadPolicy::DISABLED) {
    DP("Offload is disabled\n");
    return true;
  }

  if (DeviceID == OFFLOAD_DEVICE_DEFAULT) {
    DeviceID = omp_get_default_device();
    DP("Use default device id %" PRId64 "\n", DeviceID);
  }

  // Proposed behavior for OpenMP 5.2 in OpenMP spec github issue 2669.
  if (omp_get_num_devices() == 0) {
62 DP("omp_get_num_devices() == 0 but offload is manadatory\n");
    handleTargetOutcome(false, Loc);
    return true;
  }

  if (DeviceID == omp_get_initial_device()) {
    DP("Device is host (%" PRId64 "), returning as if offload is disabled\n",
       DeviceID);
    return true;
  }
  return false;
}
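
// Illustrative usage (a sketch mirroring the entry points below, not an ABI
// requirement): each offload entry point guards its body with checkDevice()
// and falls back to host execution when it returns true, e.g.:
//
//   if (checkDevice(DeviceId, Loc)) {
//     DP("Not offloading to device %" PRId64 "\n", DeviceId);
//     return; // or `return OMP_TGT_FAIL;` in the kernel entry points
//   }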

////////////////////////////////////////////////////////////////////////////////
/// Deprecated entry point for registering 'requires' flags; emits a
/// diagnostic only.
EXTERN void __tgt_register_requires(int64_t Flags) {
  MESSAGE("The %s function has been removed. Old OpenMP requirements will not "
          "be handled",
          __PRETTY_FUNCTION__);
}

EXTERN void __tgt_rtl_init() { initRuntime(); }
EXTERN void __tgt_rtl_deinit() { deinitRuntime(); }

////////////////////////////////////////////////////////////////////////////////
/// adds a target shared library to the target execution image
EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) {
  initRuntime();
  if (PM->delayRegisterLib(Desc))
    return;

  PM->registerLib(Desc);
}

////////////////////////////////////////////////////////////////////////////////
/// Initialize all available devices without registering any image
EXTERN void __tgt_init_all_rtls() {
  assert(PM && "Runtime not initialized");
  PM->initializeAllDevices();
}

////////////////////////////////////////////////////////////////////////////////
/// unloads a target shared library
EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) {
  PM->unregisterLib(Desc);

  deinitRuntime();
}

template <typename TargetAsyncInfoTy>
static inline void
targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
           void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
           map_var_info_t *ArgNames, void **ArgMappers,
           TargetDataFuncPtrTy TargetDataFunction, const char *RegionTypeMsg,
           const char *RegionName) {
  assert(PM && "Runtime not initialized");
  static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>,
                "TargetAsyncInfoTy must be convertible to AsyncInfoTy.");

  TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy",
                                   "NumArgs=" + std::to_string(ArgNum), Loc);

  DP("Entering data %s region for device %" PRId64 " with %d mappings\n",
     RegionName, DeviceId, ArgNum);

  if (checkDevice(DeviceId, Loc)) {
    DP("Not offloading to device %" PRId64 "\n", DeviceId);
    return;
  }

  if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
    printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
                         RegionTypeMsg);
#ifdef OMPTARGET_DEBUG
  for (int I = 0; I < ArgNum; ++I) {
    DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
       ", Type=0x%" PRIx64 ", Name=%s\n",
       I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
       (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
  }
#endif

  auto DeviceOrErr = PM->getDevice(DeviceId);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());

  TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr);
  AsyncInfoTy &AsyncInfo = TargetAsyncInfo;

  /// RAII to establish tool anchors before and after data begin / end / update
  OMPT_IF_BUILT(assert((TargetDataFunction == targetDataBegin ||
                        TargetDataFunction == targetDataEnd ||
                        TargetDataFunction == targetDataUpdate) &&
                       "Encountered unexpected TargetDataFunction during "
                       "execution of targetData");
                auto CallbackFunctions =
                    (TargetDataFunction == targetDataBegin)
                        ? RegionInterface.getCallbacks<ompt_target_enter_data>()
                    : (TargetDataFunction == targetDataEnd)
                        ? RegionInterface.getCallbacks<ompt_target_exit_data>()
                        : RegionInterface.getCallbacks<ompt_target_update>();
                InterfaceRAII TargetDataRAII(CallbackFunctions, DeviceId,
                                             OMPT_GET_RETURN_ADDRESS);)

  int Rc = OFFLOAD_SUCCESS;
  Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes,
                          ArgTypes, ArgNames, ArgMappers, AsyncInfo,
                          /*FromMapper=*/false);

  if (Rc == OFFLOAD_SUCCESS)
    Rc = AsyncInfo.synchronize();

  handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
}

/// creates host-to-target data mapping, stores it in the
/// libomptarget.so internal structure (an entry in a stack of data maps)
/// and passes the data to the device.
EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
                                           int32_t ArgNum, void **ArgsBase,
                                           void **Args, int64_t *ArgSizes,
                                           int64_t *ArgTypes,
                                           map_var_info_t *ArgNames,
                                           void **ArgMappers) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
                          ArgTypes, ArgNames, ArgMappers, targetDataBegin,
190 "Entering OpenMP data region with being_mapper",
191 "begin");
192}
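
// Illustrative example (the exact lowering depends on the Clang version): for
//   #pragma omp target enter data map(to : A[0:N])
// the compiler typically emits a call of the form
//   __tgt_target_data_begin_mapper(Loc, /*DeviceId=*/-1, /*ArgNum=*/1,
//                                  ArgsBase, Args, ArgSizes, ArgTypes,
//                                  /*ArgNames=*/nullptr,
//                                  /*ArgMappers=*/nullptr);
// where the arrays describe the single mapped section of A.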

EXTERN void __tgt_target_data_begin_nowait_mapper(
    ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
    void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
    void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
    void *NoAliasDepList) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  targetData<TaskAsyncInfoWrapperTy>(
      Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
      ArgMappers, targetDataBegin,
203 "Entering OpenMP data region with being_nowait_mapper", "begin");
}

/// passes data from the target, releases target memory and destroys
/// the host-target mapping (top entry from the stack of data maps)
/// created by the last __tgt_target_data_begin.
EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
                                         int32_t ArgNum, void **ArgsBase,
                                         void **Args, int64_t *ArgSizes,
                                         int64_t *ArgTypes,
                                         map_var_info_t *ArgNames,
                                         void **ArgMappers) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
                          ArgTypes, ArgNames, ArgMappers, targetDataEnd,
                          "Exiting OpenMP data region with end_mapper", "end");
}
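
// Illustrative counterpart to the begin entry point above (a sketch, compiler
// dependent): a matching
//   #pragma omp target exit data map(from : A[0:N])
// is expected to lower to __tgt_target_data_end_mapper with the same argument
// arrays, copying the data back and dropping the mapping entry.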

EXTERN void __tgt_target_data_end_nowait_mapper(
    ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
    void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
    void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
    void *NoAliasDepList) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  targetData<TaskAsyncInfoWrapperTy>(
      Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
      ArgMappers, targetDataEnd,
      "Exiting OpenMP data region with end_nowait_mapper", "end");
}

EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
                                            int32_t ArgNum, void **ArgsBase,
                                            void **Args, int64_t *ArgSizes,
                                            int64_t *ArgTypes,
                                            map_var_info_t *ArgNames,
                                            void **ArgMappers) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  targetData<AsyncInfoTy>(
      Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
      ArgMappers, targetDataUpdate,
      "Updating data within the OpenMP data region with update_mapper",
      "update");
}
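
// Illustrative example (a sketch; the exact lowering is compiler dependent):
//   #pragma omp target update from(A[0:N])
// maps onto __tgt_target_data_update_mapper with OMP_TGT_MAPTYPE_FROM set in
// the corresponding ArgTypes entry; `to(...)` clauses use OMP_TGT_MAPTYPE_TO.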

EXTERN void __tgt_target_data_update_nowait_mapper(
    ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
    void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
    void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
    void *NoAliasDepList) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  targetData<TaskAsyncInfoWrapperTy>(
      Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
      ArgMappers, targetDataUpdate,
      "Updating data within the OpenMP data region with update_nowait_mapper",
      "update");
}

static KernelArgsTy *upgradeKernelArgs(KernelArgsTy *KernelArgs,
                                       KernelArgsTy &LocalKernelArgs,
                                       int32_t NumTeams, int32_t ThreadLimit) {
  if (KernelArgs->Version > OMP_KERNEL_ARG_VERSION)
    DP("Unexpected ABI version: %u\n", KernelArgs->Version);

  uint32_t UpgradedVersion = KernelArgs->Version;
  if (KernelArgs->Version < OMP_KERNEL_ARG_VERSION) {
    // The upgraded version will be based on the kernel launch environment.
    if (KernelArgs->Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
      UpgradedVersion = OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR - 1;
    else
      UpgradedVersion = OMP_KERNEL_ARG_VERSION;
  }
  if (UpgradedVersion != KernelArgs->Version) {
    LocalKernelArgs.Version = UpgradedVersion;
    LocalKernelArgs.NumArgs = KernelArgs->NumArgs;
    LocalKernelArgs.ArgBasePtrs = KernelArgs->ArgBasePtrs;
    LocalKernelArgs.ArgPtrs = KernelArgs->ArgPtrs;
    LocalKernelArgs.ArgSizes = KernelArgs->ArgSizes;
    LocalKernelArgs.ArgTypes = KernelArgs->ArgTypes;
    LocalKernelArgs.ArgNames = KernelArgs->ArgNames;
    LocalKernelArgs.ArgMappers = KernelArgs->ArgMappers;
    LocalKernelArgs.Tripcount = KernelArgs->Tripcount;
    LocalKernelArgs.Flags = KernelArgs->Flags;
    LocalKernelArgs.DynCGroupMem = 0;
    LocalKernelArgs.NumTeams[0] = NumTeams;
    LocalKernelArgs.NumTeams[1] = 1;
    LocalKernelArgs.NumTeams[2] = 1;
    LocalKernelArgs.ThreadLimit[0] = ThreadLimit;
    LocalKernelArgs.ThreadLimit[1] = 1;
    LocalKernelArgs.ThreadLimit[2] = 1;
    return &LocalKernelArgs;
  }

  // FIXME: This is a workaround to "calibrate" the bad values currently
  // emitted by the front end. Delete this code once the front end emits
  // proper values.
  auto CorrectMultiDim = [](uint32_t (&Val)[3]) {
    if (Val[1] == 0)
      Val[1] = 1;
    if (Val[2] == 0)
      Val[2] = 1;
  };
  CorrectMultiDim(KernelArgs->ThreadLimit);
  CorrectMultiDim(KernelArgs->NumTeams);

  return KernelArgs;
}
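
// Illustrative behaviour of upgradeKernelArgs() (a sketch, not a test case):
// given a descriptor emitted by an older compiler,
//   KernelArgsTy Old;   // Old.Version < OMP_KERNEL_ARG_VERSION
// the argument arrays are copied into LocalKernelArgs and the NumTeams /
// ThreadLimit triples are populated from the entry-point parameters, so the
// launch path below can assume a current-version layout regardless of which
// compiler produced the descriptor.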

template <typename TargetAsyncInfoTy>
static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
                               int32_t ThreadLimit, void *HostPtr,
                               KernelArgsTy *KernelArgs) {
  assert(PM && "Runtime not initialized");
  static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>,
                "Target AsyncInfoTy must be convertible to AsyncInfoTy.");
  DP("Entering target region for device %" PRId64 " with entry point " DPxMOD
     "\n",
     DeviceId, DPxPTR(HostPtr));

  if (checkDevice(DeviceId, Loc)) {
    DP("Not offloading to device %" PRId64 "\n", DeviceId);
    return OMP_TGT_FAIL;
  }

  bool IsTeams = NumTeams != -1;
  if (!IsTeams)
    KernelArgs->NumTeams[0] = NumTeams = 1;

  // Auto-upgrade the kernel arguments to the current ABI version if needed.
  KernelArgsTy LocalKernelArgs;
  KernelArgs =
      upgradeKernelArgs(KernelArgs, LocalKernelArgs, NumTeams, ThreadLimit);

  TIMESCOPE_WITH_DETAILS_AND_IDENT(
      "Runtime: target exe",
      "NumTeams=" + std::to_string(NumTeams) +
          ";NumArgs=" + std::to_string(KernelArgs->NumArgs),
      Loc);

  if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
    printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs,
                         KernelArgs->ArgSizes, KernelArgs->ArgTypes,
                         KernelArgs->ArgNames, "Entering OpenMP kernel");
#ifdef OMPTARGET_DEBUG
  for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) {
    DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
       ", Type=0x%" PRIx64 ", Name=%s\n",
       I, DPxPTR(KernelArgs->ArgBasePtrs[I]), DPxPTR(KernelArgs->ArgPtrs[I]),
       KernelArgs->ArgSizes[I], KernelArgs->ArgTypes[I],
       (KernelArgs->ArgNames)
           ? getNameFromMapping(KernelArgs->ArgNames[I]).c_str()
           : "unknown");
  }
#endif

  auto DeviceOrErr = PM->getDevice(DeviceId);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());

  TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr);
  AsyncInfoTy &AsyncInfo = TargetAsyncInfo;
  /// RAII to establish tool anchors before and after target region
  OMPT_IF_BUILT(InterfaceRAII TargetRAII(
                    RegionInterface.getCallbacks<ompt_target>(), DeviceId,
                    /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)

  int Rc = OFFLOAD_SUCCESS;
  Rc = target(Loc, *DeviceOrErr, HostPtr, *KernelArgs, AsyncInfo);
  { // required to show synchronization
    TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: synchronize", "", Loc);
    if (Rc == OFFLOAD_SUCCESS)
      Rc = AsyncInfo.synchronize();

    handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
    assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!");
  }
  return OMP_TGT_SUCCESS;
}

/// Implements a kernel entry that executes the target region on the specified
/// device.
///
/// \param Loc Source location associated with this target region.
/// \param DeviceId The device to execute this region; -1 indicates the default.
/// \param NumTeams Number of teams to launch the region with, -1 indicates a
/// non-teams region and 0 indicates it was unspecified.
/// \param ThreadLimit Limit to the number of threads to use in the kernel
/// launch, 0 indicates it was unspecified.
/// \param HostPtr The pointer to the host function registered with the kernel.
/// \param Args All arguments to this kernel launch (see struct definition).
EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
                               int32_t ThreadLimit, void *HostPtr,
                               KernelArgsTy *KernelArgs) {
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  if (KernelArgs->Flags.NoWait)
    return targetKernel<TaskAsyncInfoWrapperTy>(
        Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, KernelArgs);
  return targetKernel<AsyncInfoTy>(Loc, DeviceId, NumTeams, ThreadLimit,
                                   HostPtr, KernelArgs);
}
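
// Illustrative example (a sketch; the descriptor Clang builds varies by
// version): for
//   #pragma omp target teams num_teams(4) thread_limit(64)
// the generated code fills a KernelArgsTy and issues
//   if (__tgt_target_kernel(Loc, /*DeviceId=*/-1, /*NumTeams=*/4,
//                           /*ThreadLimit=*/64, HostPtr, &KernelArgs))
//     HostFallbackFn(); // hypothetical host version of the region
// i.e. the host fallback runs whenever this entry point returns OMP_TGT_FAIL.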

/// Activates the record replay mechanism.
/// \param DeviceId The device identifier to execute the target region.
/// \param MemorySize The number of bytes to be (pre-)allocated
///                   by the bump allocator.
/// \param IsRecord Activates the record replay mechanism in
///                 'record' mode or 'replay' mode.
/// \param SaveOutput Store the device memory after kernel
///                   execution on persistent storage.
EXTERN int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize,
                                        void *VAddr, bool IsRecord,
                                        bool SaveOutput,
                                        uint64_t &ReqPtrArgOffset) {
  assert(PM && "Runtime not initialized");
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  auto DeviceOrErr = PM->getDevice(DeviceId);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());

  [[maybe_unused]] int Rc = target_activate_rr(
      *DeviceOrErr, MemorySize, VAddr, IsRecord, SaveOutput, ReqPtrArgOffset);
  assert(Rc == OFFLOAD_SUCCESS &&
         "__tgt_activate_record_replay unexpected failure!");
  return OMP_TGT_SUCCESS;
}

/// Implements a target kernel entry that replays a pre-recorded kernel.
/// \param Loc Source location associated with this target region (unused).
/// \param DeviceId The device identifier to execute the target region.
/// \param HostPtr A pointer to an address that uniquely identifies the kernel.
/// \param DeviceMemory A pointer to an array storing device memory data to move
///                     prior to kernel execution.
/// \param DeviceMemorySize The size of the above device memory data in bytes.
/// \param TgtArgs An array of pointers of the pre-recorded target kernel
///                arguments.
/// \param TgtOffsets An array of pointers of the pre-recorded target kernel
///                   argument offsets.
/// \param NumArgs The number of kernel arguments.
/// \param NumTeams Number of teams to launch the target region with.
/// \param ThreadLimit Limit to the number of threads to use in kernel
///                    execution.
/// \param LoopTripCount The pre-recorded value of the loop tripcount, if any.
/// \return OMP_TGT_SUCCESS on success, OMP_TGT_FAIL on failure.
EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId,
                                      void *HostPtr, void *DeviceMemory,
                                      int64_t DeviceMemorySize, void **TgtArgs,
                                      ptrdiff_t *TgtOffsets, int32_t NumArgs,
                                      int32_t NumTeams, int32_t ThreadLimit,
                                      uint64_t LoopTripCount) {
  assert(PM && "Runtime not initialized");
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
  if (checkDevice(DeviceId, Loc)) {
    DP("Not offloading to device %" PRId64 "\n", DeviceId);
    return OMP_TGT_FAIL;
  }
  auto DeviceOrErr = PM->getDevice(DeviceId);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());

  /// RAII to establish tool anchors before and after target region
  OMPT_IF_BUILT(InterfaceRAII TargetRAII(
                    RegionInterface.getCallbacks<ompt_target>(), DeviceId,
                    /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)

  AsyncInfoTy AsyncInfo(*DeviceOrErr);
  int Rc = target_replay(Loc, *DeviceOrErr, HostPtr, DeviceMemory,
                         DeviceMemorySize, TgtArgs, TgtOffsets, NumArgs,
                         NumTeams, ThreadLimit, LoopTripCount, AsyncInfo);
  if (Rc == OFFLOAD_SUCCESS)
    Rc = AsyncInfo.synchronize();
  handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
  assert(Rc == OFFLOAD_SUCCESS &&
         "__tgt_target_kernel_replay unexpected failure!");
  return OMP_TGT_SUCCESS;
}

// Get the current number of components for a user-defined mapper.
EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
  auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
  int64_t Size = MapperComponentsPtr->Components.size();
  DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
     DPxPTR(RtMapperHandle), Size);
  return Size;
}

// Push back one component for a user-defined mapper.
EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
                                        void *Begin, int64_t Size, int64_t Type,
                                        void *Name) {
  DP("__tgt_push_mapper_component(Handle=" DPxMOD
     ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
     ", Type=0x%" PRIx64 ", Name=%s).\n",
     DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
     (Name) ? getNameFromMapping(Name).c_str() : "unknown");
  auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
  MapperComponentsPtr->Components.push_back(
      MapComponentInfoTy(Base, Begin, Size, Type, Name));
}
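
// Illustrative sketch of a user-defined mapper function such as Clang might
// emit for `#pragma omp declare mapper`; the function name and the way each
// member is described are hypothetical:
//
//   void omp_mapper_MyTy(void *Handle, void *Base, void *Begin, int64_t Size,
//                        int64_t Type, void *Name) {
//     // One push per mapped member; the runtime records them on the handle.
//     __tgt_push_mapper_component(Handle, Base, Begin, Size, Type, Name);
//   }
//
// __tgt_mapper_num_components() lets such a function query how many entries
// have already been recorded on the handle.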

EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
  assert(PM && "Runtime not initialized");
  std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
  InfoLevel.store(NewInfoLevel);
}
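
// Illustrative use (hypothetical caller): a test or tool can raise the info
// verbosity at runtime instead of setting LIBOMPTARGET_INFO before launch:
//   __tgt_set_info_flag(OMP_INFOTYPE_KERNEL_ARGS);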

EXTERN int __tgt_print_device_info(int64_t DeviceId) {
  assert(PM && "Runtime not initialized");
  auto DeviceOrErr = PM->getDevice(DeviceId);
  if (!DeviceOrErr)
    FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());

  return DeviceOrErr->printDeviceInfo();
}

EXTERN void __tgt_target_nowait_query(void **AsyncHandle) {
  assert(PM && "Runtime not initialized");
  OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));

  if (!AsyncHandle || !*AsyncHandle) {
    FATAL_MESSAGE0(
521 1, "Receive an invalid async handle from the current OpenMP task. Is "
522 "this a target nowait region?\n");
  }

  // Exponential backoff tries to optimally decide if a thread should just query
  // for the device operations (work/spin wait on them) or block until they are
  // completed (use device side blocking mechanism). This allows the runtime to
  // adapt itself when there are a lot of long-running target regions in-flight.
  static thread_local utils::ExponentialBackoff QueryCounter(
      Int64Envar("OMPTARGET_QUERY_COUNT_MAX", 10),
      Int64Envar("OMPTARGET_QUERY_COUNT_THRESHOLD", 5),
      Envar<float>("OMPTARGET_QUERY_COUNT_BACKOFF_FACTOR", 0.5f));

  auto *AsyncInfo = (AsyncInfoTy *)*AsyncHandle;

  // If the thread is actively waiting on too many target nowait regions, we
  // should use the blocking sync type.
  if (QueryCounter.isAboveThreshold())
    AsyncInfo->SyncType = AsyncInfoTy::SyncTy::BLOCKING;

  if (AsyncInfo->synchronize())
    FATAL_MESSAGE0(1, "Error while querying the async queue for completion.\n");
  // If there are device operations still pending, return immediately without
  // deallocating the handle and increase the current thread query count.
  if (!AsyncInfo->isDone()) {
    QueryCounter.increment();
    return;
  }

  // When a thread successfully completes a target nowait region, we
  // exponentially backoff its query counter by the query factor.
  QueryCounter.decrement();

  // Delete the handle and unset it from the OpenMP task data.
  delete AsyncInfo;
  *AsyncHandle = nullptr;
}
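
// Illustrative tuning sketch (environment variable names taken from the Envar
// lookups above; the values are examples only):
//   OMPTARGET_QUERY_COUNT_MAX=20 OMPTARGET_QUERY_COUNT_THRESHOLD=10 ./app
// lets threads poll longer before the runtime switches a target nowait region
// to blocking synchronization.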