1//===-------- interface.cpp - Target independent OpenMP target RTL --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of the interface to be used by Clang during the codegen of a
10// target region.
11//
12//===----------------------------------------------------------------------===//
13
14#include "OpenMP/OMPT/Interface.h"
15#include "OpenMP/OMPT/Callback.h"
16#include "PluginManager.h"
17#include "private.h"
18
19#include "Shared/EnvironmentVar.h"
20#include "Shared/Profile.h"
21
22#include "Utils/ExponentialBackoff.h"
23
24#include "llvm/Frontend/OpenMP/OMPConstants.h"
25
26#include <cassert>
27#include <cstdint>
28#include <cstdio>
29#include <cstdlib>
30
31#ifdef OMPT_SUPPORT
32using namespace llvm::omp::target::ompt;
33#endif
34
35////////////////////////////////////////////////////////////////////////////////
36/// adds requires flags
37EXTERN void __tgt_register_requires(int64_t Flags) {
38 MESSAGE("The %s function has been removed. Old OpenMP requirements will not "
39 "be handled",
40 __PRETTY_FUNCTION__);
41}
42
43EXTERN void __tgt_rtl_init() { initRuntime(); }
44EXTERN void __tgt_rtl_deinit() { deinitRuntime(); }
45
46////////////////////////////////////////////////////////////////////////////////
47/// adds a target shared library to the target execution image
48EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) {
49 initRuntime();
50 if (PM->delayRegisterLib(Desc))
51 return;
52
53 PM->registerLib(Desc);
54}
55
56////////////////////////////////////////////////////////////////////////////////
57/// Initialize all available devices without registering any image
58EXTERN void __tgt_init_all_rtls() {
59 assert(PM && "Runtime not initialized");
60 PM->initAllPlugins();
61}
62
63////////////////////////////////////////////////////////////////////////////////
64/// unloads a target shared library
65EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) {
66 PM->unregisterLib(Desc);
67
68 deinitRuntime();
69}
70
71template <typename TargetAsyncInfoTy>
72static inline void
73targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
74 void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
75 map_var_info_t *ArgNames, void **ArgMappers,
76 TargetDataFuncPtrTy TargetDataFunction, const char *RegionTypeMsg,
77 const char *RegionName) {
78 assert(PM && "Runtime not initialized");
79 static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>,
80 "TargetAsyncInfoTy must be convertible to AsyncInfoTy.");
81
82 TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy",
83 "NumArgs=" + std::to_string(val: ArgNum), Loc);
84
85 DP("Entering data %s region for device %" PRId64 " with %d mappings\n",
86 RegionName, DeviceId, ArgNum);
87
88 if (checkDeviceAndCtors(DeviceId, Loc)) {
89 DP("Not offloading to device %" PRId64 "\n", DeviceId);
90 return;
91 }
92
93 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
94 printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames,
95 RegionTypeMsg);
96#ifdef OMPTARGET_DEBUG
97 for (int I = 0; I < ArgNum; ++I) {
98 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
99 ", Type=0x%" PRIx64 ", Name=%s\n",
100 I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I],
101 (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown");
102 }
103#endif
104
105 auto DeviceOrErr = PM->getDevice(DeviceId);
106 if (!DeviceOrErr)
107 FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
108
109 TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr);
110 AsyncInfoTy &AsyncInfo = TargetAsyncInfo;
111
112 /// RAII to establish tool anchors before and after data begin / end / update
113 OMPT_IF_BUILT(assert((TargetDataFunction == targetDataBegin ||
114 TargetDataFunction == targetDataEnd ||
115 TargetDataFunction == targetDataUpdate) &&
116 "Encountered unexpected TargetDataFunction during "
117 "execution of targetData");
118 auto CallbackFunctions =
119 (TargetDataFunction == targetDataBegin)
120 ? RegionInterface.getCallbacks<ompt_target_enter_data>()
121 : (TargetDataFunction == targetDataEnd)
122 ? RegionInterface.getCallbacks<ompt_target_exit_data>()
123 : RegionInterface.getCallbacks<ompt_target_update>();
124 InterfaceRAII TargetDataRAII(CallbackFunctions, DeviceId,
125 OMPT_GET_RETURN_ADDRESS);)
126
127 int Rc = OFFLOAD_SUCCESS;
128 Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes,
129 ArgTypes, ArgNames, ArgMappers, AsyncInfo,
130 false /*FromMapper=*/);
131
132 if (Rc == OFFLOAD_SUCCESS)
133 Rc = AsyncInfo.synchronize();
134
135 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
136}
137
138/// creates host-to-target data mapping, stores it in the
139/// libomptarget.so internal structure (an entry in a stack of data maps)
140/// and passes the data to the device.
141EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
142 int32_t ArgNum, void **ArgsBase,
143 void **Args, int64_t *ArgSizes,
144 int64_t *ArgTypes,
145 map_var_info_t *ArgNames,
146 void **ArgMappers) {
147 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
148 targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
149 ArgTypes, ArgNames, ArgMappers, targetDataBegin,
150 "Entering OpenMP data region with being_mapper",
151 "begin");
152}
153
154EXTERN void __tgt_target_data_begin_nowait_mapper(
155 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
156 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
157 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
158 void *NoAliasDepList) {
159 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
160 targetData<TaskAsyncInfoWrapperTy>(
161 Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
162 ArgMappers, targetDataBegin,
163 "Entering OpenMP data region with being_nowait_mapper", "begin");
164}
165
166/// passes data from the target, releases target memory and destroys
167/// the host-target mapping (top entry from the stack of data maps)
168/// created by the last __tgt_target_data_begin.
169EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
170 int32_t ArgNum, void **ArgsBase,
171 void **Args, int64_t *ArgSizes,
172 int64_t *ArgTypes,
173 map_var_info_t *ArgNames,
174 void **ArgMappers) {
175 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
176 targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
177 ArgTypes, ArgNames, ArgMappers, targetDataEnd,
178 "Exiting OpenMP data region with end_mapper", "end");
179}
180
181EXTERN void __tgt_target_data_end_nowait_mapper(
182 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
183 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
184 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
185 void *NoAliasDepList) {
186 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
187 targetData<TaskAsyncInfoWrapperTy>(
188 Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
189 ArgMappers, targetDataEnd,
190 "Exiting OpenMP data region with end_nowait_mapper", "end");
191}
192
193EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
194 int32_t ArgNum, void **ArgsBase,
195 void **Args, int64_t *ArgSizes,
196 int64_t *ArgTypes,
197 map_var_info_t *ArgNames,
198 void **ArgMappers) {
199 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
200 targetData<AsyncInfoTy>(
201 Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
202 ArgMappers, targetDataUpdate,
203 "Updating data within the OpenMP data region with update_mapper",
204 "update");
205}
206
207EXTERN void __tgt_target_data_update_nowait_mapper(
208 ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
209 void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
210 void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
211 void *NoAliasDepList) {
212 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
213 targetData<TaskAsyncInfoWrapperTy>(
214 Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
215 ArgMappers, targetDataUpdate,
216 "Updating data within the OpenMP data region with update_nowait_mapper",
217 "update");
218}
219
220static KernelArgsTy *upgradeKernelArgs(KernelArgsTy *KernelArgs,
221 KernelArgsTy &LocalKernelArgs,
222 int32_t NumTeams, int32_t ThreadLimit) {
223 if (KernelArgs->Version > OMP_KERNEL_ARG_VERSION)
224 DP("Unexpected ABI version: %u\n", KernelArgs->Version);
225
226 uint32_t UpgradedVersion = KernelArgs->Version;
227 if (KernelArgs->Version < OMP_KERNEL_ARG_VERSION) {
228 // The upgraded version will be based on the kernel launch environment.
229 if (KernelArgs->Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
230 UpgradedVersion = OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR - 1;
231 else
232 UpgradedVersion = OMP_KERNEL_ARG_VERSION;
233 }
234 if (UpgradedVersion != KernelArgs->Version) {
235 LocalKernelArgs.Version = UpgradedVersion;
236 LocalKernelArgs.NumArgs = KernelArgs->NumArgs;
237 LocalKernelArgs.ArgBasePtrs = KernelArgs->ArgBasePtrs;
238 LocalKernelArgs.ArgPtrs = KernelArgs->ArgPtrs;
239 LocalKernelArgs.ArgSizes = KernelArgs->ArgSizes;
240 LocalKernelArgs.ArgTypes = KernelArgs->ArgTypes;
241 LocalKernelArgs.ArgNames = KernelArgs->ArgNames;
242 LocalKernelArgs.ArgMappers = KernelArgs->ArgMappers;
243 LocalKernelArgs.Tripcount = KernelArgs->Tripcount;
244 LocalKernelArgs.Flags = KernelArgs->Flags;
245 LocalKernelArgs.DynCGroupMem = 0;
246 LocalKernelArgs.NumTeams[0] = NumTeams;
247 LocalKernelArgs.NumTeams[1] = 0;
248 LocalKernelArgs.NumTeams[2] = 0;
249 LocalKernelArgs.ThreadLimit[0] = ThreadLimit;
250 LocalKernelArgs.ThreadLimit[1] = 0;
251 LocalKernelArgs.ThreadLimit[2] = 0;
252 return &LocalKernelArgs;
253 }
254
255 return KernelArgs;
256}
257
258template <typename TargetAsyncInfoTy>
259static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
260 int32_t ThreadLimit, void *HostPtr,
261 KernelArgsTy *KernelArgs) {
262 assert(PM && "Runtime not initialized");
263 static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>,
264 "Target AsyncInfoTy must be convertible to AsyncInfoTy.");
265 DP("Entering target region for device %" PRId64 " with entry point " DPxMOD
266 "\n",
267 DeviceId, DPxPTR(HostPtr));
268
269 if (checkDeviceAndCtors(DeviceId, Loc)) {
270 DP("Not offloading to device %" PRId64 "\n", DeviceId);
271 return OMP_TGT_FAIL;
272 }
273
274 bool IsTeams = NumTeams != -1;
275 if (!IsTeams)
276 KernelArgs->NumTeams[0] = NumTeams = 1;
277
278 // Auto-upgrade kernel args version 1 to 2.
279 KernelArgsTy LocalKernelArgs;
280 KernelArgs =
281 upgradeKernelArgs(KernelArgs, LocalKernelArgs, NumTeams, ThreadLimit);
282
283 assert(KernelArgs->NumTeams[0] == static_cast<uint32_t>(NumTeams) &&
284 !KernelArgs->NumTeams[1] && !KernelArgs->NumTeams[2] &&
285 "OpenMP interface should not use multiple dimensions");
286 assert(KernelArgs->ThreadLimit[0] == static_cast<uint32_t>(ThreadLimit) &&
287 !KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] &&
288 "OpenMP interface should not use multiple dimensions");
289 TIMESCOPE_WITH_DETAILS_AND_IDENT(
290 "Runtime: target exe",
291 "NumTeams=" + std::to_string(val: NumTeams) +
292 ";NumArgs=" + std::to_string(KernelArgs->NumArgs),
293 Loc);
294
295 if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS)
296 printKernelArguments(Loc, DeviceId, KernelArgs->NumArgs,
297 KernelArgs->ArgSizes, KernelArgs->ArgTypes,
298 KernelArgs->ArgNames, "Entering OpenMP kernel");
299#ifdef OMPTARGET_DEBUG
300 for (uint32_t I = 0; I < KernelArgs->NumArgs; ++I) {
301 DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
302 ", Type=0x%" PRIx64 ", Name=%s\n",
303 I, DPxPTR(KernelArgs->ArgBasePtrs[I]), DPxPTR(KernelArgs->ArgPtrs[I]),
304 KernelArgs->ArgSizes[I], KernelArgs->ArgTypes[I],
305 (KernelArgs->ArgNames)
306 ? getNameFromMapping(KernelArgs->ArgNames[I]).c_str()
307 : "unknown");
308 }
309#endif
310
311 auto DeviceOrErr = PM->getDevice(DeviceId);
312 if (!DeviceOrErr)
313 FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
314
315 TargetAsyncInfoTy TargetAsyncInfo(*DeviceOrErr);
316 AsyncInfoTy &AsyncInfo = TargetAsyncInfo;
317 /// RAII to establish tool anchors before and after target region
318 OMPT_IF_BUILT(InterfaceRAII TargetRAII(
319 RegionInterface.getCallbacks<ompt_target>(), DeviceId,
320 /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
321
322 int Rc = OFFLOAD_SUCCESS;
323 Rc = target(Loc, *DeviceOrErr, HostPtr, *KernelArgs, AsyncInfo);
324 { // required to show syncronization
325 TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: syncronize", "", Loc);
326 if (Rc == OFFLOAD_SUCCESS)
327 Rc = AsyncInfo.synchronize();
328
329 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
330 assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!");
331 }
332 return OMP_TGT_SUCCESS;
333}
334
335/// Implements a kernel entry that executes the target region on the specified
336/// device.
337///
338/// \param Loc Source location associated with this target region.
339/// \param DeviceId The device to execute this region, -1 indicated the default.
340/// \param NumTeams Number of teams to launch the region with, -1 indicates a
341/// non-teams region and 0 indicates it was unspecified.
342/// \param ThreadLimit Limit to the number of threads to use in the kernel
343/// launch, 0 indicates it was unspecified.
344/// \param HostPtr The pointer to the host function registered with the kernel.
345/// \param Args All arguments to this kernel launch (see struct definition).
346EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
347 int32_t ThreadLimit, void *HostPtr,
348 KernelArgsTy *KernelArgs) {
349 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
350 if (KernelArgs->Flags.NoWait)
351 return targetKernel<TaskAsyncInfoWrapperTy>(
352 Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, KernelArgs);
353 return targetKernel<AsyncInfoTy>(Loc, DeviceId, NumTeams, ThreadLimit,
354 HostPtr, KernelArgs);
355}
356
357/// Activates the record replay mechanism.
358/// \param DeviceId The device identifier to execute the target region.
359/// \param MemorySize The number of bytes to be (pre-)allocated
360/// by the bump allocator
361/// /param IsRecord Activates the record replay mechanism in
362/// 'record' mode or 'replay' mode.
363/// /param SaveOutput Store the device memory after kernel
364/// execution on persistent storage
365EXTERN int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize,
366 void *VAddr, bool IsRecord,
367 bool SaveOutput,
368 uint64_t &ReqPtrArgOffset) {
369 assert(PM && "Runtime not initialized");
370 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
371 auto DeviceOrErr = PM->getDevice(DeviceId);
372 if (!DeviceOrErr)
373 FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
374
375 [[maybe_unused]] int Rc = target_activate_rr(
376 *DeviceOrErr, MemorySize, VAddr, IsRecord, SaveOutput, ReqPtrArgOffset);
377 assert(Rc == OFFLOAD_SUCCESS &&
378 "__tgt_activate_record_replay unexpected failure!");
379 return OMP_TGT_SUCCESS;
380}
381
382/// Implements a target kernel entry that replays a pre-recorded kernel.
383/// \param Loc Source location associated with this target region (unused).
384/// \param DeviceId The device identifier to execute the target region.
385/// \param HostPtr A pointer to an address that uniquely identifies the kernel.
386/// \param DeviceMemory A pointer to an array storing device memory data to move
387/// prior to kernel execution.
388/// \param DeviceMemorySize The size of the above device memory data in bytes.
389/// \param TgtArgs An array of pointers of the pre-recorded target kernel
390/// arguments.
391/// \param TgtOffsets An array of pointers of the pre-recorded target kernel
392/// argument offsets.
393/// \param NumArgs The number of kernel arguments.
394/// \param NumTeams Number of teams to launch the target region with.
395/// \param ThreadLimit Limit to the number of threads to use in kernel
396/// execution.
397/// \param LoopTripCount The pre-recorded value of the loop tripcount, if any.
398/// \return OMP_TGT_SUCCESS on success, OMP_TGT_FAIL on failure.
399EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId,
400 void *HostPtr, void *DeviceMemory,
401 int64_t DeviceMemorySize, void **TgtArgs,
402 ptrdiff_t *TgtOffsets, int32_t NumArgs,
403 int32_t NumTeams, int32_t ThreadLimit,
404 uint64_t LoopTripCount) {
405 assert(PM && "Runtime not initialized");
406 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
407 if (checkDeviceAndCtors(DeviceId, Loc)) {
408 DP("Not offloading to device %" PRId64 "\n", DeviceId);
409 return OMP_TGT_FAIL;
410 }
411 auto DeviceOrErr = PM->getDevice(DeviceId);
412 if (!DeviceOrErr)
413 FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
414
415 /// RAII to establish tool anchors before and after target region
416 OMPT_IF_BUILT(InterfaceRAII TargetRAII(
417 RegionInterface.getCallbacks<ompt_target>(), DeviceId,
418 /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
419
420 AsyncInfoTy AsyncInfo(*DeviceOrErr);
421 int Rc = target_replay(Loc, *DeviceOrErr, HostPtr, DeviceMemory,
422 DeviceMemorySize, TgtArgs, TgtOffsets, NumArgs,
423 NumTeams, ThreadLimit, LoopTripCount, AsyncInfo);
424 if (Rc == OFFLOAD_SUCCESS)
425 Rc = AsyncInfo.synchronize();
426 handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
427 assert(Rc == OFFLOAD_SUCCESS &&
428 "__tgt_target_kernel_replay unexpected failure!");
429 return OMP_TGT_SUCCESS;
430}
431
432// Get the current number of components for a user-defined mapper.
433EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
434 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
435 int64_t Size = MapperComponentsPtr->Components.size();
436 DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
437 DPxPTR(RtMapperHandle), Size);
438 return Size;
439}
440
441// Push back one component for a user-defined mapper.
442EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
443 void *Begin, int64_t Size, int64_t Type,
444 void *Name) {
445 DP("__tgt_push_mapper_component(Handle=" DPxMOD
446 ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
447 ", Type=0x%" PRIx64 ", Name=%s).\n",
448 DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
449 (Name) ? getNameFromMapping(Name).c_str() : "unknown");
450 auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
451 MapperComponentsPtr->Components.push_back(
452 MapComponentInfoTy(Base, Begin, Size, Type, Name));
453}
454
455EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
456 assert(PM && "Runtime not initialized");
457 std::atomic<uint32_t> &InfoLevel = getInfoLevelInternal();
458 InfoLevel.store(NewInfoLevel);
459 for (auto &R : PM->pluginAdaptors())
460 R.set_info_flag(NewInfoLevel);
461}
462
463EXTERN int __tgt_print_device_info(int64_t DeviceId) {
464 assert(PM && "Runtime not initialized");
465 auto DeviceOrErr = PM->getDevice(DeviceId);
466 if (!DeviceOrErr)
467 FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
468
469 return DeviceOrErr->printDeviceInfo();
470}
471
472EXTERN void __tgt_target_nowait_query(void **AsyncHandle) {
473 assert(PM && "Runtime not initialized");
474 OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
475
476 if (!AsyncHandle || !*AsyncHandle) {
477 FATAL_MESSAGE0(
478 1, "Receive an invalid async handle from the current OpenMP task. Is "
479 "this a target nowait region?\n");
480 }
481
482 // Exponential backoff tries to optimally decide if a thread should just query
483 // for the device operations (work/spin wait on them) or block until they are
484 // completed (use device side blocking mechanism). This allows the runtime to
485 // adapt itself when there are a lot of long-running target regions in-flight.
486 static thread_local utils::ExponentialBackoff QueryCounter(
487 Int64Envar("OMPTARGET_QUERY_COUNT_MAX", 10),
488 Int64Envar("OMPTARGET_QUERY_COUNT_THRESHOLD", 5),
489 Envar<float>("OMPTARGET_QUERY_COUNT_BACKOFF_FACTOR", 0.5f));
490
491 auto *AsyncInfo = (AsyncInfoTy *)*AsyncHandle;
492
493 // If the thread is actively waiting on too many target nowait regions, we
494 // should use the blocking sync type.
495 if (QueryCounter.isAboveThreshold())
496 AsyncInfo->SyncType = AsyncInfoTy::SyncTy::BLOCKING;
497
498 if (AsyncInfo->synchronize())
499 FATAL_MESSAGE0(1, "Error while querying the async queue for completion.\n");
500 // If there are device operations still pending, return immediately without
501 // deallocating the handle and increase the current thread query count.
502 if (!AsyncInfo->isDone()) {
503 QueryCounter.increment();
504 return;
505 }
506
507 // When a thread successfully completes a target nowait region, we
508 // exponentially backoff its query counter by the query factor.
509 QueryCounter.decrement();
510
511 // Delete the handle and unset it from the OpenMP task data.
512 delete AsyncInfo;
513 *AsyncHandle = nullptr;
514}
515

source code of offload/src/interface.cpp