1//===--- Kernel.cpp - OpenMP device kernel interface -------------- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the kernel entry points for the device.
10//
11//===----------------------------------------------------------------------===//
12
13#include "Shared/Environment.h"
14
15#include "Allocator.h"
16#include "Debug.h"
17#include "Interface.h"
18#include "Mapping.h"
19#include "State.h"
20#include "Synchronization.h"
21#include "Types.h"
22
23#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
24
25using namespace ompx;
26
27#pragma omp begin declare target device_type(nohost)
28
29static void
30inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
31 KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
32 // Order is important here.
33 synchronize::init(IsSPMD);
34 mapping::init(IsSPMD);
35 state::init(IsSPMD, KernelEnvironment, KernelLaunchEnvironment);
36 allocator::init(IsSPMD, KernelEnvironment);
37}
38
39/// Simple generic state machine for worker threads.
40static void genericStateMachine(IdentTy *Ident) {
41 uint32_t TId = mapping::getThreadIdInBlock();
42
43 do {
44 ParallelRegionFnTy WorkFn = nullptr;
45
46 // Wait for the signal that we have a new work function.
47 synchronize::threads(atomic::seq_cst);
48
49 // Retrieve the work function from the runtime.
50 bool IsActive = __kmpc_kernel_parallel(&WorkFn);
51
52 // If there is nothing more to do, break out of the state machine by
53 // returning to the caller.
54 if (!WorkFn)
55 return;
56
57 if (IsActive) {
58 ASSERT(!mapping::isSPMDMode(), nullptr);
59 ((void (*)(uint32_t, uint32_t))WorkFn)(0, TId);
60 __kmpc_kernel_end_parallel();
61 }
62
63 synchronize::threads(atomic::seq_cst);
64
65 } while (true);
66}
67
68extern "C" {
69
70/// Initialization
71///
72/// \param Ident Source location identification, can be NULL.
73///
74int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
75 KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
76 ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
77 bool IsSPMD = Configuration.ExecMode &
78 llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD;
79 bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
80 if (IsSPMD) {
81 inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
82 KernelLaunchEnvironment);
83 synchronize::threadsAligned(atomic::relaxed);
84 } else {
85 inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
86 KernelLaunchEnvironment);
87 // No need to wait since only the main threads will execute user
88 // code and workers will run into a barrier right away.
89 }
90
91 if (IsSPMD) {
92 state::assumeInitialState(IsSPMD);
93
94 // Synchronize to ensure the assertions above are in an aligned region.
95 // The barrier is eliminated later.
96 synchronize::threadsAligned(atomic::relaxed);
97 return -1;
98 }
99
100 if (mapping::isInitialThreadInLevel0(IsSPMD))
101 return -1;
102
103 // Enter the generic state machine if enabled and if this thread can possibly
104 // be an active worker thread.
105 //
106 // The latter check is important for NVIDIA Pascal (but not Volta) and AMD
107 // GPU. In those cases, a single thread can apparently satisfy a barrier on
108 // behalf of all threads in the same warp. Thus, it would not be safe for
109 // other threads in the main thread's warp to reach the first
110 // synchronize::threads call in genericStateMachine before the main thread
111 // reaches its corresponding synchronize::threads call: that would permit all
112 // active worker threads to proceed before the main thread has actually set
113 // state::ParallelRegionFn, and then they would immediately quit without
114 // doing any work. mapping::getMaxTeamThreads() does not include any of the
115 // main thread's warp, so none of its threads can ever be active worker
116 // threads.
117 if (UseGenericStateMachine &&
118 mapping::getThreadIdInBlock() < mapping::getMaxTeamThreads(IsSPMD))
119 genericStateMachine(KernelEnvironment.Ident);
120
121 return mapping::getThreadIdInBlock();
122}
123
124/// De-Initialization
125///
126/// In non-SPMD, this function releases the workers trapped in a state machine
127/// and also any memory dynamically allocated by the runtime.
128///
129/// \param Ident Source location identification, can be NULL.
130///
131void __kmpc_target_deinit() {
132 bool IsSPMD = mapping::isSPMDMode();
133 if (IsSPMD)
134 return;
135
136 if (mapping::isInitialThreadInLevel0(IsSPMD)) {
137 // Signal the workers to exit the state machine and exit the kernel.
138 state::ParallelRegionFn = nullptr;
139 } else if (!state::getKernelEnvironment()
140 .Configuration.UseGenericStateMachine) {
141 // Retrieve the work function just to ensure we always call
142 // __kmpc_kernel_parallel even if a custom state machine is used.
143 // TODO: this is not super pretty. The problem is we create the call to
144 // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it
145 // is not there yet. Thus, we assume we never reach it from
146 // __kmpc_target_deinit. That allows us to remove the store in there to
147 // ParallelRegionFn, which leads to bad results later on.
148 ParallelRegionFnTy WorkFn = nullptr;
149 __kmpc_kernel_parallel(&WorkFn);
150 ASSERT(WorkFn == nullptr, nullptr);
151 }
152}
153
154int8_t __kmpc_is_spmd_exec_mode() { return mapping::isSPMDMode(); }
155}
156
157#pragma omp end declare target
158

// Source: offload/DeviceRTL/src/Kernel.cpp