1 | //===--- Kernel.cpp - OpenMP device kernel interface -------------- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the kernel entry points for the device. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "Shared/Environment.h" |
14 | |
15 | #include "Allocator.h" |
16 | #include "Debug.h" |
17 | #include "Interface.h" |
18 | #include "Mapping.h" |
19 | #include "State.h" |
20 | #include "Synchronization.h" |
21 | #include "Types.h" |
22 | |
23 | #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h" |
24 | |
25 | using namespace ompx; |
26 | |
27 | #pragma omp begin declare target device_type(nohost) |
28 | |
29 | static void |
30 | inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment, |
31 | KernelLaunchEnvironmentTy &KernelLaunchEnvironment) { |
32 | // Order is important here. |
33 | synchronize::init(IsSPMD); |
34 | mapping::init(IsSPMD); |
35 | state::init(IsSPMD, KernelEnvironment, KernelLaunchEnvironment); |
36 | allocator::init(IsSPMD, KernelEnvironment); |
37 | } |
38 | |
39 | /// Simple generic state machine for worker threads. |
40 | static void genericStateMachine(IdentTy *Ident) { |
41 | uint32_t TId = mapping::getThreadIdInBlock(); |
42 | |
43 | do { |
44 | ParallelRegionFnTy WorkFn = nullptr; |
45 | |
46 | // Wait for the signal that we have a new work function. |
47 | synchronize::threads(atomic::seq_cst); |
48 | |
49 | // Retrieve the work function from the runtime. |
50 | bool IsActive = __kmpc_kernel_parallel(&WorkFn); |
51 | |
52 | // If there is nothing more to do, break out of the state machine by |
53 | // returning to the caller. |
54 | if (!WorkFn) |
55 | return; |
56 | |
57 | if (IsActive) { |
58 | ASSERT(!mapping::isSPMDMode(), nullptr); |
59 | ((void (*)(uint32_t, uint32_t))WorkFn)(0, TId); |
60 | __kmpc_kernel_end_parallel(); |
61 | } |
62 | |
63 | synchronize::threads(atomic::seq_cst); |
64 | |
65 | } while (true); |
66 | } |
67 | |
68 | extern "C" { |
69 | |
/// Initialization
///
/// Initializes the device runtime for the current kernel. In SPMD mode every
/// thread returns right after initialization. In non-SPMD mode the initial
/// thread returns immediately, while the remaining threads may first enter the
/// generic state machine.
///
/// \param KernelEnvironment Kernel environment. Its Configuration provides the
///        execution mode flags and the UseGenericStateMachine setting; its
///        Ident is passed on to the generic state machine.
/// \param KernelLaunchEnvironment Kernel launch environment, forwarded to the
///        runtime initialization.
///
/// \returns -1 for all threads in SPMD mode and for the initial thread in
///          non-SPMD mode; for every other thread, its thread id in the block
///          (after the generic state machine, if entered, has returned).
///
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
                           KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
  ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
  // SPMD mode is selected by the SPMD bit of the configured execution mode.
  bool IsSPMD = Configuration.ExecMode &
                llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD;
  bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
  // Both branches call the initializer with a literal IsSPMD argument; only
  // the SPMD branch follows it with an aligned barrier.
  if (IsSPMD) {
    inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
                        KernelLaunchEnvironment);
    synchronize::threadsAligned(atomic::relaxed);
  } else {
    inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
                        KernelLaunchEnvironment);
    // No need to wait since only the main threads will execute user
    // code and workers will run into a barrier right away.
  }

  if (IsSPMD) {
    state::assumeInitialState(IsSPMD);

    // Synchronize to ensure the assertions above are in an aligned region.
    // The barrier is eliminated later.
    synchronize::threadsAligned(atomic::relaxed);
    return -1;
  }

  // Non-SPMD mode from here on: the initial thread returns right away.
  if (mapping::isInitialThreadInLevel0(IsSPMD))
    return -1;

  // Enter the generic state machine if enabled and if this thread can possibly
  // be an active worker thread.
  //
  // The latter check is important for NVIDIA Pascal (but not Volta) and AMD
  // GPU. In those cases, a single thread can apparently satisfy a barrier on
  // behalf of all threads in the same warp. Thus, it would not be safe for
  // other threads in the main thread's warp to reach the first
  // synchronize::threads call in genericStateMachine before the main thread
  // reaches its corresponding synchronize::threads call: that would permit all
  // active worker threads to proceed before the main thread has actually set
  // state::ParallelRegionFn, and then they would immediately quit without
  // doing any work. mapping::getMaxTeamThreads() does not include any of the
  // main thread's warp, so none of its threads can ever be active worker
  // threads.
  if (UseGenericStateMachine &&
      mapping::getThreadIdInBlock() < mapping::getMaxTeamThreads(IsSPMD))
    genericStateMachine(KernelEnvironment.Ident);

  // Reached by non-initial threads, whether or not they entered the state
  // machine above.
  return mapping::getThreadIdInBlock();
}
123 | |
/// De-Initialization
///
/// In non-SPMD, this function releases the workers trapped in a state machine
/// and also any memory dynamically allocated by the runtime.
/// NOTE(review): no memory release is visible in this function body; confirm
/// that this part of the description is still accurate.
///
/// In SPMD mode the function returns immediately. It takes no parameters.
///
void __kmpc_target_deinit() {
  bool IsSPMD = mapping::isSPMDMode();
  // Nothing to do in SPMD mode.
  if (IsSPMD)
    return;

  if (mapping::isInitialThreadInLevel0(IsSPMD)) {
    // Signal the workers to exit the state machine and exit the kernel.
    state::ParallelRegionFn = nullptr;
  } else if (!state::getKernelEnvironment()
                  .Configuration.UseGenericStateMachine) {
    // Retrieve the work function just to ensure we always call
    // __kmpc_kernel_parallel even if a custom state machine is used.
    // TODO: this is not super pretty. The problem is we create the call to
    // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it
    // is not there yet. Thus, we assume we never reach it from
    // __kmpc_target_deinit. That allows us to remove the store in there to
    // ParallelRegionFn, which leads to bad results later on.
    ParallelRegionFnTy WorkFn = nullptr;
    __kmpc_kernel_parallel(&WorkFn);
    // No work function is expected to be handed out at this point.
    ASSERT(WorkFn == nullptr, nullptr);
  }
}
153 | |
154 | int8_t __kmpc_is_spmd_exec_mode() { return mapping::isSPMDMode(); } |
155 | } |
156 | |
157 | #pragma omp end declare target |
158 | |