1//===--- Kernel.cpp - OpenMP device kernel interface -------------- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the kernel entry points for the device.
10//
11//===----------------------------------------------------------------------===//
12
13#include "Shared/Environment.h"
14
15#include "Allocator.h"
16#include "Debug.h"
17#include "DeviceTypes.h"
18#include "Interface.h"
19#include "Mapping.h"
20#include "State.h"
21#include "Synchronization.h"
22#include "Workshare.h"
23
24using namespace ompx;
25
// Execution-mode bit flags. They mirror the definitions in
// "llvm/Frontend/OpenMP/OMPDeviceConstants.h"; the two copies must be kept
// in-sync by hand.
enum OMPTgtExecModeFlags : unsigned char {
  // No mode bit set.
  OMP_TGT_EXEC_MODE_BARE = 0x00,
  // Bit 0.
  OMP_TGT_EXEC_MODE_GENERIC = 0x01,
  // Bit 1. __kmpc_target_init tests this bit to decide whether the kernel is
  // initialized in SPMD mode.
  OMP_TGT_EXEC_MODE_SPMD = 0x02,
  // Both the generic and the SPMD bit.
  OMP_TGT_EXEC_MODE_GENERIC_SPMD =
      OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD
};
35
36static void
37inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
38 KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
39 // Order is important here.
40 synchronize::init(IsSPMD);
41 mapping::init(IsSPMD);
42 state::init(IsSPMD, KernelEnvironment, KernelLaunchEnvironment);
43 allocator::init(IsSPMD, KernelEnvironment);
44 workshare::init(IsSPMD);
45}
46
47/// Simple generic state machine for worker threads.
48static void genericStateMachine(IdentTy *Ident) {
49 uint32_t TId = mapping::getThreadIdInBlock();
50
51 do {
52 ParallelRegionFnTy WorkFn = nullptr;
53
54 // Wait for the signal that we have a new work function.
55 synchronize::threads(atomic::seq_cst);
56
57 // Retrieve the work function from the runtime.
58 bool IsActive = __kmpc_kernel_parallel(&WorkFn);
59
60 // If there is nothing more to do, break out of the state machine by
61 // returning to the caller.
62 if (!WorkFn)
63 return;
64
65 if (IsActive) {
66 ASSERT(!mapping::isSPMDMode(), nullptr);
67 ((void (*)(uint32_t, uint32_t))WorkFn)(0, TId);
68 __kmpc_kernel_end_parallel();
69 }
70
71 synchronize::threads(atomic::seq_cst);
72
73 } while (true);
74}
75
76extern "C" {
77
78/// Initialization
79///
80/// \param Ident Source location identification, can be NULL.
81///
82int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
83 KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
84 ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
85 bool IsSPMD = Configuration.ExecMode & OMP_TGT_EXEC_MODE_SPMD;
86 bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
87 if (IsSPMD) {
88 inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
89 KernelLaunchEnvironment);
90 synchronize::threadsAligned(atomic::relaxed);
91 } else {
92 inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
93 KernelLaunchEnvironment);
94 // No need to wait since only the main threads will execute user
95 // code and workers will run into a barrier right away.
96 }
97
98 if (IsSPMD) {
99 state::assumeInitialState(IsSPMD);
100
101 // Synchronize to ensure the assertions above are in an aligned region.
102 // The barrier is eliminated later.
103 synchronize::threadsAligned(atomic::relaxed);
104 return -1;
105 }
106
107 if (mapping::isInitialThreadInLevel0(IsSPMD))
108 return -1;
109
110 // Enter the generic state machine if enabled and if this thread can possibly
111 // be an active worker thread.
112 //
113 // The latter check is important for NVIDIA Pascal (but not Volta) and AMD
114 // GPU. In those cases, a single thread can apparently satisfy a barrier on
115 // behalf of all threads in the same warp. Thus, it would not be safe for
116 // other threads in the main thread's warp to reach the first
117 // synchronize::threads call in genericStateMachine before the main thread
118 // reaches its corresponding synchronize::threads call: that would permit all
119 // active worker threads to proceed before the main thread has actually set
120 // state::ParallelRegionFn, and then they would immediately quit without
121 // doing any work. mapping::getMaxTeamThreads() does not include any of the
122 // main thread's warp, so none of its threads can ever be active worker
123 // threads.
124 if (UseGenericStateMachine &&
125 mapping::getThreadIdInBlock() < mapping::getMaxTeamThreads(IsSPMD))
126 genericStateMachine(KernelEnvironment.Ident);
127
128 return mapping::getThreadIdInBlock();
129}
130
131/// De-Initialization
132///
133/// In non-SPMD, this function releases the workers trapped in a state machine
134/// and also any memory dynamically allocated by the runtime.
135///
136/// \param Ident Source location identification, can be NULL.
137///
138void __kmpc_target_deinit() {
139 bool IsSPMD = mapping::isSPMDMode();
140 if (IsSPMD)
141 return;
142
143 if (mapping::isInitialThreadInLevel0(IsSPMD)) {
144 // Signal the workers to exit the state machine and exit the kernel.
145 state::ParallelRegionFn = nullptr;
146 } else if (!state::getKernelEnvironment()
147 .Configuration.UseGenericStateMachine) {
148 // Retrieve the work function just to ensure we always call
149 // __kmpc_kernel_parallel even if a custom state machine is used.
150 // TODO: this is not super pretty. The problem is we create the call to
151 // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it
152 // is not there yet. Thus, we assume we never reach it from
153 // __kmpc_target_deinit. That allows us to remove the store in there to
154 // ParallelRegionFn, which leads to bad results later on.
155 ParallelRegionFnTy WorkFn = nullptr;
156 __kmpc_kernel_parallel(&WorkFn);
157 ASSERT(WorkFn == nullptr, nullptr);
158 }
159}
160
161int8_t __kmpc_is_spmd_exec_mode() { return mapping::isSPMDMode(); }
162}
163

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of offload/DeviceRTL/src/Kernel.cpp