Warning: This file is not a C or C++ file. It does not have highlighting.

1//===-------- State.h - OpenMP State & ICV interface ------------- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10//===----------------------------------------------------------------------===//
11
12#ifndef OMPTARGET_STATE_H
13#define OMPTARGET_STATE_H
14
15#include "Shared/Environment.h"
16
17#include "Debug.h"
18#include "DeviceTypes.h"
19#include "DeviceUtils.h"
20#include "Mapping.h"
21
22// Forward declaration.
23struct KernelEnvironmentTy;
24
25namespace ompx {
26
namespace memory {

/// Allocate \p Size bytes in shared memory, if possible, for \p Reason.
///
/// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
void *allocShared(uint64_t Size, const char *Reason);

/// Release \p Ptr, previously obtained from allocShared, for \p Reason.
/// NOTE(review): \p Bytes is presumably the size that was passed to
/// allocShared for this pointer - confirm against the implementation.
///
/// Note: See the restrictions on __kmpc_free_shared for proper usage.
void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);

/// Allocate \p Size bytes in global memory, if possible, for \p Reason.
void *allocGlobal(uint64_t Size, const char *Reason);

/// Release \p Ptr, previously obtained from allocGlobal, for \p Reason.
void freeGlobal(void *Ptr, const char *Reason);

/// Return a pointer to the dynamic shared memory buffer.
void *getDynamicBuffer();

} // namespace memory
49
50namespace state {
51
52inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE;
53
/// One complete set of the 32-bit ICV values handled by this header. A copy
/// lives in the team state and, once a thread customizes an ICV, in that
/// thread's ThreadStateTy as well.
struct ICVStateTy {
  /// Compare this ICV set with \p Other (defined out of line).
  bool operator==(const ICVStateTy &Other) const;

  /// Check this ICV set against \p Other (defined out of line).
  /// NOTE(review): presumably asserts member-wise equality - confirm.
  void assertEqual(const ICVStateTy &Other) const;

  /// Storage behind state::VK_NThreads.
  uint32_t NThreadsVar;
  /// Storage behind state::VK_Level.
  uint32_t LevelVar;
  /// Storage behind state::VK_ActiveLevel.
  uint32_t ActiveLevelVar;
  /// Not referenced anywhere in this header; judging by its name it only
  /// reserves space. Keep it in place so the member layout stays unchanged.
  uint32_t Padding0Val;
  /// Storage behind state::VK_MaxActiveLevels.
  uint32_t MaxActiveLevelsVar;
  /// Storage behind state::VK_RunSched.
  uint32_t RunSchedVar;
  /// Storage behind state::VK_RunSchedChunk.
  uint32_t RunSchedChunkVar;
};
67
68struct TeamStateTy {
69 void init(bool IsSPMD);
70
71 bool operator==(const TeamStateTy &) const;
72
73 void assertEqual(TeamStateTy &Other) const;
74
75 /// ICVs
76 ///
77 /// Preallocated storage for ICV values that are used if the threads have not
78 /// set a custom default. The latter is supported but unlikely and slow(er).
79 ///
80 ///{
81 ICVStateTy ICVState;
82 ///}
83
84 uint32_t ParallelTeamSize;
85 uint32_t HasThreadState;
86 ParallelRegionFnTy ParallelRegionFnVar;
87};
88
89extern Local<TeamStateTy> TeamState;
90
91struct ThreadStateTy {
92
93 /// ICVs have preallocated storage in the TeamStateTy which is used if a
94 /// thread has not set a custom value. The latter is supported but unlikely.
95 /// When it happens we will allocate dynamic memory to hold the values of all
96 /// ICVs. Thus, the first time an ICV is set by a thread we will allocate an
97 /// ICV struct to hold them all. This is slower than alternatives but allows
98 /// users to pay only for what they use.
99 ///
100 state::ICVStateTy ICVState;
101
102 ThreadStateTy *PreviousThreadState;
103
104 void init() {
105 ICVState = TeamState.ICVState;
106 PreviousThreadState = nullptr;
107 }
108
109 void init(ThreadStateTy *PreviousTS) {
110 ICVState = PreviousTS ? PreviousTS->ICVState : TeamState.ICVState;
111 PreviousThreadState = PreviousTS;
112 }
113};
114
115extern Local<ThreadStateTy **> ThreadStates;
116
117/// Initialize the state machinery. Must be called by all threads.
118void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
119 KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
120
121/// Return the kernel and kernel launch environment associated with the current
122/// kernel. The former is static and contains compile time information that
123/// holds for all instances of the kernel. The latter is dynamic and provides
124/// per-launch information.
125KernelEnvironmentTy &getKernelEnvironment();
126KernelLaunchEnvironmentTy &getKernelLaunchEnvironment();
127
/// Selects which piece of state a lookup32 / lookupPtr call refers to. The
/// enumerators are also used as template arguments of Value and PtrValue.
enum ValueKind {
  /// ICVStateTy::NThreadsVar.
  VK_NThreads,
  /// ICVStateTy::LevelVar.
  VK_Level,
  /// ICVStateTy::ActiveLevelVar.
  VK_ActiveLevel,
  /// ICVStateTy::MaxActiveLevelsVar.
  VK_MaxActiveLevels,
  /// ICVStateTy::RunSchedVar.
  VK_RunSched,
  // --- Kinds above are exposed through namespace icv, kinds below through
  // --- namespace state.
  /// ICVStateTy::RunSchedChunkVar.
  VK_RunSchedChunk,
  /// TeamStateTy::ParallelRegionFnVar; handled by lookupPtr only.
  VK_ParallelRegionFn,
  /// TeamStateTy::ParallelTeamSize; always read from the team state.
  VK_ParallelTeamSize,
  /// TeamStateTy::HasThreadState; always read from the team state.
  VK_HasThreadState,
};
141
142/// TODO
143void enterDataEnvironment(IdentTy *Ident);
144
145/// TODO
146void exitDataEnvironment();
147
148/// TODO
149struct DateEnvironmentRAII {
150 DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); }
151 ~DateEnvironmentRAII() { exitDataEnvironment(); }
152};
153
/// Reset the state kept for the thread with in-block id \p TId.
/// NOTE(review): presumably releases that thread's ThreadStates entry so it
/// falls back to the team-wide ICVs - confirm in the implementation file.
void resetStateForThread(uint32_t TId);
156
// FIXME: https://github.com/llvm/llvm-project/issues/123241.
//
// Function-body fragment that returns a reference to ICV \p Member suitable
// for modification. It expands to a braced block and `return`s from the
// enclosing function, so it may only be used inside a function returning
// uint32_t &.
//
//  * If \p ForceTeamState is set, thread states are disabled, or no thread
//    owns a private state yet, the team-wide copy is returned.
//  * Otherwise the calling thread's private copy is returned. If the thread
//    does not have one yet, it is allocated from global memory and
//    initialized from the team-wide ICVs first.
//
// \p Ident is accepted for interface symmetry but not used by the expansion.
// \p ForceTeamState is parenthesized so that any expression (for instance a
// conditional expression) can be passed without changing the condition.
#define lookupForModify32Impl(Member, Ident, ForceTeamState)                   \
  {                                                                            \
    if (OMP_LIKELY((ForceTeamState) || !config::mayUseThreadStates() ||        \
                   !TeamState.HasThreadState))                                 \
      return TeamState.ICVState.Member;                                        \
    uint32_t TId = mapping::getThreadIdInBlock();                              \
    if (OMP_UNLIKELY(!ThreadStates[TId])) {                                    \
      ThreadStates[TId] = static_cast<ThreadStateTy *>(                        \
          memory::allocGlobal(sizeof(ThreadStateTy),                           \
                              "ICV modification outside data environment"));   \
      ASSERT(ThreadStates[TId] != nullptr, "Nullptr returned by malloc!");     \
      TeamState.HasThreadState = true;                                         \
      ThreadStates[TId]->init();                                               \
    }                                                                          \
    return ThreadStates[TId]->ICVState.Member;                                 \
  }
174
// FIXME: https://github.com/llvm/llvm-project/issues/123241.
//
// Function-body fragment that returns a reference to ICV \p Member for
// reading. It expands to a braced block and `return`s from the enclosing
// function, so it may only be used inside a function returning a reference to
// the member's type.
//
// The calling thread's private copy is returned only if \p ForceTeamState is
// false, thread states are enabled, some thread owns a private state, and this
// thread's ThreadStates entry is non-null. In every other case the team-wide
// copy is returned. Nothing is allocated.
//
// \p ForceTeamState is parenthesized so the negation applies to the whole
// argument; without it `a || b` would expand to `!a || b && ...`.
#define lookupImpl(Member, ForceTeamState)                                     \
  {                                                                            \
    auto TId = mapping::getThreadIdInBlock();                                  \
    if (OMP_UNLIKELY(!(ForceTeamState) && config::mayUseThreadStates() &&      \
                     TeamState.HasThreadState && ThreadStates[TId]))           \
      return ThreadStates[TId]->ICVState.Member;                               \
    return TeamState.ICVState.Member;                                          \
  }
184
185[[gnu::always_inline, gnu::flatten]] inline uint32_t &
186lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
187 switch (Kind) {
188 case state::VK_NThreads:
189 if (IsReadonly)
190 lookupImpl(NThreadsVar, ForceTeamState);
191 lookupForModify32Impl(NThreadsVar, Ident, ForceTeamState);
192 case state::VK_Level:
193 if (IsReadonly)
194 lookupImpl(LevelVar, ForceTeamState);
195 lookupForModify32Impl(LevelVar, Ident, ForceTeamState);
196 case state::VK_ActiveLevel:
197 if (IsReadonly)
198 lookupImpl(ActiveLevelVar, ForceTeamState);
199 lookupForModify32Impl(ActiveLevelVar, Ident, ForceTeamState);
200 case state::VK_MaxActiveLevels:
201 if (IsReadonly)
202 lookupImpl(MaxActiveLevelsVar, ForceTeamState);
203 lookupForModify32Impl(MaxActiveLevelsVar, Ident, ForceTeamState);
204 case state::VK_RunSched:
205 if (IsReadonly)
206 lookupImpl(RunSchedVar, ForceTeamState);
207 lookupForModify32Impl(RunSchedVar, Ident, ForceTeamState);
208 case state::VK_RunSchedChunk:
209 if (IsReadonly)
210 lookupImpl(RunSchedChunkVar, ForceTeamState);
211 lookupForModify32Impl(RunSchedChunkVar, Ident, ForceTeamState);
212 case state::VK_ParallelTeamSize:
213 return TeamState.ParallelTeamSize;
214 case state::VK_HasThreadState:
215 return TeamState.HasThreadState;
216 default:
217 break;
218 }
219 __builtin_unreachable();
220}
221
222[[gnu::always_inline, gnu::flatten]] inline void *&
223lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
224 switch (Kind) {
225 case state::VK_ParallelRegionFn:
226 return TeamState.ParallelRegionFnVar;
227 default:
228 break;
229 }
230 __builtin_unreachable();
231}
232
233/// A class without actual state used to provide a nice interface to lookup and
234/// update ICV values we can declare in global scope.
235template <typename Ty, ValueKind Kind> struct Value {
236 [[gnu::flatten, gnu::always_inline]] operator Ty() {
237 return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr,
238 /*ForceTeamState=*/false);
239 }
240
241 [[gnu::flatten, gnu::always_inline]] Value &operator=(const Ty &Other) {
242 set(Other, /*IdentTy=*/nullptr);
243 return *this;
244 }
245
246 [[gnu::flatten, gnu::always_inline]] Value &operator++() {
247 inc(1, /*IdentTy=*/nullptr);
248 return *this;
249 }
250
251 [[gnu::flatten, gnu::always_inline]] Value &operator--() {
252 inc(-1, /*IdentTy=*/nullptr);
253 return *this;
254 }
255
256 [[gnu::flatten, gnu::always_inline]] void
257 assert_eq(const Ty &V, IdentTy *Ident = nullptr,
258 bool ForceTeamState = false) {
259 ASSERT(lookup(/*IsReadonly=*/true, Ident, ForceTeamState) == V, nullptr);
260 }
261
262private:
263 [[gnu::flatten, gnu::always_inline]] Ty &
264 lookup(bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
265 Ty &t = lookup32(Kind, IsReadonly, Ident, ForceTeamState);
266 return t;
267 }
268
269 [[gnu::flatten, gnu::always_inline]] Ty &inc(int UpdateVal, IdentTy *Ident) {
270 return (lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false) +=
271 UpdateVal);
272 }
273
274 [[gnu::flatten, gnu::always_inline]] Ty &set(Ty UpdateVal, IdentTy *Ident) {
275 return (lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false) =
276 UpdateVal);
277 }
278
279 template <typename VTy, typename Ty2> friend struct ValueRAII;
280};
281
282/// A mookup class without actual state used to provide
283/// a nice interface to lookup and update ICV values
284/// we can declare in global scope.
285template <typename Ty, ValueKind Kind> struct PtrValue {
286 [[gnu::flatten, gnu::always_inline]] operator Ty() {
287 return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr,
288 /*ForceTeamState=*/false);
289 }
290
291 [[gnu::flatten, gnu::always_inline]] PtrValue &operator=(const Ty Other) {
292 set(Other);
293 return *this;
294 }
295
296private:
297 Ty &lookup(bool IsReadonly, IdentTy *, bool ForceTeamState) {
298 return lookupPtr(Kind, IsReadonly, ForceTeamState);
299 }
300
301 Ty &set(Ty UpdateVal) {
302 return (lookup(/*IsReadonly=*/false, /*IdentTy=*/nullptr,
303 /*ForceTeamState=*/false) = UpdateVal);
304 }
305
306 template <typename VTy, typename Ty2> friend struct ValueRAII;
307};
308
309template <typename VTy, typename Ty> struct ValueRAII {
310 ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident,
311 bool ForceTeamState = false)
312 : Ptr(Active ? &V.lookup(/*IsReadonly=*/false, Ident, ForceTeamState)
313 : (Ty *)utils::UndefPtr),
314 Val(OldValue), Active(Active) {
315 if (!Active)
316 return;
317 ASSERT(*Ptr == OldValue, "ValueRAII initialization with wrong old value!");
318 *Ptr = NewValue;
319 }
320 ~ValueRAII() {
321 if (Active)
322 *Ptr = Val;
323 }
324
325private:
326 Ty *Ptr;
327 Ty Val;
328 bool Active;
329};
330
331/// TODO
332inline state::Value<uint32_t, state::VK_RunSchedChunk> RunSchedChunk;
333
334/// TODO
335inline state::Value<uint32_t, state::VK_ParallelTeamSize> ParallelTeamSize;
336
337/// TODO
338inline state::Value<uint32_t, state::VK_HasThreadState> HasThreadState;
339
340/// TODO
341inline state::PtrValue<ParallelRegionFnTy, state::VK_ParallelRegionFn>
342 ParallelRegionFn;
343
/// Invoke \p Func and check the runtime state around the call.
/// NOTE(review): presumably snapshots the state before the call and asserts
/// it is unchanged afterwards - confirm in the implementation file.
void runAndCheckState(void (*Func)());

/// State-related assumptions for a kernel in the mode selected by \p IsSPMD.
/// NOTE(review): presumably tells the optimizer the state still has its
/// initial values - confirm in the implementation file.
void assumeInitialState(bool IsSPMD);

/// Return the value of the ParallelTeamSize ICV.
int getEffectivePTeamSize();
350
351} // namespace state
352
353namespace icv {
354
355/// TODO
356inline state::Value<uint32_t, state::VK_NThreads> NThreads;
357
358/// TODO
359inline state::Value<uint32_t, state::VK_Level> Level;
360
361/// The `active-level` describes which of the parallel level counted with the
362/// `level-var` is active. There can only be one.
363///
364/// active-level-var is 1, if ActiveLevelVar is not 0, otherwise it is 0.
365inline state::Value<uint32_t, state::VK_ActiveLevel> ActiveLevel;
366
367/// TODO
368inline state::Value<uint32_t, state::VK_MaxActiveLevels> MaxActiveLevels;
369
370/// TODO
371inline state::Value<uint32_t, state::VK_RunSched> RunSched;
372
373} // namespace icv
374
375} // namespace ompx
376
377#endif
378

Warning: This file is not a C or C++ file. It does not have highlighting.

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of offload/DeviceRTL/include/State.h