Warning: This file is not a C or C++ file. It does not have highlighting.
1 | //===-------- State.h - OpenMP State & ICV interface ------------- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // |
10 | //===----------------------------------------------------------------------===// |
11 | |
12 | #ifndef OMPTARGET_STATE_H |
13 | #define OMPTARGET_STATE_H |
14 | |
15 | #include "Shared/Environment.h" |
16 | |
17 | #include "Debug.h" |
18 | #include "DeviceTypes.h" |
19 | #include "DeviceUtils.h" |
20 | #include "Mapping.h" |
21 | |
22 | // Forward declaration. |
23 | struct KernelEnvironmentTy; |
24 | |
25 | namespace ompx { |
26 | |
27 | namespace memory { |
28 | |
29 | /// Allocate \p Size bytes in shared memory, if possible, for \p Reason. |
30 | /// |
31 | /// Note: See the restrictions on __kmpc_alloc_shared for proper usage. |
32 | void *allocShared(uint64_t Size, const char *Reason); |
33 | |
34 | /// Free \p Ptr, allocated via allocShared, for \p Reason. NOTE(review): \p Bytes is presumably the size that was given to allocShared; confirm. |
35 | /// |
36 | /// Note: See the restrictions on __kmpc_free_shared for proper usage. |
37 | void freeShared(void *Ptr, uint64_t Bytes, const char *Reason); |
38 | |
39 | /// Allocate \p Size bytes in global memory, if possible, for \p Reason. |
40 | void *allocGlobal(uint64_t Size, const char *Reason); |
41 | |
42 | /// Return a pointer to the dynamic shared memory buffer. |
43 | void *getDynamicBuffer(); |
44 | |
45 | /// Free \p Ptr, allocated via allocGlobal, for \p Reason. |
46 | void freeGlobal(void *Ptr, const char *Reason); |
47 | |
48 | } // namespace memory |
49 | |
50 | namespace state { |
51 | /** The SHARED_SCRATCHPAD_SIZE macro exposed as a typed constant. NOTE(review): presumably a size in bytes; confirm where the macro is defined. */ |
52 | inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE; |
53 | /** Storage for one set of ICV values (nthreads, level, active-level, max-active-levels, run-sched, run-sched chunk) plus one padding word; every field is a uint32_t. */ |
54 | struct ICVStateTy { |
55 | uint32_t NThreadsVar; |
56 | uint32_t LevelVar; |
57 | uint32_t ActiveLevelVar; |
58 | uint32_t Padding0Val; |
59 | uint32_t MaxActiveLevelsVar; |
60 | uint32_t RunSchedVar; |
61 | uint32_t RunSchedChunkVar; |
62 | |
63 | bool operator==(const ICVStateTy &Other) const; |
64 | |
65 | void assertEqual(const ICVStateTy &Other) const; |
66 | }; |
67 | /** Team-wide state: the default ICV values, the parallel team size, a flag that is set once any thread state has been allocated, and the parallel region function. */ |
68 | struct TeamStateTy { |
69 | void init(bool IsSPMD); |
70 | |
71 | bool operator==(const TeamStateTy &) const; |
72 | |
73 | void assertEqual(TeamStateTy &Other) const; |
74 | |
75 | /// ICVs |
76 | /// |
77 | /// Preallocated storage for ICV values that are used if the threads have not |
78 | /// set a custom default. The latter is supported but unlikely and slow(er). |
79 | /// |
80 | ///{ |
81 | ICVStateTy ICVState; |
82 | ///} |
83 | |
84 | uint32_t ParallelTeamSize; |
85 | uint32_t HasThreadState; |
86 | ParallelRegionFnTy ParallelRegionFnVar; |
87 | }; |
88 | /** The team state object that the lookup helpers in this header read and update; only declared here. */ |
89 | extern Local<TeamStateTy> TeamState; |
90 | /** A thread's private copy of the ICVs together with a pointer to a previous thread state. */ |
91 | struct ThreadStateTy { |
92 | |
93 | /// ICVs have preallocated storage in the TeamStateTy which is used if a |
94 | /// thread has not set a custom value. The latter is supported but unlikely. |
95 | /// When it happens we will allocate dynamic memory to hold the values of all |
96 | /// ICVs. Thus, the first time an ICV is set by a thread we will allocate an |
97 | /// ICV struct to hold them all. This is slower than alternatives but allows |
98 | /// users to pay only for what they use. |
99 | /// |
100 | state::ICVStateTy ICVState; |
101 | /** The thread state handed to init(ThreadStateTy *), or nullptr after the parameterless init(). */ |
102 | ThreadStateTy *PreviousThreadState; |
103 | /** Start from a copy of the team-wide ICV values with no previous thread state. */ |
104 | void init() { |
105 | ICVState = TeamState.ICVState; |
106 | PreviousThreadState = nullptr; |
107 | } |
108 | /** Copy the ICVs from \p PreviousTS if it is non-null, otherwise from the team state, and record \p PreviousTS as the previous thread state. */ |
109 | void init(ThreadStateTy *PreviousTS) { |
110 | ICVState = PreviousTS ? PreviousTS->ICVState : TeamState.ICVState; |
111 | PreviousThreadState = PreviousTS; |
112 | } |
113 | }; |
114 | /** Thread state pointers, indexed by the thread id in the block by the lookup macros in this header; a null entry means that thread has no thread state. */ |
115 | extern Local<ThreadStateTy **> ThreadStates; |
116 | |
117 | /// Initialize the state machinery for a kernel, given whether it runs in SPMD mode (\p IsSPMD) and its kernel and kernel launch environments. Must be called by all threads. |
118 | void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment, |
119 | KernelLaunchEnvironmentTy &KernelLaunchEnvironment); |
120 | |
121 | /// Return the kernel and kernel launch environment associated with the current |
122 | /// kernel. The former (KernelEnvironmentTy) is static and contains compile time |
123 | /// information that holds for all instances of the kernel. The latter |
124 | /// (KernelLaunchEnvironmentTy) is dynamic and provides per-launch information. |
125 | KernelEnvironmentTy &getKernelEnvironment(); |
126 | KernelLaunchEnvironmentTy &getKernelLaunchEnvironment(); |
127 | |
128 | /// Selects which piece of state a lookup refers to: one of the ICVs stored in ICVStateTy, or the parallel region function, parallel team size, or thread-state flag stored in TeamStateTy. |
129 | enum ValueKind { |
130 | VK_NThreads, |
131 | VK_Level, |
132 | VK_ActiveLevel, |
133 | VK_MaxActiveLevels, |
134 | VK_RunSched, |
135 | // --- |
136 | VK_RunSchedChunk, |
137 | VK_ParallelRegionFn, |
138 | VK_ParallelTeamSize, |
139 | VK_HasThreadState, |
140 | }; |
141 | |
142 | /// Enter a data environment for \p Ident; to be paired with exitDataEnvironment(). NOTE(review): presumably gives the calling thread a fresh thread state; confirm in the implementation. |
143 | void enterDataEnvironment(IdentTy *Ident); |
144 | |
145 | /// Leave the data environment entered by the matching enterDataEnvironment() call. |
146 | void exitDataEnvironment(); |
147 | |
148 | /// Scope guard that calls enterDataEnvironment(\p Ident) when constructed and exitDataEnvironment() when destroyed. NOTE(review): "Date" looks like a typo for "Data"; renaming would affect every user of this type. |
149 | struct DateEnvironmentRAII { |
150 | DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); } |
151 | ~DateEnvironmentRAII() { exitDataEnvironment(); } |
152 | }; |
153 | |
154 | /// Reset the state kept for thread \p TId. NOTE(review): presumably releases that thread's entry in ThreadStates; confirm in the implementation. |
155 | void resetStateForThread(uint32_t TId); |
156 | /** Function body for a lookup that may be written through. Returns TeamState.ICVState.Member if ForceTeamState is set, thread states cannot be used, or the team has no thread state; otherwise returns the calling thread's ICVState.Member, first allocating (from global memory) and initializing its ThreadStateTy if it has none. The Ident argument is not used. */ |
157 | // FIXME: https://github.com/llvm/llvm-project/issues/123241. |
158 | #define lookupForModify32Impl(Member, Ident, ForceTeamState) \ |
159 | { \ |
160 | if (OMP_LIKELY(ForceTeamState || !config::mayUseThreadStates() || \ |
161 | !TeamState.HasThreadState)) \ |
162 | return TeamState.ICVState.Member; \ |
163 | uint32_t TId = mapping::getThreadIdInBlock(); \ |
164 | if (OMP_UNLIKELY(!ThreadStates[TId])) { \ |
165 | ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>( \ |
166 | memory::allocGlobal(sizeof(ThreadStateTy), \ |
167 | "ICV modification outside data environment")); \ |
168 | ASSERT(ThreadStates[TId] != nullptr, "Nullptr returned by malloc!"); \ |
169 | TeamState.HasThreadState = true; \ |
170 | ThreadStates[TId]->init(); \ |
171 | } \ |
172 | return ThreadStates[TId]->ICVState.Member; \ |
173 | } |
174 | /** Function body for a read-only lookup. Returns the calling thread's ICVState.Member when ForceTeamState is false, thread states may be used, the team has one, and this thread's entry is non-null; otherwise returns TeamState.ICVState.Member. Never allocates. */ |
175 | // FIXME: https://github.com/llvm/llvm-project/issues/123241. |
176 | #define lookupImpl(Member, ForceTeamState) \ |
177 | { \ |
178 | auto TId = mapping::getThreadIdInBlock(); \ |
179 | if (OMP_UNLIKELY(!ForceTeamState && config::mayUseThreadStates() && \ |
180 | TeamState.HasThreadState && ThreadStates[TId])) \ |
181 | return ThreadStates[TId]->ICVState.Member; \ |
182 | return TeamState.ICVState.Member; \ |
183 | } |
184 | /** Return a reference to the 32-bit value selected by \p Kind. For the ICV kinds, \p IsReadonly selects the lookupImpl body and otherwise the lookupForModify32Impl body is used; both macros expand to blocks that return, so no case falls through. VK_ParallelTeamSize and VK_HasThreadState always refer to TeamState. Any other kind reaches __builtin_unreachable(). */ |
185 | [[gnu::always_inline, gnu::flatten]] inline uint32_t & |
186 | lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) { |
187 | switch (Kind) { |
188 | case state::VK_NThreads: |
189 | if (IsReadonly) |
190 | lookupImpl(NThreadsVar, ForceTeamState); |
191 | lookupForModify32Impl(NThreadsVar, Ident, ForceTeamState); |
192 | case state::VK_Level: |
193 | if (IsReadonly) |
194 | lookupImpl(LevelVar, ForceTeamState); |
195 | lookupForModify32Impl(LevelVar, Ident, ForceTeamState); |
196 | case state::VK_ActiveLevel: |
197 | if (IsReadonly) |
198 | lookupImpl(ActiveLevelVar, ForceTeamState); |
199 | lookupForModify32Impl(ActiveLevelVar, Ident, ForceTeamState); |
200 | case state::VK_MaxActiveLevels: |
201 | if (IsReadonly) |
202 | lookupImpl(MaxActiveLevelsVar, ForceTeamState); |
203 | lookupForModify32Impl(MaxActiveLevelsVar, Ident, ForceTeamState); |
204 | case state::VK_RunSched: |
205 | if (IsReadonly) |
206 | lookupImpl(RunSchedVar, ForceTeamState); |
207 | lookupForModify32Impl(RunSchedVar, Ident, ForceTeamState); |
208 | case state::VK_RunSchedChunk: |
209 | if (IsReadonly) |
210 | lookupImpl(RunSchedChunkVar, ForceTeamState); |
211 | lookupForModify32Impl(RunSchedChunkVar, Ident, ForceTeamState); |
212 | case state::VK_ParallelTeamSize: |
213 | return TeamState.ParallelTeamSize; |
214 | case state::VK_HasThreadState: |
215 | return TeamState.HasThreadState; |
216 | default: |
217 | break; |
218 | } |
219 | __builtin_unreachable(); |
220 | } |
221 | /** Return a reference to the pointer-typed value selected by \p Kind. Only VK_ParallelRegionFn is handled (TeamState.ParallelRegionFnVar); any other kind reaches __builtin_unreachable(). \p IsReadonly and \p ForceTeamState are not consulted. */ |
222 | [[gnu::always_inline, gnu::flatten]] inline void *& |
223 | lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) { |
224 | switch (Kind) { |
225 | case state::VK_ParallelRegionFn: |
226 | return TeamState.ParallelRegionFnVar; |
227 | default: |
228 | break; |
229 | } |
230 | __builtin_unreachable(); |
231 | } |
232 | |
233 | /// A class without actual state that provides a convenient interface to look up and update the value selected by \p Kind; objects of it can be declared in global scope. |
234 | /// Conversion to Ty and assert_eq use a read-only lookup32 call; operator=, operator++ and operator-- use a modifying one. |
235 | template <typename Ty, ValueKind Kind> struct Value { |
236 | [[gnu::flatten, gnu::always_inline]] operator Ty() { |
237 | return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr, |
238 | /*ForceTeamState=*/false); |
239 | } |
240 | /** Store \p Other through a modifying lookup. */ |
241 | [[gnu::flatten, gnu::always_inline]] Value &operator=(const Ty &Other) { |
242 | set(Other, /*IdentTy=*/nullptr); |
243 | return *this; |
244 | } |
245 | /** Add one to the value. */ |
246 | [[gnu::flatten, gnu::always_inline]] Value &operator++() { |
247 | inc(1, /*IdentTy=*/nullptr); |
248 | return *this; |
249 | } |
250 | /** Subtract one from the value by adding -1; for an unsigned Ty this relies on modular arithmetic. */ |
251 | [[gnu::flatten, gnu::always_inline]] Value &operator--() { |
252 | inc(-1, /*IdentTy=*/nullptr); |
253 | return *this; |
254 | } |
255 | /** Assert that the value read with a read-only lookup equals \p V. */ |
256 | [[gnu::flatten, gnu::always_inline]] void |
257 | assert_eq(const Ty &V, IdentTy *Ident = nullptr, |
258 | bool ForceTeamState = false) { |
259 | ASSERT(lookup(/*IsReadonly=*/true, Ident, ForceTeamState) == V, nullptr); |
260 | } |
261 | |
262 | private: |
263 | [[gnu::flatten, gnu::always_inline]] Ty & |
264 | lookup(bool IsReadonly, IdentTy *Ident, bool ForceTeamState) { |
265 | Ty &t = lookup32(Kind, IsReadonly, Ident, ForceTeamState); |
266 | return t; |
267 | } |
268 | /** Add \p UpdateVal to the value found by a modifying lookup and return a reference to it. */ |
269 | [[gnu::flatten, gnu::always_inline]] Ty &inc(int UpdateVal, IdentTy *Ident) { |
270 | return (lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false) += |
271 | UpdateVal); |
272 | } |
273 | /** Assign \p UpdateVal to the value found by a modifying lookup and return a reference to it. */ |
274 | [[gnu::flatten, gnu::always_inline]] Ty &set(Ty UpdateVal, IdentTy *Ident) { |
275 | return (lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false) = |
276 | UpdateVal); |
277 | } |
278 | /** ValueRAII calls the private lookup() directly. */ |
279 | template <typename VTy, typename Ty2> friend struct ValueRAII; |
280 | }; |
281 | |
282 | /// A lookup class without actual state used to provide |
283 | /// a nice interface to lookup and update pointer-typed values |
284 | /// we can declare in global scope. All accesses go through lookupPtr. |
285 | template <typename Ty, ValueKind Kind> struct PtrValue { |
286 | [[gnu::flatten, gnu::always_inline]] operator Ty() { |
287 | return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr, |
288 | /*ForceTeamState=*/false); |
289 | } |
290 | /** Store \p Other in the looked-up pointer slot. */ |
291 | [[gnu::flatten, gnu::always_inline]] PtrValue &operator=(const Ty Other) { |
292 | set(Other); |
293 | return *this; |
294 | } |
295 | |
296 | private: |
297 | Ty &lookup(bool IsReadonly, IdentTy *, bool ForceTeamState) { |
298 | return lookupPtr(Kind, IsReadonly, ForceTeamState); |
299 | } |
300 | /** Assign \p UpdateVal to the looked-up pointer slot and return a reference to it. */ |
301 | Ty &set(Ty UpdateVal) { |
302 | return (lookup(/*IsReadonly=*/false, /*IdentTy=*/nullptr, |
303 | /*ForceTeamState=*/false) = UpdateVal); |
304 | } |
305 | /** ValueRAII calls the private lookup() directly. */ |
306 | template <typename VTy, typename Ty2> friend struct ValueRAII; |
307 | }; |
308 | /** Scoped override of a Value/PtrValue. If \p Active, the constructor looks up \p V for modification, asserts that it currently holds \p OldValue, and stores \p NewValue; the destructor writes \p OldValue back. If not active, nothing is looked up or written and Ptr is set to utils::UndefPtr. NOTE(review): copying is not disabled, so a copy of an active object would write the old value back a second time. */ |
309 | template <typename VTy, typename Ty> struct ValueRAII { |
310 | ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident, |
311 | bool ForceTeamState = false) |
312 | : Ptr(Active ? &V.lookup(/*IsReadonly=*/false, Ident, ForceTeamState) |
313 | : (Ty *)utils::UndefPtr), |
314 | Val(OldValue), Active(Active) { |
315 | if (!Active) |
316 | return; |
317 | ASSERT(*Ptr == OldValue, "ValueRAII initialization with wrong old value!"); |
318 | *Ptr = NewValue; |
319 | } |
320 | ~ValueRAII() { |
321 | if (Active) |
322 | *Ptr = Val; |
323 | } |
324 | |
325 | private: |
326 | Ty *Ptr; |
327 | Ty Val; |
328 | bool Active; |
329 | }; |
330 | |
331 | /// Accessor for the run-sched chunk ICV (ICVStateTy::RunSchedChunkVar). |
332 | inline state::Value<uint32_t, state::VK_RunSchedChunk> RunSchedChunk; |
333 | |
334 | /// Accessor for TeamState.ParallelTeamSize. |
335 | inline state::Value<uint32_t, state::VK_ParallelTeamSize> ParallelTeamSize; |
336 | |
337 | /// Accessor for TeamState.HasThreadState. |
338 | inline state::Value<uint32_t, state::VK_HasThreadState> HasThreadState; |
339 | |
340 | /// Accessor for TeamState.ParallelRegionFnVar. |
341 | inline state::PtrValue<ParallelRegionFnTy, state::VK_ParallelRegionFn> |
342 | ParallelRegionFn; |
343 | /** NOTE(review): presumably runs \p Func and verifies that the runtime state is unchanged afterwards; confirm in the implementation. */ |
344 | void runAndCheckState(void(Func(void))); |
345 | /** NOTE(review): presumably lets the compiler assume the state still has its initial values for the given mode (\p IsSPMD); confirm in the implementation. */ |
346 | void assumeInitialState(bool IsSPMD); |
347 | |
348 | /// Return the value of the ParallelTeamSize ICV. |
349 | int getEffectivePTeamSize(); |
350 | |
351 | } // namespace state |
352 | |
353 | namespace icv { |
354 | |
355 | /// Accessor for the nthreads ICV (ICVStateTy::NThreadsVar). |
356 | inline state::Value<uint32_t, state::VK_NThreads> NThreads; |
357 | |
358 | /// Accessor for the level ICV (ICVStateTy::LevelVar). |
359 | inline state::Value<uint32_t, state::VK_Level> Level; |
360 | |
361 | /// The `active-level` describes which of the parallel levels counted with the |
362 | /// `level-var` is active. There can only be one. |
363 | /// |
364 | /// active-level-var is 1, if ActiveLevelVar is not 0, otherwise it is 0. |
365 | inline state::Value<uint32_t, state::VK_ActiveLevel> ActiveLevel; |
366 | |
367 | /// Accessor for the max-active-levels ICV (ICVStateTy::MaxActiveLevelsVar). |
368 | inline state::Value<uint32_t, state::VK_MaxActiveLevels> MaxActiveLevels; |
369 | |
370 | /// Accessor for the run-sched ICV (ICVStateTy::RunSchedVar). |
371 | inline state::Value<uint32_t, state::VK_RunSched> RunSched; |
372 | |
373 | } // namespace icv |
374 | |
375 | } // namespace ompx |
376 | |
377 | #endif |
378 |
Warning: This file is not a C or C++ file. It does not have highlighting.