//===-------- State.h - OpenMP State & ICV interface ------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the OpenMP state and internal control variable (ICV)
// interface of the device runtime.
//
//===----------------------------------------------------------------------===//

#ifndef OMPTARGET_STATE_H
#define OMPTARGET_STATE_H

#include "Shared/Environment.h"

#include "Debug.h"
#include "DeviceTypes.h"
#include "DeviceUtils.h"
#include "Mapping.h"

// Forward declaration.
struct KernelEnvironmentTy;

namespace ompx {

namespace memory {

/// Allocate \p Size bytes in shared memory, if possible, for \p Reason.
///
/// Note: See the restrictions on __kmpc_alloc_shared for proper usage.
void *allocShared(uint64_t Size, const char *Reason);

/// Free \p Ptr, allocated via allocShared, for \p Reason.
///
/// Note: See the restrictions on __kmpc_free_shared for proper usage.
void freeShared(void *Ptr, uint64_t Bytes, const char *Reason);

/// Allocate \p Size bytes in global memory, if possible, for \p Reason.
void *allocGlobal(uint64_t Size, const char *Reason);

/// Return a pointer to the dynamic shared memory buffer.
void *getDynamicBuffer();

/// Free \p Ptr, allocated via allocGlobal, for \p Reason.
void freeGlobal(void *Ptr, const char *Reason);
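
// Illustrative usage sketch (not part of the interface; the sizes and reason
// strings are arbitrary examples). Shared allocations follow the
// __kmpc_alloc_shared/__kmpc_free_shared restrictions noted above, while
// global allocations are heap-like and released with freeGlobal.
//
//   void *TeamBuf = memory::allocShared(256, "team-local scratch");
//   // ... use TeamBuf from the threads of the team ...
//   memory::freeShared(TeamBuf, 256, "team-local scratch");
//
//   void *Heap = memory::allocGlobal(sizeof(uint64_t), "per-thread counter");
//   // ... use Heap ...
//   memory::freeGlobal(Heap, "per-thread counter");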

} // namespace memory

namespace state {

inline constexpr uint32_t SharedScratchpadSize = SHARED_SCRATCHPAD_SIZE;

struct ICVStateTy {
  uint32_t NThreadsVar;
  uint32_t LevelVar;
  uint32_t ActiveLevelVar;
  uint32_t Padding0Val;
  uint32_t MaxActiveLevelsVar;
  uint32_t RunSchedVar;
  uint32_t RunSchedChunkVar;

  bool operator==(const ICVStateTy &Other) const;

  void assertEqual(const ICVStateTy &Other) const;
};

struct TeamStateTy {
  void init(bool IsSPMD);

  bool operator==(const TeamStateTy &) const;

  void assertEqual(TeamStateTy &Other) const;

  /// ICVs
  ///
  /// Preallocated storage for ICV values that is used if the threads have not
  /// set a custom value. The latter is supported but unlikely and slow(er).
  ///
  ///{
  ICVStateTy ICVState;
  ///}

  uint32_t ParallelTeamSize;
  uint32_t HasThreadState;
  ParallelRegionFnTy ParallelRegionFnVar;
};

extern Local<TeamStateTy> TeamState;

struct ThreadStateTy {

  /// ICVs have preallocated storage in the TeamStateTy which is used if a
  /// thread has not set a custom value. The latter is supported but unlikely.
  /// When it happens we will allocate dynamic memory to hold the values of all
  /// ICVs. Thus, the first time an ICV is set by a thread we will allocate an
  /// ICV struct to hold them all. This is slower than alternatives but allows
  /// users to pay only for what they use.
  ///
  state::ICVStateTy ICVState;

  ThreadStateTy *PreviousThreadState;

  void init() {
    ICVState = TeamState.ICVState;
    PreviousThreadState = nullptr;
  }

  void init(ThreadStateTy *PreviousTS) {
    ICVState = PreviousTS ? PreviousTS->ICVState : TeamState.ICVState;
    PreviousThreadState = PreviousTS;
  }
};

extern Local<ThreadStateTy **> ThreadStates;
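
// Illustrative sketch (an assumption about how the runtime uses these types,
// mirroring the lazy allocation in lookupForModify32Impl below): the first
// time a thread needs private ICV storage, a ThreadStateTy is allocated in
// global memory, inherits the current values, and links its predecessor so
// the previous state can be restored later.
//
//   uint32_t TId = mapping::getThreadIdInBlock();
//   auto *NewTS = static_cast<ThreadStateTy *>(
//       memory::allocGlobal(sizeof(ThreadStateTy), "nested thread state"));
//   NewTS->init(ThreadStates[TId]); // copy ICVs, remember the previous state
//   ThreadStates[TId] = NewTS;
//   TeamState.HasThreadState = true;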

/// Initialize the state machinery. Must be called by all threads.
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
          KernelLaunchEnvironmentTy &KernelLaunchEnvironment);

/// Return the kernel and kernel launch environment associated with the current
/// kernel. The former is static and contains compile time information that
/// holds for all instances of the kernel. The latter is dynamic and provides
/// per-launch information.
KernelEnvironmentTy &getKernelEnvironment();
KernelLaunchEnvironmentTy &getKernelLaunchEnvironment();

/// TODO
enum ValueKind {
  VK_NThreads,
  VK_Level,
  VK_ActiveLevel,
  VK_MaxActiveLevels,
  VK_RunSched,
  // ---
  VK_RunSchedChunk,
  VK_ParallelRegionFn,
  VK_ParallelTeamSize,
  VK_HasThreadState,
};

/// TODO
void enterDataEnvironment(IdentTy *Ident);

/// TODO
void exitDataEnvironment();

/// TODO
struct DateEnvironmentRAII {
  DateEnvironmentRAII(IdentTy *Ident) { enterDataEnvironment(Ident); }
  ~DateEnvironmentRAII() { exitDataEnvironment(); }
};
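
// Minimal usage sketch (assumption; the Ident pointer is a placeholder): the
// RAII helper scopes a data environment so that ICV values set inside it are
// expected to be dropped again when the scope is left.
//
//   {
//     state::DateEnvironmentRAII DataEnv(/*Ident=*/nullptr);
//     icv::NThreads = 64; // visible only until DataEnv is destroyed
//   }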

/// TODO
void resetStateForThread(uint32_t TId);

// FIXME: https://github.com/llvm/llvm-project/issues/123241.
#define lookupForModify32Impl(Member, Ident, ForceTeamState) \
  { \
    if (OMP_LIKELY(ForceTeamState || !config::mayUseThreadStates() || \
                   !TeamState.HasThreadState)) \
      return TeamState.ICVState.Member; \
    uint32_t TId = mapping::getThreadIdInBlock(); \
    if (OMP_UNLIKELY(!ThreadStates[TId])) { \
      ThreadStates[TId] = reinterpret_cast<ThreadStateTy *>( \
          memory::allocGlobal(sizeof(ThreadStateTy), \
                              "ICV modification outside data environment")); \
      ASSERT(ThreadStates[TId] != nullptr, "Nullptr returned by malloc!"); \
      TeamState.HasThreadState = true; \
      ThreadStates[TId]->init(); \
    } \
    return ThreadStates[TId]->ICVState.Member; \
  }

// FIXME: https://github.com/llvm/llvm-project/issues/123241.
#define lookupImpl(Member, ForceTeamState) \
  { \
    auto TId = mapping::getThreadIdInBlock(); \
    if (OMP_UNLIKELY(!ForceTeamState && config::mayUseThreadStates() && \
                     TeamState.HasThreadState && ThreadStates[TId])) \
      return ThreadStates[TId]->ICVState.Member; \
    return TeamState.ICVState.Member; \
  }

[[gnu::always_inline, gnu::flatten]] inline uint32_t &
lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
  switch (Kind) {
  case state::VK_NThreads:
    if (IsReadonly)
      lookupImpl(NThreadsVar, ForceTeamState);
    lookupForModify32Impl(NThreadsVar, Ident, ForceTeamState);
  case state::VK_Level:
    if (IsReadonly)
      lookupImpl(LevelVar, ForceTeamState);
    lookupForModify32Impl(LevelVar, Ident, ForceTeamState);
  case state::VK_ActiveLevel:
    if (IsReadonly)
      lookupImpl(ActiveLevelVar, ForceTeamState);
    lookupForModify32Impl(ActiveLevelVar, Ident, ForceTeamState);
  case state::VK_MaxActiveLevels:
    if (IsReadonly)
      lookupImpl(MaxActiveLevelsVar, ForceTeamState);
    lookupForModify32Impl(MaxActiveLevelsVar, Ident, ForceTeamState);
  case state::VK_RunSched:
    if (IsReadonly)
      lookupImpl(RunSchedVar, ForceTeamState);
    lookupForModify32Impl(RunSchedVar, Ident, ForceTeamState);
  case state::VK_RunSchedChunk:
    if (IsReadonly)
      lookupImpl(RunSchedChunkVar, ForceTeamState);
    lookupForModify32Impl(RunSchedChunkVar, Ident, ForceTeamState);
  case state::VK_ParallelTeamSize:
    return TeamState.ParallelTeamSize;
  case state::VK_HasThreadState:
    return TeamState.HasThreadState;
  default:
    break;
  }
  __builtin_unreachable();
}

[[gnu::always_inline, gnu::flatten]] inline void *&
lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
  switch (Kind) {
  case state::VK_ParallelRegionFn:
    return TeamState.ParallelRegionFnVar;
  default:
    break;
  }
  __builtin_unreachable();
}

/// A class without actual state used to provide a nice interface to lookup and
/// update ICV values we can declare in global scope.
template <typename Ty, ValueKind Kind> struct Value {
  [[gnu::flatten, gnu::always_inline]] operator Ty() {
    return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr,
                  /*ForceTeamState=*/false);
  }

  [[gnu::flatten, gnu::always_inline]] Value &operator=(const Ty &Other) {
    set(Other, /*IdentTy=*/nullptr);
    return *this;
  }

  [[gnu::flatten, gnu::always_inline]] Value &operator++() {
    inc(1, /*IdentTy=*/nullptr);
    return *this;
  }

  [[gnu::flatten, gnu::always_inline]] Value &operator--() {
    inc(-1, /*IdentTy=*/nullptr);
    return *this;
  }

  [[gnu::flatten, gnu::always_inline]] void
  assert_eq(const Ty &V, IdentTy *Ident = nullptr,
            bool ForceTeamState = false) {
    ASSERT(lookup(/*IsReadonly=*/true, Ident, ForceTeamState) == V, nullptr);
  }

private:
  [[gnu::flatten, gnu::always_inline]] Ty &
  lookup(bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
    Ty &t = lookup32(Kind, IsReadonly, Ident, ForceTeamState);
    return t;
  }

  [[gnu::flatten, gnu::always_inline]] Ty &inc(int UpdateVal, IdentTy *Ident) {
    return (lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false) +=
            UpdateVal);
  }

  [[gnu::flatten, gnu::always_inline]] Ty &set(Ty UpdateVal, IdentTy *Ident) {
    return (lookup(/*IsReadonly=*/false, Ident, /*ForceTeamState=*/false) =
                UpdateVal);
  }

  template <typename VTy, typename Ty2> friend struct ValueRAII;
};
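
// Illustrative sketch: Value<> instances read and write like plain integers.
// Reads go through lookup32, writes may lazily allocate a ThreadStateTy via
// lookupForModify32Impl. The concrete instances are declared further below
// (state::ParallelTeamSize, icv::NThreads, icv::Level, ...).
//
//   uint32_t Threads = icv::NThreads; // read-only lookup
//   icv::NThreads = 128;              // write, may create per-thread state
//   ++icv::Level;                     // increment through operator++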

/// A lookup class without actual state used to provide a nice interface to
/// lookup and update ICV values we can declare in global scope.
template <typename Ty, ValueKind Kind> struct PtrValue {
  [[gnu::flatten, gnu::always_inline]] operator Ty() {
    return lookup(/*IsReadonly=*/true, /*IdentTy=*/nullptr,
                  /*ForceTeamState=*/false);
  }

  [[gnu::flatten, gnu::always_inline]] PtrValue &operator=(const Ty Other) {
    set(Other);
    return *this;
  }

private:
  Ty &lookup(bool IsReadonly, IdentTy *, bool ForceTeamState) {
    return lookupPtr(Kind, IsReadonly, ForceTeamState);
  }

  Ty &set(Ty UpdateVal) {
    return (lookup(/*IsReadonly=*/false, /*IdentTy=*/nullptr,
                   /*ForceTeamState=*/false) = UpdateVal);
  }

  template <typename VTy, typename Ty2> friend struct ValueRAII;
};

template <typename VTy, typename Ty> struct ValueRAII {
  ValueRAII(VTy &V, Ty NewValue, Ty OldValue, bool Active, IdentTy *Ident,
            bool ForceTeamState = false)
      : Ptr(Active ? &V.lookup(/*IsReadonly=*/false, Ident, ForceTeamState)
                   : (Ty *)utils::UndefPtr),
        Val(OldValue), Active(Active) {
    if (!Active)
      return;
    ASSERT(*Ptr == OldValue, "ValueRAII initialization with wrong old value!");
    *Ptr = NewValue;
  }
  ~ValueRAII() {
    if (Active)
      *Ptr = Val;
  }

private:
  Ty *Ptr;
  Ty Val;
  bool Active;
};
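
// Minimal usage sketch (assumption; the values are placeholders): temporarily
// override a Value<> within a scope. The constructor asserts the old value
// matches before installing the new one, and the destructor restores it.
//
//   {
//     state::ValueRAII LevelRAII(icv::Level, /*NewValue=*/1u, /*OldValue=*/0u,
//                                /*Active=*/true, /*Ident=*/nullptr);
//     // icv::Level reads as 1 here; it is reset to 0 when LevelRAII dies.
//   }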

/// TODO
inline state::Value<uint32_t, state::VK_RunSchedChunk> RunSchedChunk;

/// TODO
inline state::Value<uint32_t, state::VK_ParallelTeamSize> ParallelTeamSize;

/// TODO
inline state::Value<uint32_t, state::VK_HasThreadState> HasThreadState;

/// TODO
inline state::PtrValue<ParallelRegionFnTy, state::VK_ParallelRegionFn>
    ParallelRegionFn;

void runAndCheckState(void(Func(void)));

void assumeInitialState(bool IsSPMD);

/// Return the value of the ParallelTeamSize ICV.
int getEffectivePTeamSize();

} // namespace state

namespace icv {

/// TODO
inline state::Value<uint32_t, state::VK_NThreads> NThreads;

/// TODO
inline state::Value<uint32_t, state::VK_Level> Level;

/// The `active-level` describes which of the parallel levels counted with the
/// `level-var` is active. There can only be one such level.
///
/// The `active-level-var` is 1 if `ActiveLevelVar` is not 0, otherwise it is 0.
inline state::Value<uint32_t, state::VK_ActiveLevel> ActiveLevel;

/// TODO
inline state::Value<uint32_t, state::VK_MaxActiveLevels> MaxActiveLevels;

/// TODO
inline state::Value<uint32_t, state::VK_RunSched> RunSched;

} // namespace icv

} // namespace ompx

#endif
