1 | //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// \brief Provides definitions for Target specific Grid Values |
11 | /// |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H |
15 | #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H |
16 | |
17 | namespace llvm { |
18 | |
19 | namespace omp { |
20 | |
21 | /// \brief Defines various target-specific GPU grid values that must be |
22 | /// consistent between host RTL (plugin), device RTL, and clang. |
23 | /// We can change grid values for a "fat" binary so that different |
24 | /// passes get the correct values when generating code for a |
25 | /// multi-target binary. Both amdgcn and nvptx values are stored in |
26 | /// this file. In the future, should there be differences between GPUs |
27 | /// of the same architecture, then simply make a different array and |
28 | /// use the new array name. |
29 | /// |
30 | /// Example usage in clang: |
31 | /// const unsigned slot_size = |
32 | /// ctx.GetTargetInfo().getGridValue().GV_Warp_Size; |
33 | /// |
34 | /// Example usage in libomptarget/deviceRTLs: |
35 | /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" |
36 | /// #ifdef __AMDGPU__ |
37 | /// #define GRIDVAL AMDGPUGridValues |
38 | /// #else |
39 | /// #define GRIDVAL NVPTXGridValues |
40 | /// #endif |
41 | /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. |
42 | /// llvm::omp::GRIDVAL().GV_Warp_Size |
43 | /// |
44 | /// Example usage in libomptarget hsa plugin: |
45 | /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" |
46 | /// #define GRIDVAL AMDGPUGridValues |
47 | /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. |
48 | /// llvm::omp::GRIDVAL().GV_Warp_Size |
49 | /// |
50 | /// Example usage in libomptarget cuda plugin: |
51 | /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" |
52 | /// #define GRIDVAL NVPTXGridValues |
53 | /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. |
54 | /// llvm::omp::GRIDVAL().GV_Warp_Size |
55 | /// |
56 | |
57 | struct GV { |
58 | /// The size reserved for data in a shared memory slot. |
59 | unsigned GV_Slot_Size; |
60 | /// The default value of maximum number of threads in a worker warp. |
61 | unsigned GV_Warp_Size; |
62 | |
63 | constexpr unsigned warpSlotSize() const { |
64 | return GV_Warp_Size * GV_Slot_Size; |
65 | } |
66 | |
67 | /// the maximum number of teams. |
68 | unsigned GV_Max_Teams; |
69 | // The default number of teams in the absence of any other information. |
70 | unsigned GV_Default_Num_Teams; |
71 | |
72 | // An alternative to the heavy data sharing infrastructure that uses global |
73 | // memory is one that uses device __shared__ memory. The amount of such space |
74 | // (in bytes) reserved by the OpenMP runtime is noted here. |
75 | unsigned GV_SimpleBufferSize; |
76 | // The absolute maximum team size for a working group |
77 | unsigned GV_Max_WG_Size; |
78 | // The default maximum team size for a working group |
79 | unsigned GV_Default_WG_Size; |
80 | |
81 | constexpr unsigned maxWarpNumber() const { |
82 | return GV_Max_WG_Size / GV_Warp_Size; |
83 | } |
84 | }; |
85 | |
86 | /// For AMDGPU GPUs |
87 | static constexpr GV AMDGPUGridValues64 = { |
88 | .GV_Slot_Size: 256, // GV_Slot_Size |
89 | .GV_Warp_Size: 64, // GV_Warp_Size |
90 | .GV_Max_Teams: (1 << 16), // GV_Max_Teams |
91 | .GV_Default_Num_Teams: 440, // GV_Default_Num_Teams |
92 | .GV_SimpleBufferSize: 896, // GV_SimpleBufferSize |
93 | .GV_Max_WG_Size: 1024, // GV_Max_WG_Size, |
94 | .GV_Default_WG_Size: 256, // GV_Default_WG_Size |
95 | }; |
96 | |
97 | static constexpr GV AMDGPUGridValues32 = { |
98 | .GV_Slot_Size: 256, // GV_Slot_Size |
99 | .GV_Warp_Size: 32, // GV_Warp_Size |
100 | .GV_Max_Teams: (1 << 16), // GV_Max_Teams |
101 | .GV_Default_Num_Teams: 440, // GV_Default_Num_Teams |
102 | .GV_SimpleBufferSize: 896, // GV_SimpleBufferSize |
103 | .GV_Max_WG_Size: 1024, // GV_Max_WG_Size, |
104 | .GV_Default_WG_Size: 256, // GV_Default_WG_Size |
105 | }; |
106 | |
107 | template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() { |
108 | static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize" ); |
109 | return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64; |
110 | } |
111 | |
112 | /// For Nvidia GPUs |
113 | static constexpr GV NVPTXGridValues = { |
114 | .GV_Slot_Size: 256, // GV_Slot_Size |
115 | .GV_Warp_Size: 32, // GV_Warp_Size |
116 | .GV_Max_Teams: (1 << 16), // GV_Max_Teams |
117 | .GV_Default_Num_Teams: 3200, // GV_Default_Num_Teams |
118 | .GV_SimpleBufferSize: 896, // GV_SimpleBufferSize |
119 | .GV_Max_WG_Size: 1024, // GV_Max_WG_Size |
120 | .GV_Default_WG_Size: 128, // GV_Default_WG_Size |
121 | }; |
122 | |
123 | } // namespace omp |
124 | } // namespace llvm |
125 | |
126 | #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H |
127 | |