| 1 | //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// \file |
| 10 | /// \brief Provides definitions for Target specific Grid Values |
| 11 | /// |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H |
| 15 | #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H |
| 16 | |
| 17 | namespace llvm { |
| 18 | |
| 19 | namespace omp { |
| 20 | |
| 21 | /// \brief Defines various target-specific GPU grid values that must be |
| 22 | /// consistent between host RTL (plugin), device RTL, and clang. |
| 23 | /// We can change grid values for a "fat" binary so that different |
| 24 | /// passes get the correct values when generating code for a |
| 25 | /// multi-target binary. Both amdgcn and nvptx values are stored in |
| 26 | /// this file. In the future, should there be differences between GPUs |
| 27 | /// of the same architecture, then simply make a different array and |
| 28 | /// use the new array name. |
| 29 | /// |
| 30 | /// Example usage in clang: |
| 31 | /// const unsigned slot_size = |
| 32 | /// ctx.GetTargetInfo().getGridValue().GV_Warp_Size; |
| 33 | /// |
| 34 | /// Example usage in libomptarget/deviceRTLs: |
| 35 | /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" |
| 36 | /// #ifdef __AMDGPU__ |
| 37 | /// #define GRIDVAL AMDGPUGridValues |
| 38 | /// #else |
| 39 | /// #define GRIDVAL NVPTXGridValues |
| 40 | /// #endif |
| 41 | /// ... Then use this reference for GV_Warp_Size in the deviceRTL source. |
| 42 | /// llvm::omp::GRIDVAL().GV_Warp_Size |
| 43 | /// |
| 44 | /// Example usage in libomptarget hsa plugin: |
| 45 | /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" |
| 46 | /// #define GRIDVAL AMDGPUGridValues |
| 47 | /// ... Then use this reference to access GV_Warp_Size in the hsa plugin. |
| 48 | /// llvm::omp::GRIDVAL().GV_Warp_Size |
| 49 | /// |
| 50 | /// Example usage in libomptarget cuda plugin: |
| 51 | /// #include "llvm/Frontend/OpenMP/OMPGridValues.h" |
| 52 | /// #define GRIDVAL NVPTXGridValues |
| 53 | /// ... Then use this reference to access GV_Warp_Size in the cuda plugin. |
| 54 | /// llvm::omp::GRIDVAL().GV_Warp_Size |
| 55 | /// |
| 56 | |
| 57 | struct GV { |
| 58 | /// The size reserved for data in a shared memory slot. |
| 59 | unsigned GV_Slot_Size; |
| 60 | /// The default value of maximum number of threads in a worker warp. |
| 61 | unsigned GV_Warp_Size; |
| 62 | |
| 63 | constexpr unsigned warpSlotSize() const { |
| 64 | return GV_Warp_Size * GV_Slot_Size; |
| 65 | } |
| 66 | |
| 67 | /// the maximum number of teams. |
| 68 | unsigned GV_Max_Teams; |
| 69 | // The default number of teams in the absence of any other information. |
| 70 | unsigned GV_Default_Num_Teams; |
| 71 | |
| 72 | // An alternative to the heavy data sharing infrastructure that uses global |
| 73 | // memory is one that uses device __shared__ memory. The amount of such space |
| 74 | // (in bytes) reserved by the OpenMP runtime is noted here. |
| 75 | unsigned GV_SimpleBufferSize; |
| 76 | // The absolute maximum team size for a working group |
| 77 | unsigned GV_Max_WG_Size; |
| 78 | // The default maximum team size for a working group |
| 79 | unsigned GV_Default_WG_Size; |
| 80 | |
| 81 | constexpr unsigned maxWarpNumber() const { |
| 82 | return GV_Max_WG_Size / GV_Warp_Size; |
| 83 | } |
| 84 | }; |
| 85 | |
| 86 | /// For AMDGPU GPUs |
| 87 | static constexpr GV AMDGPUGridValues64 = { |
| 88 | .GV_Slot_Size: 256, // GV_Slot_Size |
| 89 | .GV_Warp_Size: 64, // GV_Warp_Size |
| 90 | .GV_Max_Teams: (1 << 16), // GV_Max_Teams |
| 91 | .GV_Default_Num_Teams: 440, // GV_Default_Num_Teams |
| 92 | .GV_SimpleBufferSize: 896, // GV_SimpleBufferSize |
| 93 | .GV_Max_WG_Size: 1024, // GV_Max_WG_Size, |
| 94 | .GV_Default_WG_Size: 256, // GV_Default_WG_Size |
| 95 | }; |
| 96 | |
| 97 | static constexpr GV AMDGPUGridValues32 = { |
| 98 | .GV_Slot_Size: 256, // GV_Slot_Size |
| 99 | .GV_Warp_Size: 32, // GV_Warp_Size |
| 100 | .GV_Max_Teams: (1 << 16), // GV_Max_Teams |
| 101 | .GV_Default_Num_Teams: 440, // GV_Default_Num_Teams |
| 102 | .GV_SimpleBufferSize: 896, // GV_SimpleBufferSize |
| 103 | .GV_Max_WG_Size: 1024, // GV_Max_WG_Size, |
| 104 | .GV_Default_WG_Size: 256, // GV_Default_WG_Size |
| 105 | }; |
| 106 | |
| 107 | template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() { |
| 108 | static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize" ); |
| 109 | return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64; |
| 110 | } |
| 111 | |
| 112 | /// For Nvidia GPUs |
| 113 | static constexpr GV NVPTXGridValues = { |
| 114 | .GV_Slot_Size: 256, // GV_Slot_Size |
| 115 | .GV_Warp_Size: 32, // GV_Warp_Size |
| 116 | .GV_Max_Teams: (1 << 16), // GV_Max_Teams |
| 117 | .GV_Default_Num_Teams: 3200, // GV_Default_Num_Teams |
| 118 | .GV_SimpleBufferSize: 896, // GV_SimpleBufferSize |
| 119 | .GV_Max_WG_Size: 1024, // GV_Max_WG_Size |
| 120 | .GV_Default_WG_Size: 128, // GV_Default_WG_Size |
| 121 | }; |
| 122 | |
| 123 | } // namespace omp |
| 124 | } // namespace llvm |
| 125 | |
| 126 | #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H |
| 127 | |