1 | //===--- SIProgramInfo.h ----------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// Defines struct to track resource usage and hardware flags for kernels and |
11 | /// entry functions. |
12 | /// |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #ifndef LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H |
17 | #define LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H |
18 | |
19 | #include "llvm/IR/CallingConv.h" |
20 | #include <cstdint> |
21 | |
22 | namespace llvm { |
23 | |
// Forward declaration only: this header refers to GCNSubtarget solely by
// const reference, so the full definition is not needed here.
class GCNSubtarget;
25 | |
26 | /// Track resource usage for kernels / entry functions. |
27 | struct SIProgramInfo { |
28 | // Fields set in PGM_RSRC1 pm4 packet. |
29 | uint32_t VGPRBlocks = 0; |
30 | uint32_t SGPRBlocks = 0; |
31 | uint32_t Priority = 0; |
32 | uint32_t FloatMode = 0; |
33 | uint32_t Priv = 0; |
34 | uint32_t DX10Clamp = 0; |
35 | uint32_t DebugMode = 0; |
36 | uint32_t IEEEMode = 0; |
37 | uint32_t WgpMode = 0; // GFX10+ |
38 | uint32_t MemOrdered = 0; // GFX10+ |
39 | uint32_t RrWgMode = 0; // GFX12+ |
40 | uint64_t ScratchSize = 0; |
41 | |
42 | // State used to calculate fields set in PGM_RSRC2 pm4 packet. |
43 | uint32_t LDSBlocks = 0; |
44 | uint32_t ScratchBlocks = 0; |
45 | |
46 | // Fields set in PGM_RSRC2 pm4 packet |
47 | uint32_t ScratchEnable = 0; |
48 | uint32_t UserSGPR = 0; |
49 | uint32_t TrapHandlerEnable = 0; |
50 | uint32_t TGIdXEnable = 0; |
51 | uint32_t TGIdYEnable = 0; |
52 | uint32_t TGIdZEnable = 0; |
53 | uint32_t TGSizeEnable = 0; |
54 | uint32_t TIdIGCompCount = 0; |
55 | uint32_t EXCPEnMSB = 0; |
56 | uint32_t LdsSize = 0; |
57 | uint32_t EXCPEnable = 0; |
58 | |
59 | uint64_t ComputePGMRSrc3GFX90A = 0; |
60 | |
61 | uint32_t NumVGPR = 0; |
62 | uint32_t NumArchVGPR = 0; |
63 | uint32_t NumAccVGPR = 0; |
64 | uint32_t AccumOffset = 0; |
65 | uint32_t TgSplit = 0; |
66 | uint32_t NumSGPR = 0; |
67 | unsigned SGPRSpill = 0; |
68 | unsigned VGPRSpill = 0; |
69 | uint32_t LDSSize = 0; |
70 | bool FlatUsed = false; |
71 | |
72 | // Number of SGPRs that meets number of waves per execution unit request. |
73 | uint32_t NumSGPRsForWavesPerEU = 0; |
74 | |
75 | // Number of VGPRs that meets number of waves per execution unit request. |
76 | uint32_t NumVGPRsForWavesPerEU = 0; |
77 | |
78 | // Final occupancy. |
79 | uint32_t Occupancy = 0; |
80 | |
81 | // Whether there is recursion, dynamic allocas, indirect calls or some other |
82 | // reason there may be statically unknown stack usage. |
83 | bool DynamicCallStack = false; |
84 | |
85 | // Bonus information for debugging. |
86 | bool VCCUsed = false; |
87 | |
88 | SIProgramInfo() = default; |
89 | |
90 | /// Compute the value of the ComputePGMRsrc1 register. |
91 | uint64_t getComputePGMRSrc1(const GCNSubtarget &ST) const; |
92 | uint64_t getPGMRSrc1(CallingConv::ID CC, const GCNSubtarget &ST) const; |
93 | |
94 | /// Compute the value of the ComputePGMRsrc2 register. |
95 | uint64_t getComputePGMRSrc2() const; |
96 | uint64_t getPGMRSrc2(CallingConv::ID CC) const; |
97 | }; |
98 | |
99 | } // namespace llvm |
100 | |
101 | #endif // LLVM_LIB_TARGET_AMDGPU_SIPROGRAMINFO_H |
102 | |