//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
21 | |
22 | static const GlobalVariable * |
23 | getKernelDynLDSGlobalFromFunction(const Function &F) { |
24 | const Module *M = F.getParent(); |
25 | SmallString<64> KernelDynLDSName("llvm.amdgcn." ); |
26 | KernelDynLDSName += F.getName(); |
27 | KernelDynLDSName += ".dynlds" ; |
28 | return M->getNamedGlobal(Name: KernelDynLDSName); |
29 | } |
30 | |
31 | static bool hasLDSKernelArgument(const Function &F) { |
32 | for (const Argument &Arg : F.args()) { |
33 | Type *ArgTy = Arg.getType(); |
34 | if (auto PtrTy = dyn_cast<PointerType>(Val: ArgTy)) { |
35 | if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) |
36 | return true; |
37 | } |
38 | } |
39 | return false; |
40 | } |
41 | |
42 | AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, |
43 | const AMDGPUSubtarget &ST) |
44 | : IsEntryFunction(AMDGPU::isEntryFunctionCC(CC: F.getCallingConv())), |
45 | IsModuleEntryFunction( |
46 | AMDGPU::isModuleEntryFunctionCC(CC: F.getCallingConv())), |
47 | IsChainFunction(AMDGPU::isChainCC(CC: F.getCallingConv())), |
48 | NoSignedZerosFPMath(false) { |
49 | |
50 | // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, |
51 | // except reserved size is not correctly aligned. |
52 | |
53 | Attribute MemBoundAttr = F.getFnAttribute(Kind: "amdgpu-memory-bound" ); |
54 | MemoryBound = MemBoundAttr.getValueAsBool(); |
55 | |
56 | Attribute WaveLimitAttr = F.getFnAttribute(Kind: "amdgpu-wave-limiter" ); |
57 | WaveLimiter = WaveLimitAttr.getValueAsBool(); |
58 | |
59 | // FIXME: How is this attribute supposed to interact with statically known |
60 | // global sizes? |
61 | StringRef S = F.getFnAttribute(Kind: "amdgpu-gds-size" ).getValueAsString(); |
62 | if (!S.empty()) |
63 | S.consumeInteger(Radix: 0, Result&: GDSSize); |
64 | |
65 | // Assume the attribute allocates before any known GDS globals. |
66 | StaticGDSSize = GDSSize; |
67 | |
68 | // Second value, if present, is the maximum value that can be assigned. |
69 | // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics |
70 | // during codegen. |
71 | std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute( |
72 | F, Name: "amdgpu-lds-size" , Default: {0, UINT32_MAX}, OnlyFirstRequired: true); |
73 | |
74 | // The two separate variables are only profitable when the LDS module lowering |
75 | // pass is disabled. If graphics does not use dynamic LDS, this is never |
76 | // profitable. Leaving cleanup for a later change. |
77 | LDSSize = LDSSizeRange.first; |
78 | StaticLDSSize = LDSSize; |
79 | |
80 | CallingConv::ID CC = F.getCallingConv(); |
81 | if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) |
82 | ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxAlign&: MaxKernArgAlign); |
83 | |
84 | // FIXME: Shouldn't be target specific |
85 | Attribute NSZAttr = F.getFnAttribute(Kind: "no-signed-zeros-fp-math" ); |
86 | NoSignedZerosFPMath = |
87 | NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true" ; |
88 | |
89 | const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F); |
90 | if (DynLdsGlobal || hasLDSKernelArgument(F)) |
91 | UsesDynamicLDS = true; |
92 | } |
93 | |
94 | unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, |
95 | const GlobalVariable &GV, |
96 | Align Trailing) { |
97 | auto Entry = LocalMemoryObjects.insert(KV: std::pair(&GV, 0)); |
98 | if (!Entry.second) |
99 | return Entry.first->second; |
100 | |
101 | Align Alignment = |
102 | DL.getValueOrABITypeAlignment(Alignment: GV.getAlign(), Ty: GV.getValueType()); |
103 | |
104 | unsigned Offset; |
105 | if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { |
106 | |
107 | std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV); |
108 | if (MaybeAbs) { |
109 | // Absolute address LDS variables that exist prior to the LDS lowering |
110 | // pass raise a fatal error in that pass. These failure modes are only |
111 | // reachable if that lowering pass is disabled or broken. If/when adding |
112 | // support for absolute addresses on user specified variables, the |
113 | // alignment check moves to the lowering pass and the frame calculation |
114 | // needs to take the user variables into consideration. |
115 | |
116 | uint32_t ObjectStart = *MaybeAbs; |
117 | |
118 | if (ObjectStart != alignTo(Size: ObjectStart, A: Alignment)) { |
119 | report_fatal_error(reason: "Absolute address LDS variable inconsistent with " |
120 | "variable alignment" ); |
121 | } |
122 | |
123 | if (isModuleEntryFunction()) { |
124 | // If this is a module entry function, we can also sanity check against |
125 | // the static frame. Strictly it would be better to check against the |
126 | // attribute, i.e. that the variable is within the always-allocated |
127 | // section, and not within some other non-absolute-address object |
128 | // allocated here, but the extra error detection is minimal and we would |
129 | // have to pass the Function around or cache the attribute value. |
130 | uint32_t ObjectEnd = |
131 | ObjectStart + DL.getTypeAllocSize(Ty: GV.getValueType()); |
132 | if (ObjectEnd > StaticLDSSize) { |
133 | report_fatal_error( |
134 | reason: "Absolute address LDS variable outside of static frame" ); |
135 | } |
136 | } |
137 | |
138 | Entry.first->second = ObjectStart; |
139 | return ObjectStart; |
140 | } |
141 | |
142 | /// TODO: We should sort these to minimize wasted space due to alignment |
143 | /// padding. Currently the padding is decided by the first encountered use |
144 | /// during lowering. |
145 | Offset = StaticLDSSize = alignTo(Size: StaticLDSSize, A: Alignment); |
146 | |
147 | StaticLDSSize += DL.getTypeAllocSize(Ty: GV.getValueType()); |
148 | |
149 | // Align LDS size to trailing, e.g. for aligning dynamic shared memory |
150 | LDSSize = alignTo(Size: StaticLDSSize, A: Trailing); |
151 | } else { |
152 | assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && |
153 | "expected region address space" ); |
154 | |
155 | Offset = StaticGDSSize = alignTo(Size: StaticGDSSize, A: Alignment); |
156 | StaticGDSSize += DL.getTypeAllocSize(Ty: GV.getValueType()); |
157 | |
158 | // FIXME: Apply alignment of dynamic GDS |
159 | GDSSize = StaticGDSSize; |
160 | } |
161 | |
162 | Entry.first->second = Offset; |
163 | return Offset; |
164 | } |
165 | |
166 | std::optional<uint32_t> |
167 | AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { |
168 | // TODO: Would be more consistent with the abs symbols to use a range |
169 | MDNode *MD = F.getMetadata(Kind: "llvm.amdgcn.lds.kernel.id" ); |
170 | if (MD && MD->getNumOperands() == 1) { |
171 | if (ConstantInt *KnownSize = |
172 | mdconst::extract<ConstantInt>(MD: MD->getOperand(I: 0))) { |
173 | uint64_t ZExt = KnownSize->getZExtValue(); |
174 | if (ZExt <= UINT32_MAX) { |
175 | return ZExt; |
176 | } |
177 | } |
178 | } |
179 | return {}; |
180 | } |
181 | |
182 | std::optional<uint32_t> |
183 | AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) { |
184 | if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) |
185 | return {}; |
186 | |
187 | std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange(); |
188 | if (!AbsSymRange) |
189 | return {}; |
190 | |
191 | if (const APInt *V = AbsSymRange->getSingleElement()) { |
192 | std::optional<uint64_t> ZExt = V->tryZExtValue(); |
193 | if (ZExt && (*ZExt <= UINT32_MAX)) { |
194 | return *ZExt; |
195 | } |
196 | } |
197 | |
198 | return {}; |
199 | } |
200 | |
201 | void AMDGPUMachineFunction::setDynLDSAlign(const Function &F, |
202 | const GlobalVariable &GV) { |
203 | const Module *M = F.getParent(); |
204 | const DataLayout &DL = M->getDataLayout(); |
205 | assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); |
206 | |
207 | Align Alignment = |
208 | DL.getValueOrABITypeAlignment(Alignment: GV.getAlign(), Ty: GV.getValueType()); |
209 | if (Alignment <= DynLDSAlign) |
210 | return; |
211 | |
212 | LDSSize = alignTo(Size: StaticLDSSize, A: Alignment); |
213 | DynLDSAlign = Alignment; |
214 | |
215 | // If there is a dynamic LDS variable associated with this function F, every |
216 | // further dynamic LDS instance (allocated by calling setDynLDSAlign) must |
217 | // map to the same address. This holds because no LDS is allocated after the |
218 | // lowering pass if there are dynamic LDS variables present. |
219 | const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F); |
220 | if (Dyn) { |
221 | unsigned Offset = LDSSize; // return this? |
222 | std::optional<uint32_t> Expect = getLDSAbsoluteAddress(GV: *Dyn); |
223 | if (!Expect || (Offset != *Expect)) { |
224 | report_fatal_error(reason: "Inconsistent metadata on dynamic LDS variable" ); |
225 | } |
226 | } |
227 | } |
228 | |
229 | void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) { |
230 | UsesDynamicLDS = DynLDS; |
231 | } |
232 | |
233 | bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; } |
234 | |