//===-- AMDGPUMachineFunctionInfo.cpp ---------------------------------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUMachineFunction.h"
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
21 | |
22 | static const GlobalVariable * |
23 | getKernelDynLDSGlobalFromFunction(const Function &F) { |
24 | const Module *M = F.getParent(); |
25 | SmallString<64> KernelDynLDSName("llvm.amdgcn." ); |
26 | KernelDynLDSName += F.getName(); |
27 | KernelDynLDSName += ".dynlds" ; |
28 | return M->getNamedGlobal(Name: KernelDynLDSName); |
29 | } |
30 | |
31 | static bool hasLDSKernelArgument(const Function &F) { |
32 | for (const Argument &Arg : F.args()) { |
33 | Type *ArgTy = Arg.getType(); |
34 | if (auto PtrTy = dyn_cast<PointerType>(Val: ArgTy)) { |
35 | if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) |
36 | return true; |
37 | } |
38 | } |
39 | return false; |
40 | } |
41 | |
42 | AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F, |
43 | const AMDGPUSubtarget &ST) |
44 | : IsEntryFunction(AMDGPU::isEntryFunctionCC(CC: F.getCallingConv())), |
45 | IsModuleEntryFunction( |
46 | AMDGPU::isModuleEntryFunctionCC(CC: F.getCallingConv())), |
47 | IsChainFunction(AMDGPU::isChainCC(CC: F.getCallingConv())), |
48 | NoSignedZerosFPMath(false) { |
49 | |
50 | // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset, |
51 | // except reserved size is not correctly aligned. |
52 | |
53 | Attribute MemBoundAttr = F.getFnAttribute(Kind: "amdgpu-memory-bound" ); |
54 | MemoryBound = MemBoundAttr.getValueAsBool(); |
55 | |
56 | Attribute WaveLimitAttr = F.getFnAttribute(Kind: "amdgpu-wave-limiter" ); |
57 | WaveLimiter = WaveLimitAttr.getValueAsBool(); |
58 | |
59 | // FIXME: How is this attribute supposed to interact with statically known |
60 | // global sizes? |
61 | StringRef S = F.getFnAttribute(Kind: "amdgpu-gds-size" ).getValueAsString(); |
62 | if (!S.empty()) |
63 | S.consumeInteger(Radix: 0, Result&: GDSSize); |
64 | |
65 | // Assume the attribute allocates before any known GDS globals. |
66 | StaticGDSSize = GDSSize; |
67 | |
68 | // Second value, if present, is the maximum value that can be assigned. |
69 | // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics |
70 | // during codegen. |
71 | std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute( |
72 | F, Name: "amdgpu-lds-size" , Default: {0, UINT32_MAX}, OnlyFirstRequired: true); |
73 | |
74 | // The two separate variables are only profitable when the LDS module lowering |
75 | // pass is disabled. If graphics does not use dynamic LDS, this is never |
76 | // profitable. Leaving cleanup for a later change. |
77 | LDSSize = LDSSizeRange.first; |
78 | StaticLDSSize = LDSSize; |
79 | |
80 | CallingConv::ID CC = F.getCallingConv(); |
81 | if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) |
82 | ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxAlign&: MaxKernArgAlign); |
83 | |
84 | // FIXME: Shouldn't be target specific |
85 | Attribute NSZAttr = F.getFnAttribute(Kind: "no-signed-zeros-fp-math" ); |
86 | NoSignedZerosFPMath = |
87 | NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true" ; |
88 | |
89 | const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F); |
90 | if (DynLdsGlobal || hasLDSKernelArgument(F)) |
91 | UsesDynamicLDS = true; |
92 | } |
93 | |
94 | unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL, |
95 | const GlobalVariable &GV, |
96 | Align Trailing) { |
97 | auto Entry = LocalMemoryObjects.insert(KV: std::pair(&GV, 0)); |
98 | if (!Entry.second) |
99 | return Entry.first->second; |
100 | |
101 | Align Alignment = |
102 | DL.getValueOrABITypeAlignment(Alignment: GV.getAlign(), Ty: GV.getValueType()); |
103 | |
104 | unsigned Offset; |
105 | if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { |
106 | |
107 | std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV); |
108 | if (MaybeAbs) { |
109 | // Absolute address LDS variables that exist prior to the LDS lowering |
110 | // pass raise a fatal error in that pass. These failure modes are only |
111 | // reachable if that lowering pass is disabled or broken. If/when adding |
112 | // support for absolute addresses on user specified variables, the |
113 | // alignment check moves to the lowering pass and the frame calculation |
114 | // needs to take the user variables into consideration. |
115 | |
116 | uint32_t ObjectStart = *MaybeAbs; |
117 | |
118 | if (ObjectStart != alignTo(Size: ObjectStart, A: Alignment)) { |
119 | report_fatal_error(reason: "Absolute address LDS variable inconsistent with " |
120 | "variable alignment" ); |
121 | } |
122 | |
123 | if (isModuleEntryFunction()) { |
124 | // If this is a module entry function, we can also sanity check against |
125 | // the static frame. Strictly it would be better to check against the |
126 | // attribute, i.e. that the variable is within the always-allocated |
127 | // section, and not within some other non-absolute-address object |
128 | // allocated here, but the extra error detection is minimal and we would |
129 | // have to pass the Function around or cache the attribute value. |
130 | uint32_t ObjectEnd = |
131 | ObjectStart + DL.getTypeAllocSize(Ty: GV.getValueType()); |
132 | if (ObjectEnd > StaticLDSSize) { |
133 | report_fatal_error( |
134 | reason: "Absolute address LDS variable outside of static frame" ); |
135 | } |
136 | } |
137 | |
138 | Entry.first->second = ObjectStart; |
139 | return ObjectStart; |
140 | } |
141 | |
142 | /// TODO: We should sort these to minimize wasted space due to alignment |
143 | /// padding. Currently the padding is decided by the first encountered use |
144 | /// during lowering. |
145 | Offset = StaticLDSSize = alignTo(Size: StaticLDSSize, A: Alignment); |
146 | |
147 | StaticLDSSize += DL.getTypeAllocSize(Ty: GV.getValueType()); |
148 | |
149 | // Align LDS size to trailing, e.g. for aligning dynamic shared memory |
150 | LDSSize = alignTo(Size: StaticLDSSize, A: Trailing); |
151 | } else { |
152 | assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS && |
153 | "expected region address space" ); |
154 | |
155 | Offset = StaticGDSSize = alignTo(Size: StaticGDSSize, A: Alignment); |
156 | StaticGDSSize += DL.getTypeAllocSize(Ty: GV.getValueType()); |
157 | |
158 | // FIXME: Apply alignment of dynamic GDS |
159 | GDSSize = StaticGDSSize; |
160 | } |
161 | |
162 | Entry.first->second = Offset; |
163 | return Offset; |
164 | } |
165 | |
166 | std::optional<uint32_t> |
167 | AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) { |
168 | // TODO: Would be more consistent with the abs symbols to use a range |
169 | MDNode *MD = F.getMetadata(Kind: "llvm.amdgcn.lds.kernel.id" ); |
170 | if (MD && MD->getNumOperands() == 1) { |
171 | if (ConstantInt *KnownSize = |
172 | mdconst::extract<ConstantInt>(MD: MD->getOperand(I: 0))) { |
173 | uint64_t ZExt = KnownSize->getZExtValue(); |
174 | if (ZExt <= UINT32_MAX) { |
175 | return ZExt; |
176 | } |
177 | } |
178 | } |
179 | return {}; |
180 | } |
181 | |
182 | std::optional<uint32_t> |
183 | AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) { |
184 | if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) |
185 | return {}; |
186 | |
187 | std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange(); |
188 | if (!AbsSymRange) |
189 | return {}; |
190 | |
191 | if (const APInt *V = AbsSymRange->getSingleElement()) { |
192 | std::optional<uint64_t> ZExt = V->tryZExtValue(); |
193 | if (ZExt && (*ZExt <= UINT32_MAX)) { |
194 | return *ZExt; |
195 | } |
196 | } |
197 | |
198 | return {}; |
199 | } |
200 | |
201 | void AMDGPUMachineFunction::setDynLDSAlign(const Function &F, |
202 | const GlobalVariable &GV) { |
203 | const Module *M = F.getParent(); |
204 | const DataLayout &DL = M->getDataLayout(); |
205 | assert(DL.getTypeAllocSize(GV.getValueType()).isZero()); |
206 | |
207 | Align Alignment = |
208 | DL.getValueOrABITypeAlignment(Alignment: GV.getAlign(), Ty: GV.getValueType()); |
209 | if (Alignment <= DynLDSAlign) |
210 | return; |
211 | |
212 | LDSSize = alignTo(Size: StaticLDSSize, A: Alignment); |
213 | DynLDSAlign = Alignment; |
214 | |
215 | // If there is a dynamic LDS variable associated with this function F, every |
216 | // further dynamic LDS instance (allocated by calling setDynLDSAlign) must |
217 | // map to the same address. This holds because no LDS is allocated after the |
218 | // lowering pass if there are dynamic LDS variables present. |
219 | const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F); |
220 | if (Dyn) { |
221 | unsigned Offset = LDSSize; // return this? |
222 | std::optional<uint32_t> Expect = getLDSAbsoluteAddress(GV: *Dyn); |
223 | if (!Expect || (Offset != *Expect)) { |
224 | report_fatal_error(reason: "Inconsistent metadata on dynamic LDS variable" ); |
225 | } |
226 | } |
227 | } |
228 | |
229 | void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) { |
230 | UsesDynamicLDS = DynLDS; |
231 | } |
232 | |
233 | bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; } |
234 | |