//===- SIMachineFunctionInfo.cpp - SI Machine Function Info ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIMachineFunctionInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIRegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include <cassert>
#include <optional>
#include <vector>

#define MAX_LANES 64

using namespace llvm;

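// Recover the GCNTargetMachine from the subtarget by way of its target
// lowering object.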
const GCNTargetMachine &getTM(const GCNSubtarget *STI) {
  const SITargetLowering *TLI = STI->getTargetLowering();
  return static_cast<const GCNTargetMachine &>(TLI->getTargetMachine());
}

SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
                                             const GCNSubtarget *STI)
    : AMDGPUMachineFunction(F, *STI), Mode(F, *STI), GWSResourcePSV(getTM(STI)),
      UserSGPRInfo(F, *STI), WorkGroupIDX(false), WorkGroupIDY(false),
      WorkGroupIDZ(false), WorkGroupInfo(false), LDSKernelId(false),
      PrivateSegmentWaveByteOffset(false), WorkItemIDX(false),
      WorkItemIDY(false), WorkItemIDZ(false), ImplicitArgPtr(false),
      GITPtrHigh(0xffffffff), HighBitsOf32BitAddress(0) {
  const GCNSubtarget &ST = *STI;
  FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  WavesPerEU = ST.getWavesPerEU(F);
  MaxNumWorkGroups = ST.getMaxNumWorkGroups(F);
  assert(MaxNumWorkGroups.size() == 3);

  Occupancy = ST.computeOccupancy(F, getLDSSize());
  CallingConv::ID CC = F.getCallingConv();

  VRegFlags.reserve(1024);

  const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
                        CC == CallingConv::SPIR_KERNEL;

  if (IsKernel) {
    WorkGroupIDX = true;
    WorkItemIDX = true;
  } else if (CC == CallingConv::AMDGPU_PS) {
    PSInputAddr = AMDGPU::getInitialPSInputAddr(F);
  }

  MayNeedAGPRs = ST.hasMAIInsts();

  if (AMDGPU::isChainCC(CC)) {
    // Chain functions don't receive an SP from their caller, but are free to
    // set one up. For now, we can use s32 to match what amdgpu_gfx functions
    // would use if called, but this can be revisited.
    // FIXME: Only reserve this if we actually need it.
    StackPtrOffsetReg = AMDGPU::SGPR32;

    ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;

    ArgInfo.PrivateSegmentBuffer =
        ArgDescriptor::createRegister(ScratchRSrcReg);

    ImplicitArgPtr = false;
  } else if (!isEntryFunction()) {
    if (CC != CallingConv::AMDGPU_Gfx)
      ArgInfo = AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;

    // TODO: Pick a high register, and shift down, similar to a kernel.
    FrameOffsetReg = AMDGPU::SGPR33;
    StackPtrOffsetReg = AMDGPU::SGPR32;

    if (!ST.enableFlatScratch()) {
      // Non-entry functions have no special inputs for now; the only
      // registers set up here are those required for scratch access.
      ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

      ArgInfo.PrivateSegmentBuffer =
          ArgDescriptor::createRegister(ScratchRSrcReg);
    }

    if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
      ImplicitArgPtr = true;
  } else {
    ImplicitArgPtr = false;
    MaxKernArgAlign =
        std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign);

    if (ST.hasGFX90AInsts() &&
        ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
        !mayUseAGPRs(F))
      MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
  }

  if (!AMDGPU::isGraphics(CC) ||
      (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
      WorkGroupIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
      WorkGroupIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
      WorkGroupIDZ = true;
  }

  if (!AMDGPU::isGraphics(CC)) {
    if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
      WorkItemIDX = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-y") &&
        ST.getMaxWorkitemID(F, 1) != 0)
      WorkItemIDY = true;

    if (!F.hasFnAttribute("amdgpu-no-workitem-id-z") &&
        ST.getMaxWorkitemID(F, 2) != 0)
      WorkItemIDZ = true;

    if (!IsKernel && !F.hasFnAttribute("amdgpu-no-lds-kernel-id"))
      LDSKernelId = true;
  }

  if (isEntryFunction()) {
    // X, XY, and XYZ are the only supported combinations, so make sure Y is
    // enabled if Z is.
    if (WorkItemIDZ)
      WorkItemIDY = true;

    if (!ST.flatScratchIsArchitected()) {
      PrivateSegmentWaveByteOffset = true;

      // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
      if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
          (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
        ArgInfo.PrivateSegmentWaveByteOffset =
            ArgDescriptor::createRegister(AMDGPU::SGPR5);
    }
  }

  Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
  StringRef S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, GITPtrHigh);

  A = F.getFnAttribute("amdgpu-32bit-address-high-bits");
  S = A.getValueAsString();
  if (!S.empty())
    S.consumeInteger(0, HighBitsOf32BitAddress);

  // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
  // VGPR available at all times. For now, reserve the highest available VGPR.
  // After RA, shift it to the lowest available unused VGPR if one exists.
  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
    VGPRForAGPRCopy =
        AMDGPU::VGPR_32RegClass.getRegister(ST.getMaxNumVGPRs(F) - 1);
  }
}

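// Clone this machine function info into DestMF's allocator.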
MachineFunctionInfo *SIMachineFunctionInfo::clone(
    BumpPtrAllocator &Allocator, MachineFunction &DestMF,
    const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
    const {
  return DestMF.cloneInfo<SIMachineFunctionInfo>(*this);
}

void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
  limitOccupancy(getMaxWavesPerEU());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
                                                 MF.getFunction()));
}

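// Each of the add* helpers below claims the next user SGPR(s) for one ABI
// input, records the assignment in ArgInfo, and advances NumUserSGPRs by the
// argument's width in dwords.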
Register SIMachineFunctionInfo::addPrivateSegmentBuffer(
    const SIRegisterInfo &TRI) {
  ArgInfo.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SGPR_128RegClass));
  NumUserSGPRs += 4;
  return ArgInfo.PrivateSegmentBuffer.getRegister();
}

Register SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchPtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchPtr.getRegister();
}

Register SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) {
  ArgInfo.QueuePtr = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.QueuePtr.getRegister();
}

Register
SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) {
  ArgInfo.KernargSegmentPtr =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.KernargSegmentPtr.getRegister();
}

Register SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
  ArgInfo.DispatchID = ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
      getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.DispatchID.getRegister();
}

Register SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
  ArgInfo.FlatScratchInit =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.FlatScratchInit.getRegister();
}

Register
SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) {
  ArgInfo.ImplicitBufferPtr =
      ArgDescriptor::createRegister(TRI.getMatchingSuperReg(
          getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass));
  NumUserSGPRs += 2;
  return ArgInfo.ImplicitBufferPtr.getRegister();
}

Register SIMachineFunctionInfo::addLDSKernelId() {
  ArgInfo.LDSKernelId = ArgDescriptor::createRegister(getNextUserSGPR());
  NumUserSGPRs += 1;
  return ArgInfo.LDSKernelId.getRegister();
}

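// Allocate user SGPRs for a kernel argument that the hardware preloads,
// optionally skipping PaddingSGPRs first to satisfy the argument's alignment.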
SmallVectorImpl<MCRegister> *SIMachineFunctionInfo::addPreloadedKernArg(
    const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
    unsigned AllocSizeDWord, int KernArgIdx, int PaddingSGPRs) {
  assert(!ArgInfo.PreloadKernArgs.count(KernArgIdx) &&
         "Preload kernel argument allocated twice.");
  NumUserSGPRs += PaddingSGPRs;
  // If the available register tuples are aligned with the kernarg to be
  // preloaded, use that register; otherwise we need to use a set of SGPRs
  // and merge them.
  Register PreloadReg =
      TRI.getMatchingSuperReg(getNextUserSGPR(), AMDGPU::sub0, RC);
  if (PreloadReg &&
      (RC == &AMDGPU::SReg_32RegClass || RC == &AMDGPU::SReg_64RegClass)) {
    ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(PreloadReg);
    NumUserSGPRs += AllocSizeDWord;
  } else {
    for (unsigned I = 0; I < AllocSizeDWord; ++I) {
      ArgInfo.PreloadKernArgs[KernArgIdx].Regs.push_back(getNextUserSGPR());
      NumUserSGPRs++;
    }
  }

  // Track the actual number of SGPRs that HW will preload to.
  UserSGPRInfo.allocKernargPreloadSGPRs(AllocSizeDWord + PaddingSGPRs);
  return &ArgInfo.PreloadKernArgs[KernArgIdx].Regs;
}

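// Create a stack object for spilling the given WWM-register VGPR so that its
// inactive lanes can be preserved.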
void SIMachineFunctionInfo::allocateWWMSpill(MachineFunction &MF, Register VGPR,
                                             uint64_t Size, Align Alignment) {
  // Skip if it is an entry function or the register is already added.
  if (isEntryFunction() || WWMSpills.count(VGPR))
    return;

  // Skip if this is a function with the amdgpu_cs_chain or
  // amdgpu_cs_chain_preserve calling convention and this is a scratch register.
  // We never need to allocate a spill for these because we don't even need to
  // restore the inactive lanes for them (they're scratchier than the usual
  // scratch registers).
  if (isChainFunction() && SIRegisterInfo::isChainScratchRegister(VGPR))
    return;

  WWMSpills.insert(std::make_pair(
      VGPR, MF.getFrameInfo().CreateSpillStackObject(Size, Alignment)));
}

// Separate out the callee-saved and scratch registers.
void SIMachineFunctionInfo::splitWWMSpillRegisters(
    MachineFunction &MF,
    SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
    SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const {
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (auto &Reg : WWMSpills) {
    if (isCalleeSavedReg(CSRegs, Reg.first))
      CalleeSavedRegs.push_back(Reg);
    else
      ScratchRegs.push_back(Reg);
  }
}

bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
                                             MCPhysReg Reg) const {
  for (unsigned I = 0; CSRegs[I]; ++I) {
    if (CSRegs[I] == Reg)
      return true;
  }

  return false;
}

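// After register allocation, remap each physical VGPR reserved for SGPR
// spills down to the lowest unused VGPR, keeping the spill tables and block
// live-ins in sync.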
void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange(
    MachineFunction &MF) {
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  for (unsigned I = 0, E = SpillPhysVGPRs.size(); I < E; ++I) {
    Register Reg = SpillPhysVGPRs[I];
    Register NewReg =
        TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
    if (!NewReg || NewReg >= Reg)
      break;

    MRI.replaceRegWith(Reg, NewReg);

    // Update various tables with the new VGPR.
    SpillPhysVGPRs[I] = NewReg;
    WWMReservedRegs.remove(Reg);
    WWMReservedRegs.insert(NewReg);
    WWMSpills.insert(std::make_pair(NewReg, WWMSpills[Reg]));
    WWMSpills.erase(Reg);

    for (MachineBasicBlock &MBB : MF) {
      MBB.removeLiveIn(Reg);
      MBB.sortUniqueLiveIns();
    }
  }
}

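// Map one lane of a virtual VGPR to the SGPR spill at frame index FI. A fresh
// virtual register is created only when lane 0 is requested; subsequent lanes
// reuse the most recently created one.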
bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    SpillVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillVGPRs.back();
  }

  SGPRSpillsToVirtualVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

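// As above, but spill to a lane of a reserved physical VGPR, typically for
// prolog/epilog (CSR) SGPR spills.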
bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
    MachineFunction &MF, int FI, unsigned LaneIndex, bool IsPrologEpilog) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  Register LaneVGPR;
  if (!LaneIndex) {
    // Find the highest available register if called before RA to ensure the
    // lowest registers are available for allocation. The LaneVGPR, in that
    // case, will be shifted back to the lowest range after VGPR allocation.
    LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF,
                                       !IsPrologEpilog);
    if (LaneVGPR == AMDGPU::NoRegister) {
      // We have no VGPRs left for spilling SGPRs. Reset because we will not
      // partially spill the SGPR to VGPRs.
      SGPRSpillsToPhysicalVGPRLanes.erase(FI);
      return false;
    }

    allocateWWMSpill(MF, LaneVGPR);
    reserveWWMRegister(LaneVGPR);
    for (MachineBasicBlock &MBB : MF) {
      MBB.addLiveIn(LaneVGPR);
      MBB.sortUniqueLiveIns();
    }
    SpillPhysVGPRs.push_back(LaneVGPR);
  } else {
    LaneVGPR = SpillPhysVGPRs.back();
  }

  SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
      SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
  return true;
}

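// Reserve one VGPR lane per 4-byte word of the SGPR spill at FI, drawing from
// physical or virtual VGPR lanes as requested.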
bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(
    MachineFunction &MF, int FI, bool SpillToPhysVGPRLane,
    bool IsPrologEpilog) {
  std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
      SpillToPhysVGPRLane ? SGPRSpillsToPhysicalVGPRLanes[FI]
                          : SGPRSpillsToVirtualVGPRLanes[FI];

  // This has already been allocated.
  if (!SpillLanes.empty())
    return true;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  unsigned WaveSize = ST.getWavefrontSize();

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;

  if (NumLanes > WaveSize)
    return false;

  assert(Size >= 4 && "invalid sgpr spill size");
  assert(ST.getRegisterInfo()->spillSGPRToVGPR() &&
         "not spilling SGPRs to VGPRs");

  unsigned &NumSpillLanes = SpillToPhysVGPRLane ? NumPhysicalVGPRSpillLanes
                                                : NumVirtualVGPRSpillLanes;

  for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
    unsigned LaneIndex = (NumSpillLanes % WaveSize);

    bool Allocated = SpillToPhysVGPRLane
                         ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex,
                                                             IsPrologEpilog)
                         : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
    if (!Allocated) {
      NumSpillLanes -= I;
      return false;
    }
  }

  return true;
}

/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPRs are spilled to VGPRs or vice versa.
/// Returns true if \p FI can be eliminated completely.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (int I = NumLanes - 1; I >= 0; --I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    MRI.reserveReg(*NextSpillReg, TRI);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
    MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
  // Remove dead frame indices from the function frame, but keep FP and BP
  // since spills for them haven't been inserted yet. Also remove the frame
  // indices from `SGPRSpillsToVirtualVGPRLanes`; otherwise a later pass such
  // as stack slot coloring could re-map the freed indices and introduce bugs.
  for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {
    MFI.RemoveStackObject(R.first);
    SGPRSpillsToVirtualVGPRLanes.erase(R.first);
  }

  // Remove the dead frame indices of CSR SGPRs which are spilled to physical
  // VGPR lanes during the SILowerSGPRSpills pass.
  if (!ResetSGPRSpillStackIDs) {
    for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {
      MFI.RemoveStackObject(R.first);
      SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
    }
  }
  bool HaveSGPRToMemory = false;

  if (ResetSGPRSpillStackIDs) {
    // All other SGPRs must be allocated on the default stack, so reset the
    // stack ID.
    for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); I != E;
         ++I) {
      if (!checkIndexInPrologEpilogSGPRSpills(I)) {
        if (MFI.getStackID(I) == TargetStackID::SGPRSpill) {
          MFI.setStackID(I, TargetStackID::Default);
          HaveSGPRToMemory = true;
        }
      }
    }
  }

  for (auto &R : VGPRToAGPRSpills) {
    if (R.second.IsDead)
      MFI.RemoveStackObject(R.first);
  }

  return HaveSGPRToMemory;
}

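// Lazily create, and cache, an emergency stack slot for the register
// scavenger, sized and aligned for a single SGPR.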
int SIMachineFunctionInfo::getScavengeFI(MachineFrameInfo &MFI,
                                         const SIRegisterInfo &TRI) {
  if (ScavengeFI)
    return *ScavengeFI;

  ScavengeFI =
      MFI.CreateStackObject(TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
                            TRI.getSpillAlign(AMDGPU::SGPR_32RegClass), false);
  return *ScavengeFI;
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
  assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
  return AMDGPU::SGPR0 + NumUserSGPRs;
}

MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
  return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}

void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {
  VRegFlags.grow(Reg);
}

void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,
                                                         Register SrcReg) {
  VRegFlags.grow(NewReg);
  VRegFlags[NewReg] = VRegFlags[SrcReg];
}

Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.isAmdPalOS())
    return Register();
  Register GitPtrLo = AMDGPU::SGPR0; // Low GIT address passed in
  if (ST.hasMergedShaders()) {
    switch (MF.getFunction().getCallingConv()) {
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_GS:
      // Low GIT address is passed in s8 rather than s0 for an LS+HS or
      // ES+GS merged shader on gfx9+.
      GitPtrLo = AMDGPU::SGPR8;
      return GitPtrLo;
    default:
      return GitPtrLo;
    }
  }
  return GitPtrLo;
}

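// Render a register in the string form used by MIR serialization.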
static yaml::StringValue regToString(Register Reg,
                                     const TargetRegisterInfo &TRI) {
  yaml::StringValue Dest;
  {
    raw_string_ostream OS(Dest.Value);
    OS << printReg(Reg, &TRI);
  }
  return Dest;
}

static std::optional<yaml::SIArgumentInfo>
convertArgumentInfo(const AMDGPUFunctionArgInfo &ArgInfo,
                    const TargetRegisterInfo &TRI) {
  yaml::SIArgumentInfo AI;

  auto convertArg = [&](std::optional<yaml::SIArgument> &A,
                        const ArgDescriptor &Arg) {
    if (!Arg)
      return false;

    // Create a register or stack argument.
    yaml::SIArgument SA = yaml::SIArgument::createArgument(Arg.isRegister());
    if (Arg.isRegister()) {
      raw_string_ostream OS(SA.RegisterName.Value);
      OS << printReg(Arg.getRegister(), &TRI);
    } else
      SA.StackOffset = Arg.getStackOffset();
    // Check and update the optional mask.
    if (Arg.isMasked())
      SA.Mask = Arg.getMask();

    A = SA;
    return true;
  };

  // TODO: Need to serialize kernarg preloads.
  bool Any = false;
  Any |= convertArg(AI.PrivateSegmentBuffer, ArgInfo.PrivateSegmentBuffer);
  Any |= convertArg(AI.DispatchPtr, ArgInfo.DispatchPtr);
  Any |= convertArg(AI.QueuePtr, ArgInfo.QueuePtr);
  Any |= convertArg(AI.KernargSegmentPtr, ArgInfo.KernargSegmentPtr);
  Any |= convertArg(AI.DispatchID, ArgInfo.DispatchID);
  Any |= convertArg(AI.FlatScratchInit, ArgInfo.FlatScratchInit);
  Any |= convertArg(AI.LDSKernelId, ArgInfo.LDSKernelId);
  Any |= convertArg(AI.PrivateSegmentSize, ArgInfo.PrivateSegmentSize);
  Any |= convertArg(AI.WorkGroupIDX, ArgInfo.WorkGroupIDX);
  Any |= convertArg(AI.WorkGroupIDY, ArgInfo.WorkGroupIDY);
  Any |= convertArg(AI.WorkGroupIDZ, ArgInfo.WorkGroupIDZ);
  Any |= convertArg(AI.WorkGroupInfo, ArgInfo.WorkGroupInfo);
  Any |= convertArg(AI.PrivateSegmentWaveByteOffset,
                    ArgInfo.PrivateSegmentWaveByteOffset);
  Any |= convertArg(AI.ImplicitArgPtr, ArgInfo.ImplicitArgPtr);
  Any |= convertArg(AI.ImplicitBufferPtr, ArgInfo.ImplicitBufferPtr);
  Any |= convertArg(AI.WorkItemIDX, ArgInfo.WorkItemIDX);
  Any |= convertArg(AI.WorkItemIDY, ArgInfo.WorkItemIDY);
  Any |= convertArg(AI.WorkItemIDZ, ArgInfo.WorkItemIDZ);

  if (Any)
    return AI;

  return std::nullopt;
}

yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
    const llvm::SIMachineFunctionInfo &MFI, const TargetRegisterInfo &TRI,
    const llvm::MachineFunction &MF)
    : ExplicitKernArgSize(MFI.getExplicitKernArgSize()),
      MaxKernArgAlign(MFI.getMaxKernArgAlign()), LDSSize(MFI.getLDSSize()),
      GDSSize(MFI.getGDSSize()), DynLDSAlign(MFI.getDynLDSAlign()),
      IsEntryFunction(MFI.isEntryFunction()),
      NoSignedZerosFPMath(MFI.hasNoSignedZerosFPMath()),
      MemoryBound(MFI.isMemoryBound()), WaveLimiter(MFI.needsWaveLimiter()),
      HasSpilledSGPRs(MFI.hasSpilledSGPRs()),
      HasSpilledVGPRs(MFI.hasSpilledVGPRs()),
      HighBitsOf32BitAddress(MFI.get32BitAddressHighBits()),
      Occupancy(MFI.getOccupancy()),
      ScratchRSrcReg(regToString(MFI.getScratchRSrcReg(), TRI)),
      FrameOffsetReg(regToString(MFI.getFrameOffsetReg(), TRI)),
      StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
      BytesInStackArgArea(MFI.getBytesInStackArgArea()),
      ReturnsVoid(MFI.returnsVoid()),
      ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
      PSInputAddr(MFI.getPSInputAddr()), PSInputEnable(MFI.getPSInputEnable()),
      Mode(MFI.getMode()) {
  for (Register Reg : MFI.getWWMReservedRegs())
    WWMReservedRegs.push_back(regToString(Reg, TRI));

  if (MFI.getLongBranchReservedReg())
    LongBranchReservedReg = regToString(MFI.getLongBranchReservedReg(), TRI);
  if (MFI.getVGPRForAGPRCopy())
    VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);

  if (MFI.getSGPRForEXECCopy())
    SGPRForEXECCopy = regToString(MFI.getSGPRForEXECCopy(), TRI);

  auto SFI = MFI.getOptionalScavengeFI();
  if (SFI)
    ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
}

void yaml::SIMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
  MappingTraits<SIMachineFunctionInfo>::mapping(YamlIO, *this);
}

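// Initialize this function info from parsed MIR YAML, emitting a diagnostic
// with source location when the serialized scavenge frame index is invalid.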
bool SIMachineFunctionInfo::initializeBaseYamlFields(
    const yaml::SIMachineFunctionInfo &YamlMFI, const MachineFunction &MF,
    PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) {
  ExplicitKernArgSize = YamlMFI.ExplicitKernArgSize;
  MaxKernArgAlign = YamlMFI.MaxKernArgAlign;
  LDSSize = YamlMFI.LDSSize;
  GDSSize = YamlMFI.GDSSize;
  DynLDSAlign = YamlMFI.DynLDSAlign;
  PSInputAddr = YamlMFI.PSInputAddr;
  PSInputEnable = YamlMFI.PSInputEnable;
  HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
  Occupancy = YamlMFI.Occupancy;
  IsEntryFunction = YamlMFI.IsEntryFunction;
  NoSignedZerosFPMath = YamlMFI.NoSignedZerosFPMath;
  MemoryBound = YamlMFI.MemoryBound;
  WaveLimiter = YamlMFI.WaveLimiter;
  HasSpilledSGPRs = YamlMFI.HasSpilledSGPRs;
  HasSpilledVGPRs = YamlMFI.HasSpilledVGPRs;
  BytesInStackArgArea = YamlMFI.BytesInStackArgArea;
  ReturnsVoid = YamlMFI.ReturnsVoid;

  if (YamlMFI.ScavengeFI) {
    auto FIOrErr = YamlMFI.ScavengeFI->getFI(MF.getFrameInfo());
    if (!FIOrErr) {
      // Create a diagnostic for the frame index.
      const MemoryBuffer &Buffer =
          *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());

      Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1, 1,
                           SourceMgr::DK_Error, toString(FIOrErr.takeError()),
                           "", std::nullopt, std::nullopt);
      SourceRange = YamlMFI.ScavengeFI->SourceRange;
      return true;
    }
    ScavengeFI = *FIOrErr;
  } else {
    ScavengeFI = std::nullopt;
  }
  return false;
}

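// AGPRs may be used unless the function carries the "amdgpu-no-agpr"
// attribute.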
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
  return !F.hasFnAttribute("amdgpu-no-agpr");
}

bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
  if (UsesAGPRs)
    return *UsesAGPRs;

  if (!mayNeedAGPRs()) {
    UsesAGPRs = false;
    return false;
  }

  if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
      MF.getFrameInfo().hasCalls()) {
    UsesAGPRs = true;
    return true;
  }

  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
    if (RC && SIRegisterInfo::isAGPRClass(RC)) {
      UsesAGPRs = true;
      return true;
    }
    if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
      // Defer caching UsesAGPRs; the function might not have been regbank
      // selected yet.
      return true;
    }
  }

  for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
    if (MRI.isPhysRegUsed(Reg)) {
      UsesAGPRs = true;
      return true;
    }
  }

  UsesAGPRs = false;
  return false;
}