SIMachineFunctionInfo.h source code [llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h]

1	//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --- C++ --==//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	//
11	//===----------------------------------------------------------------------===//
12
13	#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
14	#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
15
16	#include "AMDGPUArgumentUsageInfo.h"
17	#include "AMDGPUMachineFunction.h"
18	#include "AMDGPUTargetMachine.h"
19	#include "GCNSubtarget.h"
20	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21	#include "SIInstrInfo.h"
22	#include "SIModeRegisterDefaults.h"
23	#include "llvm/ADT/SetVector.h"
24	#include "llvm/ADT/SmallVector.h"
25	#include "llvm/CodeGen/MIRYamlMapping.h"
26	#include "llvm/CodeGen/PseudoSourceValue.h"
27	#include "llvm/Support/raw_ostream.h"
28	#include <optional>
29
30	namespace llvm {
31
32	class MachineFrameInfo;
33	class MachineFunction;
34	class SIMachineFunctionInfo;
35	class SIRegisterInfo;
36	class TargetRegisterClass;
37
38	class AMDGPUPseudoSourceValue : public PseudoSourceValue {
39	public:
40	enum AMDGPUPSVKind : unsigned {
41	PSVImage = PseudoSourceValue::TargetCustom,
42	GWSResource
43	};
44
45	protected:
46	AMDGPUPseudoSourceValue(unsigned Kind, const AMDGPUTargetMachine &TM)
47	: PseudoSourceValue (Kind, TM) {}
48
49	public:
50	bool isConstant(const MachineFrameInfo ) const* override {
51	// This should probably be true for most images, but we will start by being
52	// conservative.
53	return false;
54	}
55
56	bool isAliased(const MachineFrameInfo ) const* override {
57	return true;
58	}
59
60	bool mayAlias(const MachineFrameInfo ) const* override {
61	return true;
62	}
63	};
64
65	class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
66	public:
67	explicit AMDGPUGWSResourcePseudoSourceValue(const AMDGPUTargetMachine &TM)
68	: AMDGPUPseudoSourceValue (GWSResource, TM) {}
69
70	static bool classof(const PseudoSourceValue *V) {
71	return V->kind() == GWSResource;
72	}
73
74	// These are inaccessible memory from IR.
75	bool isAliased(const MachineFrameInfo ) const* override {
76	return false;
77	}
78
79	// These are inaccessible memory from IR.
80	bool mayAlias(const MachineFrameInfo ) const* override {
81	return false;
82	}
83
84	void printCustom(raw_ostream &OS) const override {
85	OS << "GWSResource";
86	}
87	};
88
89	namespace yaml {
90
91	struct SIArgument {
92	bool IsRegister;
93	union {
94	StringValue RegisterName;
95	unsigned StackOffset;
96	};
97	std::optional<unsigned> Mask;
98
99	// Default constructor, which creates a stack argument.
100	SIArgument() : IsRegister(false), StackOffset(`0`) {}
101	SIArgument(const SIArgument &Other) {
102	IsRegister = Other.IsRegister;
103	if (IsRegister) {
104	::new ((void *)std::addressof(r&: RegisterName))
105	StringValue (Other.RegisterName);
106	} else
107	StackOffset = Other.StackOffset;
108	Mask = Other.Mask;
109	}
110	SIArgument &operator=(const SIArgument &Other) {
111	IsRegister = Other.IsRegister;
112	if (IsRegister) {
113	::new ((void *)std::addressof(r&: RegisterName))
114	StringValue (Other.RegisterName);
115	} else
116	StackOffset = Other.StackOffset;
117	Mask = Other.Mask;
118	return *this;
119	}
120	~SIArgument() {
121	if (IsRegister)
122	RegisterName.~StringValue();
123	}
124
125	// Helper to create a register or stack argument.
126	static inline SIArgument createArgument(bool IsReg) {
127	if (IsReg)
128	return SIArgument (IsReg);
129	return SIArgument ();
130	}
131
132	private:
133	// Construct a register argument.
134	SIArgument(bool) : IsRegister(true), RegisterName () {}
135	};
136
137	template <> struct MappingTraits<SIArgument> {
138	static void mapping(IO &YamlIO, SIArgument &A) {
139	if (YamlIO.outputting()) {
140	if (A.IsRegister)
141	YamlIO.mapRequired(Key: "reg", Val&: A.RegisterName);
142	else
143	YamlIO.mapRequired(Key: "offset", Val&: A.StackOffset);
144	} else {
145	auto Keys = YamlIO.keys();
146	if (is_contained(Range&: Keys, Element: "reg")) {
147	A = SIArgument::createArgument(IsReg: true);
148	YamlIO.mapRequired(Key: "reg", Val&: A.RegisterName);
149	} else if (is_contained(Range&: Keys, Element: "offset"))
150	YamlIO.mapRequired(Key: "offset", Val&: A.StackOffset);
151	else
152	YamlIO.setError("missing required key 'reg' or 'offset'");
153	}
154	YamlIO.mapOptional(Key: "mask", Val&: A.Mask);
155	}
156	static const bool flow = true;
157	};
158
159	struct SIArgumentInfo {
160	std::optional<SIArgument> PrivateSegmentBuffer;
161	std::optional<SIArgument> DispatchPtr;
162	std::optional<SIArgument> QueuePtr;
163	std::optional<SIArgument> KernargSegmentPtr;
164	std::optional<SIArgument> DispatchID;
165	std::optional<SIArgument> FlatScratchInit;
166	std::optional<SIArgument> PrivateSegmentSize;
167
168	std::optional<SIArgument> WorkGroupIDX;
169	std::optional<SIArgument> WorkGroupIDY;
170	std::optional<SIArgument> WorkGroupIDZ;
171	std::optional<SIArgument> WorkGroupInfo;
172	std::optional<SIArgument> LDSKernelId;
173	std::optional<SIArgument> PrivateSegmentWaveByteOffset;
174
175	std::optional<SIArgument> ImplicitArgPtr;
176	std::optional<SIArgument> ImplicitBufferPtr;
177
178	std::optional<SIArgument> WorkItemIDX;
179	std::optional<SIArgument> WorkItemIDY;
180	std::optional<SIArgument> WorkItemIDZ;
181	};
182
183	template <> struct MappingTraits<SIArgumentInfo> {
184	static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
185	YamlIO.mapOptional(Key: "privateSegmentBuffer", Val&: AI.PrivateSegmentBuffer);
186	YamlIO.mapOptional(Key: "dispatchPtr", Val&: AI.DispatchPtr);
187	YamlIO.mapOptional(Key: "queuePtr", Val&: AI.QueuePtr);
188	YamlIO.mapOptional(Key: "kernargSegmentPtr", Val&: AI.KernargSegmentPtr);
189	YamlIO.mapOptional(Key: "dispatchID", Val&: AI.DispatchID);
190	YamlIO.mapOptional(Key: "flatScratchInit", Val&: AI.FlatScratchInit);
191	YamlIO.mapOptional(Key: "privateSegmentSize", Val&: AI.PrivateSegmentSize);
192
193	YamlIO.mapOptional(Key: "workGroupIDX", Val&: AI.WorkGroupIDX);
194	YamlIO.mapOptional(Key: "workGroupIDY", Val&: AI.WorkGroupIDY);
195	YamlIO.mapOptional(Key: "workGroupIDZ", Val&: AI.WorkGroupIDZ);
196	YamlIO.mapOptional(Key: "workGroupInfo", Val&: AI.WorkGroupInfo);
197	YamlIO.mapOptional(Key: "LDSKernelId", Val&: AI.LDSKernelId);
198	YamlIO.mapOptional(Key: "privateSegmentWaveByteOffset",
199	Val&: AI.PrivateSegmentWaveByteOffset);
200
201	YamlIO.mapOptional(Key: "implicitArgPtr", Val&: AI.ImplicitArgPtr);
202	YamlIO.mapOptional(Key: "implicitBufferPtr", Val&: AI.ImplicitBufferPtr);
203
204	YamlIO.mapOptional(Key: "workItemIDX", Val&: AI.WorkItemIDX);
205	YamlIO.mapOptional(Key: "workItemIDY", Val&: AI.WorkItemIDY);
206	YamlIO.mapOptional(Key: "workItemIDZ", Val&: AI.WorkItemIDZ);
207	}
208	};
209
210	// Default to default mode for default calling convention.
211	struct SIMode {
212	bool IEEE = true;
213	bool DX10Clamp = true;
214	bool FP32InputDenormals = true;
215	bool FP32OutputDenormals = true;
216	bool FP64FP16InputDenormals = true;
217	bool FP64FP16OutputDenormals = true;
218
219	SIMode() = default;
220
221	SIMode(const SIModeRegisterDefaults &Mode) {
222	IEEE = Mode.IEEE;
223	DX10Clamp = Mode.DX10Clamp;
224	FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign;
225	FP32OutputDenormals =
226	Mode.FP32Denormals.Output != DenormalMode::PreserveSign;
227	FP64FP16InputDenormals =
228	Mode.FP64FP16Denormals.Input != DenormalMode::PreserveSign;
229	FP64FP16OutputDenormals =
230	Mode.FP64FP16Denormals.Output != DenormalMode::PreserveSign;
231	}
232
233	bool operator ==(const SIMode Other) const {
234	return IEEE == Other.IEEE &&
235	DX10Clamp == Other.DX10Clamp &&
236	FP32InputDenormals == Other.FP32InputDenormals &&
237	FP32OutputDenormals == Other.FP32OutputDenormals &&
238	FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
239	FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
240	}
241	};
242
243	template <> struct MappingTraits<SIMode> {
244	static void mapping(IO &YamlIO, SIMode &Mode) {
245	YamlIO.mapOptional(Key: "ieee", Val&: Mode.IEEE, Default: true);
246	YamlIO.mapOptional(Key: "dx10-clamp", Val&: Mode.DX10Clamp, Default: true);
247	YamlIO.mapOptional(Key: "fp32-input-denormals", Val&: Mode.FP32InputDenormals, Default: true);
248	YamlIO.mapOptional(Key: "fp32-output-denormals", Val&: Mode.FP32OutputDenormals, Default: true);
249	YamlIO.mapOptional(Key: "fp64-fp16-input-denormals", Val&: Mode.FP64FP16InputDenormals, Default: true);
250	YamlIO.mapOptional(Key: "fp64-fp16-output-denormals", Val&: Mode.FP64FP16OutputDenormals, Default: true);
251	}
252	};
253
254	struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
255	uint64_t ExplicitKernArgSize = `0`;
256	Align MaxKernArgAlign;
257	uint32_t LDSSize = `0`;
258	uint32_t GDSSize = `0`;
259	Align DynLDSAlign;
260	bool IsEntryFunction = false;
261	bool IsChainFunction = false;
262	bool NoSignedZerosFPMath = false;
263	bool MemoryBound = false;
264	bool WaveLimiter = false;
265	bool HasSpilledSGPRs = false;
266	bool HasSpilledVGPRs = false;
267	uint32_t HighBitsOf32BitAddress = `0`;
268
269	// TODO: 10 may be a better default since it's the maximum.
270	unsigned Occupancy = `0`;
271
272	SmallVector<StringValue> WWMReservedRegs;
273
274	StringValue ScratchRSrcReg = "$private_rsrc_reg";
275	StringValue FrameOffsetReg = "$fp_reg";
276	StringValue StackPtrOffsetReg = "$sp_reg";
277
278	unsigned BytesInStackArgArea = `0`;
279	bool ReturnsVoid = true;
280
281	std::optional<SIArgumentInfo> ArgInfo;
282
283	unsigned PSInputAddr = `0`;
284	unsigned PSInputEnable = `0`;
285
286	SIMode Mode;
287	std::optional<FrameIndex> ScavengeFI;
288	StringValue VGPRForAGPRCopy;
289	StringValue SGPRForEXECCopy;
290	StringValue LongBranchReservedReg;
291
292	SIMachineFunctionInfo() = default;
293	SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
294	const TargetRegisterInfo &TRI,
295	const llvm::MachineFunction &MF);
296
297	void mappingImpl(yaml::IO &YamlIO) override;
298	~SIMachineFunctionInfo() = default;
299	};
300
301	template <> struct MappingTraits<SIMachineFunctionInfo> {
302	static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
303	YamlIO.mapOptional(Key: "explicitKernArgSize", Val&: MFI.ExplicitKernArgSize,
304	UINT64_C(`0`));
305	YamlIO.mapOptional(Key: "maxKernArgAlign", Val&: MFI.MaxKernArgAlign);
306	YamlIO.mapOptional(Key: "ldsSize", Val&: MFI.LDSSize, Default: `0u`);
307	YamlIO.mapOptional(Key: "gdsSize", Val&: MFI.GDSSize, Default: `0u`);
308	YamlIO.mapOptional(Key: "dynLDSAlign", Val&: MFI.DynLDSAlign, Default: Align ());
309	YamlIO.mapOptional(Key: "isEntryFunction", Val&: MFI.IsEntryFunction, Default: false);
310	YamlIO.mapOptional(Key: "isChainFunction", Val&: MFI.IsChainFunction, Default: false);
311	YamlIO.mapOptional(Key: "noSignedZerosFPMath", Val&: MFI.NoSignedZerosFPMath, Default: false);
312	YamlIO.mapOptional(Key: "memoryBound", Val&: MFI.MemoryBound, Default: false);
313	YamlIO.mapOptional(Key: "waveLimiter", Val&: MFI.WaveLimiter, Default: false);
314	YamlIO.mapOptional(Key: "hasSpilledSGPRs", Val&: MFI.HasSpilledSGPRs, Default: false);
315	YamlIO.mapOptional(Key: "hasSpilledVGPRs", Val&: MFI.HasSpilledVGPRs, Default: false);
316	YamlIO.mapOptional(Key: "scratchRSrcReg", Val&: MFI.ScratchRSrcReg,
317	Default: StringValue ("$private_rsrc_reg"));
318	YamlIO.mapOptional(Key: "frameOffsetReg", Val&: MFI.FrameOffsetReg,
319	Default: StringValue ("$fp_reg"));
320	YamlIO.mapOptional(Key: "stackPtrOffsetReg", Val&: MFI.StackPtrOffsetReg,
321	Default: StringValue ("$sp_reg"));
322	YamlIO.mapOptional(Key: "bytesInStackArgArea", Val&: MFI.BytesInStackArgArea, Default: `0u`);
323	YamlIO.mapOptional(Key: "returnsVoid", Val&: MFI.ReturnsVoid, Default: true);
324	YamlIO.mapOptional(Key: "argumentInfo", Val&: MFI.ArgInfo);
325	YamlIO.mapOptional(Key: "psInputAddr", Val&: MFI.PSInputAddr, Default: `0u`);
326	YamlIO.mapOptional(Key: "psInputEnable", Val&: MFI.PSInputEnable, Default: `0u`);
327	YamlIO.mapOptional(Key: "mode", Val&: MFI.Mode, Default: SIMode ());
328	YamlIO.mapOptional(Key: "highBitsOf32BitAddress",
329	Val&: MFI.HighBitsOf32BitAddress, Default: `0u`);
330	YamlIO.mapOptional(Key: "occupancy", Val&: MFI.Occupancy, Default: `0`);
331	YamlIO.mapOptional(Key: "wwmReservedRegs", Val&: MFI.WWMReservedRegs);
332	YamlIO.mapOptional(Key: "scavengeFI", Val&: MFI.ScavengeFI);
333	YamlIO.mapOptional(Key: "vgprForAGPRCopy", Val&: MFI.VGPRForAGPRCopy,
334	Default: StringValue ()); // Don't print out when it's empty.
335	YamlIO.mapOptional(Key: "sgprForEXECCopy", Val&: MFI.SGPRForEXECCopy,
336	Default: StringValue ()); // Don't print out when it's empty.
337	YamlIO.mapOptional(Key: "longBranchReservedReg", Val&: MFI.LongBranchReservedReg,
338	Default: StringValue ());
339	}
340	};
341
342	} // end namespace yaml
343
// A CSR SGPR value can be preserved inside a callee using one of the
// following methods:
//   1. Copy to an unused scratch SGPR.
//   2. Spill to a VGPR lane.
//   3. Spill to memory via a scratch VGPR.
// Class PrologEpilogSGPRSaveRestoreInfo represents the save/restore method
// used for an SGPR at function prolog/epilog.
enum class SGPRSaveKind : uint8_t {
  COPY_TO_SCRATCH_SGPR, // Method 1.
  SPILL_TO_VGPR_LANE,   // Method 2.
  SPILL_TO_MEM          // Method 3.
};
356
357	class PrologEpilogSGPRSaveRestoreInfo {
358	SGPRSaveKind Kind;
359	union {
360	int Index;
361	Register Reg;
362	};
363
364	public:
365	PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, int I) : Kind(K), Index(I) {}
366	PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind K, Register R)
367	: Kind(K), Reg (R) {}
368	Register getReg() const { return Reg; }
369	int getIndex() const { return Index; }
370	SGPRSaveKind getKind() const { return Kind; }
371	};
372
/// This class keeps track of the SPI_PS_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
375	class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
376	private MachineRegisterInfo::Delegate {
377	friend class GCNTargetMachine;
378
379	// State of MODE register, assumed FP mode.
380	SIModeRegisterDefaults Mode;
381
382	// Registers that may be reserved for spilling purposes. These may be the same
383	// as the input registers.
384	Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
385
386	// This is the unswizzled offset from the current dispatch's scratch wave
387	// base to the beginning of the current function's frame.
388	Register FrameOffsetReg = AMDGPU::FP_REG;
389
390	// This is an ABI register used in the non-entry calling convention to
391	// communicate the unswizzled offset from the current dispatch's scratch wave
392	// base to the beginning of the new function's frame.
393	Register StackPtrOffsetReg = AMDGPU::SP_REG;
394
395	// Registers that may be reserved when RA doesn't allocate enough
396	// registers to plan for the case where an indirect branch ends up
397	// being needed during branch relaxation.
398	Register LongBranchReservedReg;
399
400	AMDGPUFunctionArgInfo ArgInfo;
401
402	// Graphics info.
403	unsigned PSInputAddr = `0`;
404	unsigned PSInputEnable = `0`;
405
406	/// Number of bytes of arguments this function has on the stack. If the callee
407	/// is expected to restore the argument stack this should be a multiple of 16,
408	/// all usable during a tail call.
409	///
410	/// The alternative would forbid tail call optimisation in some cases: if we
411	/// want to transfer control from a function with 8-bytes of stack-argument
412	/// space to a function with 16-bytes then misalignment of this value would
413	/// make a stack adjustment necessary, which could not be undone by the
414	/// callee.
415	unsigned BytesInStackArgArea = `0`;
416
417	bool ReturnsVoid = true;
418
419	// A pair of default/requested minimum/maximum flat work group sizes.
420	// Minimum - first, maximum - second.
421	std::pair<unsigned, unsigned> FlatWorkGroupSizes = {`0`, `0`};
422
423	// A pair of default/requested minimum/maximum number of waves per execution
424	// unit. Minimum - first, maximum - second.
425	std::pair<unsigned, unsigned> WavesPerEU = {`0`, `0`};
426
427	const AMDGPUGWSResourcePseudoSourceValue GWSResourcePSV;
428
429	// Default/requested number of work groups for the function.
430	SmallVector<unsigned> MaxNumWorkGroups = {`0`, `0`, `0`};
431
432	private:
433	unsigned NumUserSGPRs = `0`;
434	unsigned NumSystemSGPRs = `0`;
435
436	bool HasSpilledSGPRs = false;
437	bool HasSpilledVGPRs = false;
438	bool HasNonSpillStackObjects = false;
439	bool IsStackRealigned = false;
440
441	unsigned NumSpilledSGPRs = `0`;
442	unsigned NumSpilledVGPRs = `0`;
443
444	// Tracks information about user SGPRs that will be setup by hardware which
445	// will apply to all wavefronts of the grid.
446	GCNUserSGPRUsageInfo UserSGPRInfo;
447
448	// Feature bits required for inputs passed in system SGPRs.
449	bool WorkGroupIDX : `1`; // Always initialized.
450	bool WorkGroupIDY : `1`;
451	bool WorkGroupIDZ : `1`;
452	bool WorkGroupInfo : `1`;
453	bool LDSKernelId : `1`;
454	bool PrivateSegmentWaveByteOffset : `1`;
455
456	bool WorkItemIDX : `1`; // Always initialized.
457	bool WorkItemIDY : `1`;
458	bool WorkItemIDZ : `1`;
459
460	// Pointer to where the ABI inserts special kernel arguments separate from the
461	// user arguments. This is an offset from the KernargSegmentPtr.
462	bool ImplicitArgPtr : `1`;
463
464	bool MayNeedAGPRs : `1`;
465
466	// The hard-wired high half of the address of the global information table
467	// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
468	// current hardware only allows a 16 bit value.
469	unsigned GITPtrHigh;
470
471	unsigned HighBitsOf32BitAddress;
472
473	// Flags associated with the virtual registers.
474	IndexedMap<uint8_t, VirtReg2IndexFunctor> VRegFlags;
475
476	// Current recorded maximum possible occupancy.
477	unsigned Occupancy;
478
479	mutable std::optional<bool> UsesAGPRs;
480
481	MCPhysReg getNextUserSGPR() const;
482
483	MCPhysReg getNextSystemSGPR() const;
484
485	// MachineRegisterInfo callback functions to notify events.
486	void MRI_NoteNewVirtualRegister(Register Reg) override;
487	void MRI_NoteCloneVirtualRegister(Register NewReg, Register SrcReg) override;
488
489	public:
490	struct VGPRSpillToAGPR {
491	SmallVector<MCPhysReg, `32`> Lanes;
492	bool FullyAllocated = false;
493	bool IsDead = false;
494	};
495
496	private:
497	// To track virtual VGPR + lane index for each subregister of the SGPR spilled
498	// to frameindex key during SILowerSGPRSpills pass.
499	DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
500	SGPRSpillsToVirtualVGPRLanes;
501	// To track physical VGPR + lane index for CSR SGPR spills and special SGPRs
502	// like Frame Pointer identified during PrologEpilogInserter.
503	DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
504	SGPRSpillsToPhysicalVGPRLanes;
505	unsigned NumVirtualVGPRSpillLanes = `0`;
506	unsigned NumPhysicalVGPRSpillLanes = `0`;
507	SmallVector<Register, `2`> SpillVGPRs;
508	SmallVector<Register, `2`> SpillPhysVGPRs;
509	using WWMSpillsMap = MapVector<Register, int>;
510	// To track the registers used in instructions that can potentially modify the
511	// inactive lanes. The WWM instructions and the writelane instructions for
512	// spilling SGPRs to VGPRs fall under such category of operations. The VGPRs
513	// modified by them should be spilled/restored at function prolog/epilog to
514	// avoid any undesired outcome. Each entry in this map holds a pair of values,
515	// the VGPR and its stack slot index.
516	WWMSpillsMap WWMSpills;
517
518	using ReservedRegSet = SmallSetVector<Register, `8`>;
519	// To track the VGPRs reserved for WWM instructions. They get stack slots
520	// later during PrologEpilogInserter and get added into the superset WWMSpills
521	// for actual spilling. A separate set makes the register reserved part and
522	// the serialization easier.
523	ReservedRegSet WWMReservedRegs;
524
525	using PrologEpilogSGPRSpillsMap =
526	DenseMap<Register, PrologEpilogSGPRSaveRestoreInfo>;
527	// To track the SGPR spill method used for a CSR SGPR register during
528	// frame lowering. Even though the SGPR spills are handled during
529	// SILowerSGPRSpills pass, some special handling needed later during the
530	// PrologEpilogInserter.
531	PrologEpilogSGPRSpillsMap PrologEpilogSGPRSpills;
532
533	// To save/restore EXEC MASK around WWM spills and copies.
534	Register SGPRForEXECCopy;
535
536	DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
537
538	// AGPRs used for VGPR spills.
539	SmallVector<MCPhysReg, `32`> SpillAGPR;
540
541	// VGPRs used for AGPR spills.
542	SmallVector<MCPhysReg, `32`> SpillVGPR;
543
544	// Emergency stack slot. Sometimes, we create this before finalizing the stack
545	// frame, so save it here and add it to the RegScavenger later.
546	std::optional<int> ScavengeFI;
547
548	private:
549	Register VGPRForAGPRCopy;
550
551	bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI,
552	unsigned LaneIndex);
553	bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI,
554	unsigned LaneIndex,
555	bool IsPrologEpilog);
556
557	public:
558	Register getVGPRForAGPRCopy() const {
559	return VGPRForAGPRCopy;
560	}
561
562	void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy) {
563	VGPRForAGPRCopy = NewVGPRForAGPRCopy;
564	}
565
566	bool isCalleeSavedReg(const MCPhysReg CSRegs, MCPhysReg Reg) const*;
567
568	public:
569	SIMachineFunctionInfo(const SIMachineFunctionInfo &MFI) = default;
570	SIMachineFunctionInfo(const Function &F, const GCNSubtarget *STI);
571
572	MachineFunctionInfo *
573	clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
574	const DenseMap<MachineBasicBlock , MachineBasicBlock > &Src2DstMBB)
575	const override;
576
577	bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI,
578	const MachineFunction &MF,
579	PerFunctionMIParsingState &PFS,
580	SMDiagnostic &Error, SMRange &SourceRange);
581
582	void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(X: Reg); }
583
584	SIModeRegisterDefaults getMode() const { return Mode; }
585
586	ArrayRef<SIRegisterInfo::SpilledReg>
587	getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const {
588	auto I = SGPRSpillsToVirtualVGPRLanes.find(Val: FrameIndex);
589	return (I == SGPRSpillsToVirtualVGPRLanes.end())
590	? ArrayRef<SIRegisterInfo::SpilledReg>()
591	: ArrayRef(I ->second);
592	}
593
594	ArrayRef<Register> getSGPRSpillVGPRs() const { return SpillVGPRs; }
595
596	const WWMSpillsMap &getWWMSpills() const { return WWMSpills; }
597	const ReservedRegSet &getWWMReservedRegs() const { return WWMReservedRegs; }
598
599	const PrologEpilogSGPRSpillsMap &getPrologEpilogSGPRSpills() const {
600	return PrologEpilogSGPRSpills;
601	}
602
603	GCNUserSGPRUsageInfo &getUserSGPRInfo() { return UserSGPRInfo; }
604
605	const GCNUserSGPRUsageInfo &getUserSGPRInfo() const { return UserSGPRInfo; }
606
607	void addToPrologEpilogSGPRSpills(Register Reg,
608	PrologEpilogSGPRSaveRestoreInfo SI) {
609	PrologEpilogSGPRSpills.insert(KV: std::make_pair(x&: Reg, y&: SI));
610	}
611
612	// Check if an entry created for \p Reg in PrologEpilogSGPRSpills. Return true
613	// on success and false otherwise.
614	bool hasPrologEpilogSGPRSpillEntry(Register Reg) const {
615	return PrologEpilogSGPRSpills.contains(Val: Reg);
616	}
617
618	// Get the scratch SGPR if allocated to save/restore \p Reg.
619	Register getScratchSGPRCopyDstReg(Register Reg) const {
620	auto I = PrologEpilogSGPRSpills.find(Val: Reg);
621	if (I != PrologEpilogSGPRSpills.end() &&
622	I ->second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
623	return I ->second.getReg();
624
625	return AMDGPU::NoRegister;
626	}
627
628	// Get all scratch SGPRs allocated to copy/restore the SGPR spills.
629	void getAllScratchSGPRCopyDstRegs(SmallVectorImpl<Register> &Regs) const {
630	for (const auto &SI : PrologEpilogSGPRSpills) {
631	if (SI.second.getKind() == SGPRSaveKind::COPY_TO_SCRATCH_SGPR)
632	Regs.push_back(Elt: SI.second.getReg());
633	}
634	}
635
636	// Check if \p FI is allocated for any SGPR spill to a VGPR lane during PEI.
637	bool checkIndexInPrologEpilogSGPRSpills(int FI) const {
638	return find_if(Range: PrologEpilogSGPRSpills,
639	P: [FI](const std::pair<Register,
640	PrologEpilogSGPRSaveRestoreInfo> &SI) {
641	return SI.second.getKind() ==
642	SGPRSaveKind::SPILL_TO_VGPR_LANE &&
643	SI.second.getIndex() == FI;
644	}) != PrologEpilogSGPRSpills.end();
645	}
646
647	const PrologEpilogSGPRSaveRestoreInfo &
648	getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const {
649	auto I = PrologEpilogSGPRSpills.find(Val: Reg);
650	assert(I != PrologEpilogSGPRSpills.end());
651
652	return I ->second;
653	}
654
655	ArrayRef<SIRegisterInfo::SpilledReg>
656	getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const {
657	auto I = SGPRSpillsToPhysicalVGPRLanes.find(Val: FrameIndex);
658	return (I == SGPRSpillsToPhysicalVGPRLanes.end())
659	? ArrayRef<SIRegisterInfo::SpilledReg>()
660	: ArrayRef(I ->second);
661	}
662
663	void setFlag(Register Reg, uint8_t Flag) {
664	assert(Reg.isVirtual());
665	if (VRegFlags.inBounds(n: Reg))
666	VRegFlags [Reg] \|= Flag;
667	}
668
669	bool checkFlag(Register Reg, uint8_t Flag) const {
670	if (Reg.isPhysical())
671	return false;
672
673	return VRegFlags.inBounds(n: Reg) && VRegFlags [Reg] & Flag;
674	}
675
676	bool hasVRegFlags() { return VRegFlags.size(); }
677
678	void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = `4`,
679	Align Alignment = Align (`4`));
680
681	void splitWWMSpillRegisters(
682	MachineFunction &MF,
683	SmallVectorImpl<std::pair<Register, int>> &CalleeSavedRegs,
684	SmallVectorImpl<std::pair<Register, int>> &ScratchRegs) const;
685
686	ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
687	return SpillAGPR;
688	}
689
690	Register getSGPRForEXECCopy() const { return SGPRForEXECCopy; }
691
692	void setSGPRForEXECCopy(Register Reg) { SGPRForEXECCopy = Reg; }
693
694	ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
695	return SpillVGPR;
696	}
697
698	MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
699	auto I = VGPRToAGPRSpills.find(Val: FrameIndex);
700	return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
701	: I ->second.Lanes [Lane];
702	}
703
704	void setVGPRToAGPRSpillDead(int FrameIndex) {
705	auto I = VGPRToAGPRSpills.find(Val: FrameIndex);
706	if (I != VGPRToAGPRSpills.end())
707	I ->second.IsDead = true;
708	}
709
710	// To bring the Physical VGPRs in the highest range allocated for CSR SGPR
711	// spilling into the lowest available range.
712	void shiftSpillPhysVGPRsToLowestRange(MachineFunction &MF);
713
714	bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI,
715	bool SpillToPhysVGPRLane = false,
716	bool IsPrologEpilog = false);
717	bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
718
719	/// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
720	/// to the default stack.
721	bool removeDeadFrameIndices(MachineFrameInfo &MFI,
722	bool ResetSGPRSpillStackIDs);
723
724	int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
725	std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; }
726
727	unsigned getBytesInStackArgArea() const {
728	return BytesInStackArgArea;
729	}
730
731	void setBytesInStackArgArea(unsigned Bytes) {
732	BytesInStackArgArea = Bytes;
733	}
734
735	// Add user SGPRs.
736	Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
737	Register addDispatchPtr(const SIRegisterInfo &TRI);
738	Register addQueuePtr(const SIRegisterInfo &TRI);
739	Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
740	Register addDispatchID(const SIRegisterInfo &TRI);
741	Register addFlatScratchInit(const SIRegisterInfo &TRI);
742	Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
743	Register addLDSKernelId();
744	SmallVectorImpl<MCRegister> *
745	addPreloadedKernArg(const SIRegisterInfo &TRI, const TargetRegisterClass *RC,
746	unsigned AllocSizeDWord, int KernArgIdx,
747	int PaddingSGPRs);
748
749	/// Increment user SGPRs used for padding the argument list only.
750	Register addReservedUserSGPR() {
751	Register Next = getNextUserSGPR();
752	++NumUserSGPRs;
753	return Next;
754	}
755
756	// Add system SGPRs.
757	Register addWorkGroupIDX() {
758	ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(Reg: getNextSystemSGPR());
759	NumSystemSGPRs += `1`;
760	return ArgInfo.WorkGroupIDX.getRegister();
761	}
762
763	Register addWorkGroupIDY() {
764	ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(Reg: getNextSystemSGPR());
765	NumSystemSGPRs += `1`;
766	return ArgInfo.WorkGroupIDY.getRegister();
767	}
768
769	Register addWorkGroupIDZ() {
770	ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(Reg: getNextSystemSGPR());
771	NumSystemSGPRs += `1`;
772	return ArgInfo.WorkGroupIDZ.getRegister();
773	}
774
775	Register addWorkGroupInfo() {
776	ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(Reg: getNextSystemSGPR());
777	NumSystemSGPRs += `1`;
778	return ArgInfo.WorkGroupInfo.getRegister();
779	}
780
781	bool hasLDSKernelId() const { return LDSKernelId; }
782
783	// Add special VGPR inputs
784	void setWorkItemIDX(ArgDescriptor Arg) {
785	ArgInfo.WorkItemIDX = Arg;
786	}
787
788	void setWorkItemIDY(ArgDescriptor Arg) {
789	ArgInfo.WorkItemIDY = Arg;
790	}
791
792	void setWorkItemIDZ(ArgDescriptor Arg) {
793	ArgInfo.WorkItemIDZ = Arg;
794	}
795
796	Register addPrivateSegmentWaveByteOffset() {
797	ArgInfo.PrivateSegmentWaveByteOffset
798	= ArgDescriptor::createRegister(Reg: getNextSystemSGPR());
799	NumSystemSGPRs += `1`;
800	return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
801	}
802
803	void setPrivateSegmentWaveByteOffset(Register Reg) {
804	ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
805	}
806
807	bool hasWorkGroupIDX() const {
808	return WorkGroupIDX;
809	}
810
811	bool hasWorkGroupIDY() const {
812	return WorkGroupIDY;
813	}
814
815	bool hasWorkGroupIDZ() const {
816	return WorkGroupIDZ;
817	}
818
819	bool hasWorkGroupInfo() const {
820	return WorkGroupInfo;
821	}
822
823	bool hasPrivateSegmentWaveByteOffset() const {
824	return PrivateSegmentWaveByteOffset;
825	}
826
827	bool hasWorkItemIDX() const {
828	return WorkItemIDX;
829	}
830
831	bool hasWorkItemIDY() const {
832	return WorkItemIDY;
833	}
834
835	bool hasWorkItemIDZ() const {
836	return WorkItemIDZ;
837	}
838
839	bool hasImplicitArgPtr() const {
840	return ImplicitArgPtr;
841	}
842
843	AMDGPUFunctionArgInfo &getArgInfo() {
844	return ArgInfo;
845	}
846
847	const AMDGPUFunctionArgInfo &getArgInfo() const {
848	return ArgInfo;
849	}
850
851	std::tuple<const ArgDescriptor , const* TargetRegisterClass *, LLT>
852	getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
853	return ArgInfo.getPreloadedValue(Value);
854	}
855
856	MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
857	auto Arg = std::get<`0`>(t: ArgInfo.getPreloadedValue(Value));
858	return Arg ? Arg->getRegister() : MCRegister ();
859	}
860
861	unsigned getGITPtrHigh() const {
862	return GITPtrHigh;
863	}
864
865	Register getGITPtrLoReg(const MachineFunction &MF) const;
866
867	uint32_t get32BitAddressHighBits() const {
868	return HighBitsOf32BitAddress;
869	}
870
871	unsigned getNumUserSGPRs() const {
872	return NumUserSGPRs;
873	}
874
875	unsigned getNumPreloadedSGPRs() const {
876	return NumUserSGPRs + NumSystemSGPRs;
877	}
878
879	unsigned getNumKernargPreloadedSGPRs() const {
880	return UserSGPRInfo.getNumKernargPreloadSGPRs();
881	}
882
883	Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
884	return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
885	}
886
887	/// Returns the physical register reserved for use as the resource
888	/// descriptor for scratch accesses.
889	Register getScratchRSrcReg() const {
890	return ScratchRSrcReg;
891	}
892
893	void setScratchRSrcReg(Register Reg) {
894	assert(Reg != `0` && "Should never be unset");
895	ScratchRSrcReg = Reg;
896	}
897
898	Register getFrameOffsetReg() const {
899	return FrameOffsetReg;
900	}
901
902	void setFrameOffsetReg(Register Reg) {
903	assert(Reg != `0` && "Should never be unset");
904	FrameOffsetReg = Reg;
905	}
906
907	void setStackPtrOffsetReg(Register Reg) {
908	assert(Reg != `0` && "Should never be unset");
909	StackPtrOffsetReg = Reg;
910	}
911
912	void setLongBranchReservedReg(Register Reg) { LongBranchReservedReg = Reg; }
913
914	// Note the unset value for this is AMDGPU::SP_REG rather than
915	// NoRegister. This is mostly a workaround for MIR tests where state that
916	// can't be directly computed from the function is not preserved in serialized
917	// MIR.
918	Register getStackPtrOffsetReg() const {
919	return StackPtrOffsetReg;
920	}
921
922	Register getLongBranchReservedReg() const { return LongBranchReservedReg; }
923
924	Register getQueuePtrUserSGPR() const {
925	return ArgInfo.QueuePtr.getRegister();
926	}
927
928	Register getImplicitBufferPtrUserSGPR() const {
929	return ArgInfo.ImplicitBufferPtr.getRegister();
930	}
931
932	bool hasSpilledSGPRs() const {
933	return HasSpilledSGPRs;
934	}
935
936	void setHasSpilledSGPRs(bool Spill = true) {
937	HasSpilledSGPRs = Spill;
938	}
939
940	bool hasSpilledVGPRs() const {
941	return HasSpilledVGPRs;
942	}
943
944	void setHasSpilledVGPRs(bool Spill = true) {
945	HasSpilledVGPRs = Spill;
946	}
947
948	bool hasNonSpillStackObjects() const {
949	return HasNonSpillStackObjects;
950	}
951
952	void setHasNonSpillStackObjects(bool StackObject = true) {
953	HasNonSpillStackObjects = StackObject;
954	}
955
956	bool isStackRealigned() const {
957	return IsStackRealigned;
958	}
959
960	void setIsStackRealigned(bool Realigned = true) {
961	IsStackRealigned = Realigned;
962	}
963
964	unsigned getNumSpilledSGPRs() const {
965	return NumSpilledSGPRs;
966	}
967
968	unsigned getNumSpilledVGPRs() const {
969	return NumSpilledVGPRs;
970	}
971
972	void addToSpilledSGPRs(unsigned num) {
973	NumSpilledSGPRs += num;
974	}
975
976	void addToSpilledVGPRs(unsigned num) {
977	NumSpilledVGPRs += num;
978	}
979
980	unsigned getPSInputAddr() const {
981	return PSInputAddr;
982	}
983
984	unsigned getPSInputEnable() const {
985	return PSInputEnable;
986	}
987
988	bool isPSInputAllocated(unsigned Index) const {
989	return PSInputAddr & (`1` << Index);
990	}
991
992	void markPSInputAllocated(unsigned Index) {
993	PSInputAddr \|= `1` << Index;
994	}
995
996	void markPSInputEnabled(unsigned Index) {
997	PSInputEnable \|= `1` << Index;
998	}
999
1000	bool returnsVoid() const {
1001	return ReturnsVoid;
1002	}
1003
1004	void setIfReturnsVoid(bool Value) {
1005	ReturnsVoid = Value;
1006	}
1007
1008	/// \returns A pair of default/requested minimum/maximum flat work group sizes
1009	/// for this function.
1010	std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
1011	return FlatWorkGroupSizes;
1012	}
1013
1014	/// \returns Default/requested minimum flat work group size for this function.
1015	unsigned getMinFlatWorkGroupSize() const {
1016	return FlatWorkGroupSizes.first;
1017	}
1018
1019	/// \returns Default/requested maximum flat work group size for this function.
1020	unsigned getMaxFlatWorkGroupSize() const {
1021	return FlatWorkGroupSizes.second;
1022	}
1023
1024	/// \returns A pair of default/requested minimum/maximum number of waves per
1025	/// execution unit.
1026	std::pair<unsigned, unsigned> getWavesPerEU() const {
1027	return WavesPerEU;
1028	}
1029
1030	/// \returns Default/requested minimum number of waves per execution unit.
1031	unsigned getMinWavesPerEU() const {
1032	return WavesPerEU.first;
1033	}
1034
1035	/// \returns Default/requested maximum number of waves per execution unit.
1036	unsigned getMaxWavesPerEU() const {
1037	return WavesPerEU.second;
1038	}
1039
1040	const AMDGPUGWSResourcePseudoSourceValue *
1041	getGWSPSV(const AMDGPUTargetMachine &TM) {
1042	return &GWSResourcePSV;
1043	}
1044
1045	unsigned getOccupancy() const {
1046	return Occupancy;
1047	}
1048
1049	unsigned getMinAllowedOccupancy() const {
1050	if (!isMemoryBound() && !needsWaveLimiter())
1051	return Occupancy;
1052	return (Occupancy < `4`) ? Occupancy : `4`;
1053	}
1054
1055	void limitOccupancy(const MachineFunction &MF);
1056
1057	void limitOccupancy(unsigned Limit) {
1058	if (Occupancy > Limit)
1059	Occupancy = Limit;
1060	}
1061
1062	void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
1063	if (Occupancy < Limit)
1064	Occupancy = Limit;
1065	limitOccupancy(MF);
1066	}
1067
1068	bool mayNeedAGPRs() const {
1069	return MayNeedAGPRs;
1070	}
1071
1072	// \returns true if a function has a use of AGPRs via inline asm or
1073	// has a call which may use it.
1074	bool mayUseAGPRs(const Function &F) const;
1075
1076	// \returns true if a function needs or may need AGPRs.
1077	bool usesAGPRs(const MachineFunction &MF) const;
1078
1079	/// \returns Default/requested number of work groups for this function.
1080	SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }
1081
1082	unsigned getMaxNumWorkGroupsX() const { return MaxNumWorkGroups [`0`]; }
1083	unsigned getMaxNumWorkGroupsY() const { return MaxNumWorkGroups [`1`]; }
1084	unsigned getMaxNumWorkGroupsZ() const { return MaxNumWorkGroups [`2`]; }
1085	};
1086
1087	} // end namespace llvm
1088
1089	#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
1090

source code of llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h