AMDGPUSetWavePriority.cpp source code [llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp]

1	//===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	/// \file
10	/// Pass to temporarily raise the wave priority from the start of the
11	/// shader function until its last VMEM instructions, to allow younger
12	/// waves to issue their VMEM instructions as well.
13	//
14	//===----------------------------------------------------------------------===//
15
16	#include "AMDGPU.h"
17	#include "GCNSubtarget.h"
18	#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19	#include "SIInstrInfo.h"
20	#include "llvm/ADT/PostOrderIterator.h"
21	#include "llvm/CodeGen/MachineFunctionPass.h"
22	#include "llvm/InitializePasses.h"
23	#include "llvm/Support/Allocator.h"
24
25	using namespace llvm;
26
27	#define DEBUG_TYPE "amdgpu-set-wave-priority"
28
29	static cl::opt<unsigned> DefaultVALUInstsThreshold(
30	"amdgpu-set-wave-priority-valu-insts-threshold",
31	cl::desc ("VALU instruction count threshold for adjusting wave priority"),
32	cl::init(Val: `100`), cl::Hidden);
33
34	namespace {
35
36	struct MBBInfo {
37	MBBInfo() = default;
38	unsigned NumVALUInstsAtStart = `0`;
39	bool MayReachVMEMLoad = false;
40	MachineInstr LastVMEMLoad = nullptr*;
41	};
42
43	using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;
44
45	class AMDGPUSetWavePriority : public MachineFunctionPass {
46	public:
47	static char ID;
48
49	AMDGPUSetWavePriority() : MachineFunctionPass (ID) {}
50
51	StringRef getPassName() const override { return "Set wave priority"; }
52
53	bool runOnMachineFunction(MachineFunction &MF) override;
54
55	private:
56	MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
57	MachineBasicBlock::iterator I,
58	unsigned priority) const;
59
60	const SIInstrInfo *TII;
61	};
62
63	} // End anonymous namespace.
64
65	INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
66	false)
67
68	char AMDGPUSetWavePriority::ID = `0`;
69
70	FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
71	return new AMDGPUSetWavePriority ();
72	}
73
74	MachineInstr *
75	AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB,
76	MachineBasicBlock::iterator I,
77	unsigned priority) const {
78	return BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SETPRIO))
79	.addImm(priority);
80	}
81
82	// Checks that for every predecessor Pred that can reach a VMEM load,
83	// none of Pred's successors can reach a VMEM load.
84	static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB,
85	MBBInfoSet &MBBInfos) {
86	for (const MachineBasicBlock *Pred : MBB.predecessors()) {
87	if (!MBBInfos [Pred].MayReachVMEMLoad)
88	continue;
89	for (const MachineBasicBlock *Succ : Pred->successors()) {
90	if (MBBInfos [Succ].MayReachVMEMLoad)
91	return false;
92	}
93	}
94	return true;
95	}
96
97	static bool isVMEMLoad(const MachineInstr &MI) {
98	return SIInstrInfo::isVMEM(MI) && MI.mayLoad();
99	}
100
101	bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {
102	const unsigned HighPriority = `3`;
103	const unsigned LowPriority = `0`;
104
105	Function &F = MF.getFunction();
106	if (skipFunction(F) \|\| !AMDGPU::isEntryFunctionCC(CC: F.getCallingConv()))
107	return false;
108
109	const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
110	TII = ST.getInstrInfo();
111
112	unsigned VALUInstsThreshold = DefaultVALUInstsThreshold;
113	Attribute A = F.getFnAttribute(Kind: "amdgpu-wave-priority-threshold");
114	if (A.isValid())
115	A.getValueAsString().getAsInteger(Radix: `0`, Result&: VALUInstsThreshold);
116
117	// Find VMEM loads that may be executed before long-enough sequences of
118	// VALU instructions. We currently assume that backedges/loops, branch
119	// probabilities and other details can be ignored, so we essentially
120	// determine the largest number of VALU instructions along every
121	// possible path from the start of the function that may potentially be
122	// executed provided no backedge is ever taken.
123	MBBInfoSet MBBInfos;
124	for (MachineBasicBlock *MBB : post_order(G: &MF)) {
125	bool AtStart = true;
126	unsigned MaxNumVALUInstsInMiddle = `0`;
127	unsigned NumVALUInstsAtEnd = `0`;
128	for (MachineInstr &MI : *MBB) {
129	if (isVMEMLoad(MI)) {
130	AtStart = false;
131	MBBInfo &Info = MBBInfos [MBB];
132	Info.NumVALUInstsAtStart = `0`;
133	MaxNumVALUInstsInMiddle = `0`;
134	NumVALUInstsAtEnd = `0`;
135	Info.LastVMEMLoad = &MI;
136	} else if (SIInstrInfo::isDS(MI)) {
137	AtStart = false;
138	MaxNumVALUInstsInMiddle =
139	std::max(a: MaxNumVALUInstsInMiddle, b: NumVALUInstsAtEnd);
140	NumVALUInstsAtEnd = `0`;
141	} else if (SIInstrInfo::isVALU(MI)) {
142	if (AtStart)
143	++MBBInfos [MBB].NumVALUInstsAtStart;
144	++NumVALUInstsAtEnd;
145	}
146	}
147
148	bool SuccsMayReachVMEMLoad = false;
149	unsigned NumFollowingVALUInsts = `0`;
150	for (const MachineBasicBlock *Succ : MBB->successors()) {
151	SuccsMayReachVMEMLoad \|= MBBInfos [Succ].MayReachVMEMLoad;
152	NumFollowingVALUInsts =
153	std::max(a: NumFollowingVALUInsts, b: MBBInfos [Succ].NumVALUInstsAtStart);
154	}
155	MBBInfo &Info = MBBInfos [MBB];
156	if (AtStart)
157	Info.NumVALUInstsAtStart += NumFollowingVALUInsts;
158	NumVALUInstsAtEnd += NumFollowingVALUInsts;
159
160	unsigned MaxNumVALUInsts =
161	std::max(a: MaxNumVALUInstsInMiddle, b: NumVALUInstsAtEnd);
162	Info.MayReachVMEMLoad =
163	SuccsMayReachVMEMLoad \|\|
164	(Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold);
165	}
166
167	MachineBasicBlock &Entry = MF.front();
168	if (!MBBInfos [&Entry].MayReachVMEMLoad)
169	return false;
170
171	// Raise the priority at the beginning of the shader.
172	MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end();
173	while (I != E && !SIInstrInfo::isVALU(MI: *I) && !I ->isTerminator())
174	++I;
175	BuildSetprioMI(MBB&: Entry, I, priority: HighPriority);
176
177	// Lower the priority on edges where control leaves blocks from which
178	// the VMEM loads are reachable.
179	SmallSet<MachineBasicBlock *, `16`> PriorityLoweringBlocks;
180	for (MachineBasicBlock &MBB : MF) {
181	if (MBBInfos [&MBB].MayReachVMEMLoad) {
182	if (MBB.succ_empty())
183	PriorityLoweringBlocks.insert(Ptr: &MBB);
184	continue;
185	}
186
187	if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) {
188	for (MachineBasicBlock *Pred : MBB.predecessors()) {
189	if (MBBInfos [Pred].MayReachVMEMLoad)
190	PriorityLoweringBlocks.insert(Ptr: Pred);
191	}
192	continue;
193	}
194
195	// Where lowering the priority in predecessors is not possible, the
196	// block receiving control either was not part of a loop in the first
197	// place or the loop simplification/canonicalization pass should have
198	// already tried to split the edge and insert a preheader, and if for
199	// whatever reason it failed to do so, then this leaves us with the
200	// only option of lowering the priority within the loop.
201	PriorityLoweringBlocks.insert(Ptr: &MBB);
202	}
203
204	for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {
205	BuildSetprioMI(
206	MBB&: *MBB,
207	I: MBBInfos [MBB].LastVMEMLoad
208	? std::next(x: MachineBasicBlock::iterator (MBBInfos [MBB].LastVMEMLoad))
209	: MBB->begin(),
210	priority: LowPriority);
211	}
212
213	return true;
214	}
215

source code of llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp