1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
/// \file
/// This file declares the GCN-specific machine scheduler strategies and the
/// multi-stage scheduling driver used for AMDGPU.
//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/MapVector.h"
18#include "llvm/CodeGen/MachineScheduler.h"
19
20namespace llvm {
21
22class SIMachineFunctionInfo;
23class SIRegisterInfo;
24class GCNSubtarget;
25class GCNSchedStage;
26
// Identifies the scheduling stages run (in this order) by
// GCNScheduleDAGMILive::runSchedStages(). Not every stage runs for every
// function; each stage may decide to skip itself (see GCNSchedStage).
enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  UnclusteredHighRPReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  ILPInitialSchedule = 4
};

#ifndef NDEBUG
// Debug-only pretty printer for stage IDs (defined in the .cpp).
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif
38
/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  // Compares the best candidates from the top and bottom boundaries and
  // returns the winner; IsTopNode reports which boundary it came from.
  SUnit *pickNodeBidirectional(bool &IsTopNode);

  // Picks the best candidate from Zone's ready queue under ZonePolicy,
  // returning the result through Cand.
  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand);

  // Initializes Cand for SU at the given boundary, using the supplied current
  // SGPR/VGPR pressure values for pressure-change bookkeeping.
  void initCandidate(SchedCandidate &Cand, SUnit *SU,
                     bool AtTop, const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI,
                     unsigned SGPRPressure, unsigned VGPRPressure);

  // Scratch register-pressure vectors reused across candidate evaluation.
  std::vector<unsigned> Pressure;

  std::vector<unsigned> MaxPressure;

  // Pressure limits above which the schedule is considered to have excess
  // SGPR/VGPR pressure.
  unsigned SGPRExcessLimit;

  unsigned VGPRExcessLimit;

  // Occupancy (waves) the strategy currently aims for; see
  // get/setTargetOccupancy below.
  unsigned TargetOccupancy;

  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Pointer to the current SchedStageID.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;

public:
  // schedule() has seen register pressure over the critical limits and had to
  // track register pressure for actual scheduling heuristics.
  bool HasHighPressure;

  // Schedule known to have excess register pressure. Be more conservative in
  // increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because of poor performance of the generic RP
  // tracker and can be adjusted up for tuning heuristics to try and more
  // aggressively reduce register pressure.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under a high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under a high register pressure.
  const unsigned HighRPVGPRBias = 7;

  unsigned SGPRCriticalLimit;

  unsigned VGPRCriticalLimit;

  // Extra bias applied on top of the limits; 0 by default, adjustable by
  // individual scheduling stages.
  unsigned SGPRLimitBias = 0;

  unsigned VGPRLimitBias = 0;

  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  GCNSchedStageID getCurrentStage();

  // Advances stage. Returns true if there are remaining stages.
  bool advanceStage();

  // True if advanceStage() would land on another stage.
  bool hasNextStage() const;

  GCNSchedStageID getNextStage() const;
};
120
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// maximum number of waves per simd). Differences from the base strategy are
/// established in the constructor (defined in the .cpp).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
};
127
/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  // Overrides GenericScheduler's candidate comparison to apply ILP-oriented
  // heuristics. Returns true if TryCand beats Cand.
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};
138
/// Summarizes a schedule as its total length and the number of bubble (stall)
/// cycles, and condenses the two into a single comparable metric.
class ScheduleMetrics {
  // Fix: the previous user-provided default constructor left both members
  // uninitialized, making any accessor on a default-constructed object read
  // indeterminate values (UB) and getMetric() divide by zero.
  unsigned ScheduleLength = 0;
  unsigned BubbleCycles = 0;

public:
  ScheduleMetrics() = default;
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  // Returns the bubble-to-length ratio scaled by ScaleFactor, clamped to a
  // minimum of 1.
  unsigned getMetric() const {
    // An empty (default-constructed) schedule has no meaningful ratio; treat
    // it like the "too small to measure" case below.
    if (ScheduleLength == 0)
      return 1;
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // Metric is zero if the amount of bubbles is less than 1% which is too
    // small. So, return 1.
    return Metric ? Metric : 1;
  }
  // Defined in the .cpp; determines the resolution of getMetric().
  static const unsigned ScaleFactor;
};
157
158inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
159 dbgs() << "\n Schedule Metric (scaled by "
160 << ScheduleMetrics::ScaleFactor
161 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
162 << Sm.getLength() << " ]\n";
163 return OS;
164}
165
// Live-interval-aware scheduling DAG that drives the multi-stage GCN
// scheduling flow (see runSchedStages()). Stages access its region and
// pressure caches directly via the friend declarations below.
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<std::pair<MachineBasicBlock::iterator,
                        MachineBasicBlock::iterator>, 32> Regions;

  // Records if a region is not yet scheduled, or schedule has been reverted,
  // or we generally desire to reschedule it.
  BitVector RescheduleRegions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions usually will result in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have the same occupancy as the latest MinOccupancy.
  BitVector RegionsWithMinOcc;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // Live-in sets keyed by MachineInstr; populated from getBBLiveInMap().
  // NOTE(review): presumably keyed by each region's first instruction --
  // confirm against the .cpp.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Computes the map used to populate BBLiveInMap (defined in the .cpp).
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  // Update region boundaries when removing MI or inserting NewMI before MI.
  void updateRegionBoundaries(
      SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                                MachineBasicBlock::iterator>> &RegionBoundaries,
      MachineBasicBlock::iterator MI, MachineInstr *NewMI,
      bool Removing = false);

  // Executes the configured scheduling stages over the function.
  void runSchedStages();

  // Factory returning the GCNSchedStage subclass for SchedStageID.
  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};
243
// GCNSchedStrategy applies multiple scheduling stages to a function.
// GCNSchedStage holds the per-stage state and the common stage/region
// lifecycle hooks; subclasses override the virtual hooks to customize a
// particular stage.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling, so the
  // schedule can be reverted (see revertScheduling()).
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  // DAG mutations stashed by the stage; see uses in the .cpp.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current stage
  // should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  void finalizeGCNRegion();

  // Check result of scheduling.
  void checkScheduling();

  // Computes the given schedule's virtual execution time in clocks.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  // Helper for getScheduleMetrics(): the cycle at which SU can issue, given
  // previously computed ready cycles in ReadyCycles.
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  // Move on to the next region of the function.
  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
324
// Initial scheduling stage (GCNSchedStageID::OccInitialSchedule); only the
// revert decision differs from the base stage.
class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
332
// Rescheduling stage for regions recorded with high register pressure
// (GCNSchedStageID::UnclusteredHighRPReschedule). NOTE(review): the
// "Unclustered" in the name suggests clustering DAG mutations are disabled
// while this stage runs -- confirm against the .cpp.
class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
350
// Retry function scheduling if we found resulting occupancy and it is
// lower than used for other scheduling passes. This will give more freedom
// to schedule low register pressure blocks.
// (GCNSchedStageID::ClusteredLowOccupancyReschedule)
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
365
// Pre-RA rematerialization stage (GCNSchedStageID::PreRARematerialize):
// attempts to reduce register pressure by sinking trivially rematerializable
// defs closer to their uses.
class PreRARematStage : public GCNSchedStage {
private:
  // Each region at MinOccupancy will have their own list of trivially
  // rematerializable instructions we can remat to reduce RP. The list maps an
  // instruction to the position we should remat before, usually the MI using
  // the rematerializable instruction.
  MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
      RematerializableInsts;

  // Map a trivially rematerializable def to a list of regions at MinOccupancy
  // that has the defined reg as a live-in.
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;

  // Collect all trivially rematerializable VGPR instructions with a single def
  // and single use outside the defining block into RematerializableInsts.
  void collectRematerializableInstructions();

  // Whether MI can be rematerialized without changing program behavior.
  bool isTriviallyReMaterializable(const MachineInstr &MI);

  // TODO: Should also attempt to reduce RP of SGPRs and AGPRs
  // Attempt to reduce RP of VGPR by sinking trivially rematerializable
  // instructions. Returns true if we were able to sink instruction(s).
  bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
                               const TargetInstrInfo *TII);

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
401
// Initial scheduling stage used with the ILP-oriented strategy
// (GCNSchedStageID::ILPInitialSchedule); only the revert decision differs
// from the base stage.
class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
409
// Post-RA scheduling DAG for GCN targets.
class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  // DAG mutations stashed around scheduling; see uses in the .cpp.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  // Whether the function contains IGLP instructions; set in the .cpp.
  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};
425
426} // End namespace llvm
427
428#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
429

// Source: llvm/lib/Target/AMDGPU/GCNSchedStrategy.h