//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
///
/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
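///
/// The set of stages depends on the chosen strategy:
/// GCNMaxOccupancySchedStrategy runs OccInitialSchedule,
/// UnclusteredHighRPReschedule, ClusteredLowOccupancyReschedule, and
/// PreRARematerialize, while GCNMaxILPSchedStrategy runs only
/// ILPInitialSchedule.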
///
//===----------------------------------------------------------------------===//

#include "GCNSchedStrategy.h"
#include "AMDGPUIGroupLP.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"

#define DEBUG_TYPE "machine-scheduler"

using namespace llvm;

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

static cl::opt<bool>
    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
               cl::desc("Relax occupancy targets for kernels which are memory "
                        "bound (amdgpu-membound-threshold), or "
                        "Wave Limited (amdgpu-limit-wave-threshold)."),
               cl::init(false));

const unsigned ScheduleMetrics::ScaleFactor = 100;

GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
    : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
      HasHighPressure(false) {}

void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
  GenericScheduler::initialize(DAG);

  MF = &DAG->MF;

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);

  SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  // Allow for lower occupancy targets if the kernel is wave limited or memory
  // bound, and the relaxed occupancy feature is in use.
  TargetOccupancy =
      RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
  SGPRCriticalLimit =
      std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);

  if (!KnownExcessRP) {
    VGPRCriticalLimit =
        std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
  } else {
    // This is similar to the ST.getMaxNumVGPRs(TargetOccupancy) result,
    // except it returns a reasonably small number for targets with lots of
    // VGPRs, such as GFX10 and GFX11.
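    // For example, with 256 addressable VGPRs, an allocation granule of 4, and
    // a target occupancy of 8 waves, the budget below would be
    // alignDown(256 / 8, 4) = 32 VGPRs. (Illustrative values; the actual
    // numbers are subtarget dependent.)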
    LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                         "VGPRCriticalLimit calculation method.\n");

    unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
    unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
    unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
    VGPRBudget = std::max(VGPRBudget, Granule);
    VGPRCriticalLimit = std::min(VGPRBudget, VGPRExcessLimit);
  }

  // Subtract error margin and bias from register limits and avoid overflow.
  SGPRCriticalLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRCriticalLimit);
  VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRCriticalLimit);
  SGPRExcessLimit -= std::min(SGPRLimitBias + ErrorMargin, SGPRExcessLimit);
  VGPRExcessLimit -= std::min(VGPRLimitBias + ErrorMargin, VGPRExcessLimit);

  LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
                    << ", VGPRExcessLimit = " << VGPRExcessLimit
                    << ", SGPRCriticalLimit = " << SGPRCriticalLimit
                    << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
}

void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;

  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const copy.
  RegPressureTracker &TempTracker = const_cast<RegPressureTracker &>(RPTracker);

  Pressure.clear();
  MaxPressure.clear();

  if (AtTop)
    TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
  else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
    TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
  }

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets
  // by the same amount, the generic scheduler will prefer to schedule the
  // instruction that increases the set with the least amount of registers,
  // which in our case would be SGPRs. This is rarely what we want, so
  // when we report excess/critical register pressure, we do it either
  // only for VGPRs or only for SGPRs.

  // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;

  // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
  // to increase the likelihood we don't go over the limits. We should improve
  // the analysis to look through dependencies to find the path with the least
  // register pressure.

  // We only need to update the RPDelta for instructions that increase register
  // pressure. Instructions that decrease or keep reg pressure the same will be
  // marked as RegExcess in tryCandidate() when they are compared with
  // instructions that increase the register pressure.
  if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
    Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
  }

  if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
    HasHighPressure = true;
    Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
    Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
  }

  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure has
  // the same cost, so we don't need to prefer one over the other.

  int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
  int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;

  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeFromQueue()
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  if (DAG->isTrackingPressure()) {
    SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
    VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
  }
  ReadyQueue &Q = Zone.Available;
  for (SUnit *SU : Q) {

    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
                  SGPRPressure, VGPRPressure);
    // Pass SchedBoundary only when comparing nodes from the same boundary.
    SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
    tryCandidate(Cand, TryCand, ZoneArg);
    if (TryCand.Reason != NoCand) {
      // Initialize resource delta if needed in case future heuristics query it.
      if (TryCand.ResDelta == SchedResourceDelta())
        TryCand.initResourceDelta(Zone.DAG, SchedModel);
      Cand.setBest(TryCand);
      LLVM_DEBUG(traceCandidate(Cand));
    }
  }
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNodeBidirectional()
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
  // Schedule as far as possible in the direction of no choice. This is most
  // efficient, but also provides the best heuristics for CriticalPSets.
  if (SUnit *SU = Bot.pickOnlyChoice()) {
    IsTopNode = false;
    return SU;
  }
  if (SUnit *SU = Top.pickOnlyChoice()) {
    IsTopNode = true;
    return SU;
  }
  // Set the bottom-up policy based on the state of the current bottom zone and
  // the instructions outside the zone, including the top zone.
  CandPolicy BotPolicy;
  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
  // Set the top-down policy based on the state of the current top zone and
  // the instructions outside the zone, including the bottom zone.
  CandPolicy TopPolicy;
  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);

  // See if BotCand is still valid (because we previously scheduled from Top).
  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    BotCand.reset(CandPolicy());
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(BotCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
      assert(TCand.SU == BotCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Check if the top Q has a better candidate.
  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    TopCand.reset(CandPolicy());
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
  } else {
    LLVM_DEBUG(traceCandidate(TopCand));
#ifndef NDEBUG
    if (VerifyScheduling) {
      SchedCandidate TCand;
      TCand.reset(CandPolicy());
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
      assert(TCand.SU == TopCand.SU &&
             "Last pick result should correspond to re-picking right now");
    }
#endif
  }

  // Pick best from BotCand and TopCand.
  LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
             dbgs() << "Bot Cand: "; traceCandidate(BotCand););
  SchedCandidate Cand = BotCand;
  TopCand.Reason = NoCand;
  tryCandidate(Cand, TopCand, nullptr);
  if (TopCand.Reason != NoCand) {
    Cand.setBest(TopCand);
  }
  LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}

// This function is mostly cut and pasted from
// GenericScheduler::pickNode()
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  do {
    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
        SU = TopCand.SU;
      }
      IsTopNode = true;
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
      if (!SU) {
        CandPolicy NoPolicy;
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
        SU = BotCand.SU;
      }
      IsTopNode = false;
    } else {
      SU = pickNodeBidirectional(IsTopNode);
    }
  } while (SU->isScheduled);

  if (SU->isTopReady())
    Top.removeReady(SU);
  if (SU->isBottomReady())
    Bot.removeReady(SU);

  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                    << *SU->getInstr());
  return SU;
}

GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
  assert(CurrentStage && CurrentStage != SchedStages.end());
  return *CurrentStage;
}

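// Advance to the next scheduling stage: the first stage on the initial call,
// the following stage on each subsequent call. Returns false once all stages
// have been exhausted.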
bool GCNSchedStrategy::advanceStage() {
  assert(CurrentStage != SchedStages.end());
  if (!CurrentStage)
    CurrentStage = SchedStages.begin();
  else
    CurrentStage++;

  return CurrentStage != SchedStages.end();
}

bool GCNSchedStrategy::hasNextStage() const {
  assert(CurrentStage);
  return std::next(CurrentStage) != SchedStages.end();
}

GCNSchedStageID GCNSchedStrategy::getNextStage() const {
  assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
  return *std::next(CurrentStage);
}

GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
    const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
  SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
  SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
  SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
}

GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
    : GCNSchedStrategy(C) {
  SchedStages.push_back(GCNSchedStageID::ILPInitialSchedule);
}

bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // Initialize the candidate if needed.
  if (!Cand.isValid()) {
    TryCand.Reason = NodeOrder;
    return true;
  }

  // Avoid spilling by exceeding the register limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Bias PhysReg defs and copies towards their uses and definitions,
  // respectively.
  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // Prioritize instructions that read unbuffered resources by stall cycles.
    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;

    // Avoid critical resource consumption and balance the schedule.
    TryCand.initResourceDelta(DAG, SchedModel);
    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
                TryCand, Cand, ResourceReduce))
      return TryCand.Reason != NoCand;
    if (tryGreater(TryCand.ResDelta.DemandedResources,
                   Cand.ResDelta.DemandedResources, TryCand, Cand,
                   ResourceDemand))
      return TryCand.Reason != NoCand;

    // Unconditionally try to reduce latency.
    if (tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;

    // Weak edges are for clustering and other constraints.
    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
      return TryCand.Reason != NoCand;
  }

  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  //
  // This is a best effort to set things up for a post-RA pass. Optimizations
  // like generating loads of multiple registers should ideally be done within
  // the scheduler pass by combining the loads during DAG postprocessing.
  const SUnit *CandNextClusterSU =
      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  const SUnit *TryCandNextClusterSU =
      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  if (SameBoundary) {
    // Fall through to original instruction order.
    if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
        (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
      TryCand.Reason = NodeOrder;
      return true;
    }
  }
  return false;
}

GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {

  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}

std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  }

  llvm_unreachable("Unknown SchedStageID.");
}

void GCNScheduleDAGMILive::schedule() {
  // Collect all scheduling regions. The actual scheduling is performed in
  // GCNScheduleDAGMILive::finalizeSchedule.
  Regions.push_back(std::pair(RegionBegin, RegionEnd));
}

GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(begin(), end(), &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has only one successor, the live-ins of that successor are
  // the live-outs of the current block. We can reuse the calculated live set
  // if the successor will be sent to scheduling after the current block.

  // However, due to a bug in LiveInterval analysis it may happen that two
  // predecessors of the same successor block have different lane bitmasks for
  // a live-out register. Work around that by sticking to a one-to-one
  // relationship, i.e. one predecessor with one successor block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
    }
    RPTracker.advanceToNext();
    RPTracker.advanceBeforeNext();
  }

  if (OnlySucc) {
    if (I != MBB->end()) {
      RPTracker.advanceToNext();
      RPTracker.advance(MBB->end());
    }
    RPTracker.advanceBeforeNext();
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}

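// Map the first non-debug instruction of each block containing scheduling
// regions to the set of registers live there. The result is used to seed the
// downward RP tracker in computeBlockPressure().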
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getBBLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> BBStarters;
  BBStarters.reserve(Regions.size());
  auto I = Regions.rbegin(), E = Regions.rend();
  auto *BB = I->first->getParent();
  do {
    auto *MI = &*skipDebugInstructionsForward(I->first, I->second);
    BBStarters.push_back(MI);
    do {
      ++I;
    } while (I != E && I->first->getParent() == BB);
  } while (I != E);
  return getLiveRegMap(BBStarters, false /*After*/, *LIS);
}

void GCNScheduleDAGMILive::finalizeSchedule() {
  // Start actual scheduling here. This function is called by the base
  // MachineScheduler after all regions have been recorded by
  // GCNScheduleDAGMILive::schedule().
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RescheduleRegions.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithMinOcc.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RescheduleRegions.set();
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithMinOcc.reset();
  RegionsWithIGLPInstrs.reset();

  runSchedStages();
}

void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty())
    BBLiveInMap = getBBLiveInMap();

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }
}

#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  }

  return OS;
}
#endif

GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
    : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
      MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}

bool GCNSchedStage::initGCNSchedStage() {
  if (!DAG.LIS)
    return false;

  LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
  return true;
}

bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  SavedMutations.swap(DAG.Mutations);
  DAG.addMutation(
      createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PreRAReentry));

  InitialOccupancy = DAG.MinOccupancy;
  // Aggressively try to reduce register pressure in the unclustered high RP
  // stage. Temporarily increase the occupancy target in the region.
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;
  if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
    MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");

  return true;
}

bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. All regions will have already been scheduled with the
  // ideal occupancy targets.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}

bool PreRARematStage::initGCNSchedStage() {
  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
    return false;

  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  // Check maximum occupancy.
  if (ST.computeOccupancy(MF.getFunction(), MFI.getLDSSize()) ==
      DAG.MinOccupancy)
    return false;

  // FIXME: This pass will invalidate cached MBBLiveIns for regions in between
  // the defs and the region we sink the def to. Cached pressure for regions
  // where a def is sunk from will also be invalidated. This will need to be
  // fixed if there is another pass after this pass.
  assert(!S.hasNextStage());

  collectRematerializableInstructions();
  if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with improved occupancy of "
             << DAG.MinOccupancy << " from rematerializing\n");
  return true;
}

void GCNSchedStage::finalizeGCNSchedStage() {
  DAG.finishBlock();
  LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
}

void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
      DAG.RegionsWithMinOcc[IDX] =
          DAG.Pressure[IDX].getOccupancy(DAG.ST) == DAG.MinOccupancy;

    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  }

  GCNSchedStage::finalizeGCNSchedStage();
}

bool GCNSchedStage::initGCNRegion() {
  // Check whether this new region is also a new block.
  if (DAG.RegionBegin->getParent() != CurrentMBB)
    setupNewBlock();

  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  DAG.enterRegion(CurrentMBB, DAG.begin(), DAG.end(), NumRegionInstrs);

  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << " " << CurrentMBB->getName()
                    << "\n From: " << *DAG.begin() << " To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save original instruction order before scheduling for possible revert.
  Unsched.clear();
  Unsched.reserve(DAG.NumRegionInstrs);
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule) {
    for (auto &I : DAG) {
      Unsched.push_back(&I);
      if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
          I.getOpcode() == AMDGPU::IGLP_OPT)
        DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
    }
  } else {
    for (auto &I : DAG)
      Unsched.push_back(&I);
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure: "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  S.KnownExcessRP = isRegionWithExcessRP();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
    SavedMutations.clear();
    SavedMutations.swap(DAG.Mutations);
    bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
                          StageID == GCNSchedStageID::ILPInitialSchedule;
    DAG.addMutation(createIGroupLPDAGMutation(
        IsInitialStage ? AMDGPU::SchedulingPhase::Initial
                       : AMDGPU::SchedulingPhase::PreRAReentry));
  }

  return true;
}

bool UnclusteredHighRPStage::initGCNRegion() {
  // Only reschedule regions with the minimum occupancy or regions that may
  // have spilling (excess register pressure).
  if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
       DAG.MinOccupancy <= InitialOccupancy) &&
      !DAG.RegionsWithExcessRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it wasn't rescheduled in the last
  // stage, or if we found it was testing critical register pressure limits in
  // the unclustered reschedule stage. The latter is because we may not have
  // been able to raise the min occupancy in the previous stage, so the region
  // may be overly constrained even if it was already rescheduled.
  if (!DAG.RegionsWithHighRP[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

bool PreRARematStage::initGCNRegion() {
  if (!DAG.RescheduleRegions[RegionIdx])
    return false;

  return GCNSchedStage::initGCNRegion();
}

void GCNSchedStage::setupNewBlock() {
  if (CurrentMBB)
    DAG.finishBlock();

  CurrentMBB = DAG.RegionBegin->getParent();
  DAG.startBlock(CurrentMBB);
  // Get real RP for the region if it hasn't been calculated before. After the
  // initial schedule stage real RP will be collected after scheduling.
  if (StageID == GCNSchedStageID::OccInitialSchedule ||
      StageID == GCNSchedStageID::ILPInitialSchedule)
    DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}

void GCNSchedStage::finalizeGCNRegion() {
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
  DAG.RescheduleRegions[RegionIdx] = false;
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;

  // Revert scheduling if we have dropped occupancy or there is some other
  // reason that the original schedule is better.
  checkScheduling();

  if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
      StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
    SavedMutations.swap(DAG.Mutations);

  DAG.exitRegion();
  RegionIdx++;
}

void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
  LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
  LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");

  if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
      PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;

    // Early out if we have achieved the occupancy target.
    LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
    return;
  }

  unsigned TargetOccupancy =
      std::min(S.getTargetOccupancy(), ST.getOccupancyWithLocalMemSize(MF));
  unsigned WavesAfter =
      std::min(TargetOccupancy, PressureAfter.getOccupancy(ST));
  unsigned WavesBefore =
      std::min(TargetOccupancy, PressureBefore.getOccupancy(ST));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just scheduled region. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits, which could lead to more spilling.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to 4 waves if not limited by an
  // attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    DAG.RegionsWithMinOcc.reset();
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }
  // The maximum number of arch VGPRs on a non-unified register file, or the
  // maximum VGPR + AGPR in the unified register file case.
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  // The maximum number of arch VGPRs for both unified and non-unified register
  // files.
  unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);

  if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
      PressureAfter.getVGPRNum(false) > MaxArchVGPRs ||
      PressureAfter.getAGPRNum() > MaxArchVGPRs ||
      PressureAfter.getSGPRNum() > MaxSGPRs) {
    DAG.RescheduleRegions[RegionIdx] = true;
    DAG.RegionsWithHighRP[RegionIdx] = true;
    DAG.RegionsWithExcessRP[RegionIdx] = true;
  }

  // Revert if this region's schedule would cause a drop in occupancy or
  // spilling.
  if (shouldRevertScheduling(WavesAfter)) {
    revertScheduling();
  } else {
    DAG.Pressure[RegionIdx] = PressureAfter;
    DAG.RegionsWithMinOcc[RegionIdx] =
        PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
  }
}

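// Compute the cycle at which SU becomes ready: the maximum, over its register
// dependence predecessors, of the predecessor's ready cycle plus the defining
// instruction's latency. The result is memoized in ReadyCycles, keyed by
// SUnit number.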
unsigned
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
                                      const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}

#ifndef NDEBUG
struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};

static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction\n";
  unsigned IPrev = 1;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
    IPrev = I.second;
  }
}
#endif

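// Walk the schedule in order, accumulating "bubbles": cycles in which an
// instruction's operands are not yet ready. The resulting metric relates the
// bubble count to the total schedule length (SumBubbles scaled by
// ScheduleMetrics::ScaleFactor over CurrCycle, as in the debug output below),
// so a lower value means a denser schedule.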
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}

ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(
      printScheduleModel(ReadyCyclesSorted);
      dbgs() << "\n\t"
             << "Metric: "
             << (SumBubbles
                     ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
                     : 1)
             << "\n\n");
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}

bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  return false;
}

bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
  // If RP is not reduced in the unclustered reschedule stage, revert to the
  // old schedule.
  if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
       mayCauseSpilling(WavesAfter)) ||
      GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
    LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
    return true;
  }

  // Do not attempt to relax the schedule even more if we are already spilling.
  if (isRegionWithExcessRP())
    return false;

  LLVM_DEBUG(
      dbgs()
      << "\n\t *** In shouldRevertScheduling ***\n"
      << " *********** BEFORE UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
  LLVM_DEBUG(
      dbgs()
      << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);
  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
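  // The new schedule is kept only if the weighted "profit" at least breaks
  // even: Profit = (WavesAfter / WavesBefore) * ((OldMetric + Bias) /
  // NewMetric), evaluated in fixed point with ScaleFactor = 100. As an
  // illustrative example with WavesBefore = WavesAfter = 4, OldMetric = 50,
  // NewMetric = 70, and the default bias of 10:
  // Profit = (100 * 6000 / 70) / 100 = 85, which is below ScaleFactor, so the
  // reschedule would be reverted.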
  unsigned WavesBefore =
      std::min(S.getTargetOccupancy(), PressureBefore.getOccupancy(ST));
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
       NewMetric) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return Profit < ScheduleMetrics::ScaleFactor;
}

bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (PressureAfter == PressureBefore)
    return false;

  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
    return true;

  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (mayCauseSpilling(WavesAfter))
    return true;

  return false;
}

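// Heuristic: the schedule is assumed to risk spilling when occupancy is at
// the function's minimum waves per EU, the region already exceeds a pressure
// limit, and the new pressure is not strictly less than the old one.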
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
      !PressureAfter.less(MF, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}

void GCNSchedStage::revertScheduling() {
  DAG.RegionsWithMinOcc[RegionIdx] =
      PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RescheduleRegions[RegionIdx] =
      S.hasNextStage() &&
      S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    if (MI->getIterator() != DAG.RegionEnd) {
      DAG.BB->remove(MI);
      DAG.BB->insert(DAG.RegionEnd, MI);
      if (!MI->isDebugInstr())
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->all_defs())
      Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
    LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
  }

  // After reverting the schedule, debug instrs will now be at the end of the
  // block and RegionEnd will point to the first debug instr. Increment
  // RegionEnd past debug instrs to the actual end of the scheduling region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If Unsched.front() instruction is a debug instruction, this will actually
  // shrink the region since we moved all debug instructions to the end of the
  // block. Find the first instruction that is not a debug instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place and set
  // RegionBegin and RegionEnd if needed.
  DAG.placeDebugValues();

  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}

void PreRARematStage::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!DAG.LIS->hasInterval(Reg))
      continue;

    // TODO: Handle AGPR and SGPR rematerialization.
    if (!SRI->isVGPRClass(DAG.MRI.getRegClass(Reg)) ||
        !DAG.MRI.hasOneDef(Reg) || !DAG.MRI.hasOneNonDBGUse(Reg))
      continue;

    MachineOperand *Op = DAG.MRI.getOneDef(Reg);
    MachineInstr *Def = Op->getParent();
    if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
      continue;

    MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
    if (Def->getParent() == UseI->getParent())
      continue;

    // We are only collecting defs that are defined in another block and are
    // live-through or used inside regions at MinOccupancy. This means that the
    // register must be in the live-in set for the region.
    bool AddedToRematList = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      auto It = DAG.LiveIns[I].find(Reg);
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
        if (DAG.RegionsWithMinOcc[I]) {
          RematerializableInsts[I][Def] = UseI;
          AddedToRematList = true;
        }

        // Collect regions with rematerializable reg as live-in to avoid
        // searching later when updating RP.
        RematDefToLiveInRegions[Def].push_back(I);
      }
    }
    if (!AddedToRematList)
      RematDefToLiveInRegions.erase(Def);
  }
}
1326 | |
1327 | bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST, |
1328 | const TargetInstrInfo *TII) { |
1329 | // Temporary copies of cached variables we will be modifying and replacing if |
1330 | // sinking succeeds. |
1331 | SmallVector< |
1332 | std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32> |
1333 | NewRegions; |
1334 | DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns; |
1335 | DenseMap<unsigned, GCNRegPressure> NewPressure; |
1336 | BitVector NewRescheduleRegions; |
1337 | LiveIntervals *LIS = DAG.LIS; |
1338 | |
1339 | NewRegions.resize(N: DAG.Regions.size()); |
1340 | NewRescheduleRegions.resize(N: DAG.Regions.size()); |
1341 | |
1342 | // Collect only regions that has a rematerializable def as a live-in. |
1343 | SmallSet<unsigned, 16> ImpactedRegions; |
1344 | for (const auto &It : RematDefToLiveInRegions) |
1345 | ImpactedRegions.insert(I: It.second.begin(), E: It.second.end()); |
1346 | |
1347 | // Make copies of register pressure and live-ins cache that will be updated |
1348 | // as we rematerialize. |
1349 | for (auto Idx : ImpactedRegions) { |
1350 | NewPressure[Idx] = DAG.Pressure[Idx]; |
1351 | NewLiveIns[Idx] = DAG.LiveIns[Idx]; |
1352 | } |
1353 | NewRegions = DAG.Regions; |
1354 | NewRescheduleRegions.reset(); |
1355 | |
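  // Map each rematerialized instruction to the def it replaces so that
  // sinking can be rolled back if occupancy does not improve.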
1356 | DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef; |
1357 | bool Improved = false; |
1358 | for (auto I : ImpactedRegions) { |
1359 | if (!DAG.RegionsWithMinOcc[I]) |
1360 | continue; |
1361 | |
1362 | Improved = false; |
    int VGPRUsage = NewPressure[I].getVGPRNum(ST.hasGFX90AInsts());
1364 | int SGPRUsage = NewPressure[I].getSGPRNum(); |
1365 | |
1366 | // TODO: Handle occupancy drop due to AGPR and SGPR. |
    // Check if the occupancy drop is caused by VGPR usage rather than SGPR
    // usage.
    if (ST.getOccupancyWithNumSGPRs(SGPRUsage) == DAG.MinOccupancy)
1369 | break; |
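    // If a region is SGPR-limited, sinking VGPR defs cannot raise occupancy,
    // so the whole attempt is abandoned rather than just skipping the region.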
1370 | |
1371 | // The occupancy of this region could have been improved by a previous |
1372 | // iteration's sinking of defs. |
1373 | if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) { |
1374 | NewRescheduleRegions[I] = true; |
1375 | Improved = true; |
1376 | continue; |
1377 | } |
1378 | |
    // First check if we have enough trivially rematerializable instructions to
    // improve occupancy. Optimistically assume all instructions we are able to
    // sink decrease RP.
1382 | int TotalSinkableRegs = 0; |
1383 | for (const auto &It : RematerializableInsts[I]) { |
1384 | MachineInstr *Def = It.first; |
      Register DefReg = Def->getOperand(0).getReg();
1386 | TotalSinkableRegs += |
          SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
1388 | } |
1389 | int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs; |
    unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
    // If we cannot improve occupancy even in the most optimistic scenario,
    // then do not attempt to sink any instructions.
1393 | if (OptimisticOccupancy <= DAG.MinOccupancy) |
1394 | break; |
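    // For example (hypothetical numbers): with VGPRUsage = 168 and sinkable
    // defs covering 44 registers, VGPRsAfterSink = 124; sinking is attempted
    // only if the target maps 124 VGPRs to more waves than DAG.MinOccupancy.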
1395 | |
1396 | unsigned ImproveOccupancy = 0; |
1397 | SmallVector<MachineInstr *, 4> SinkedDefs; |
1398 | for (auto &It : RematerializableInsts[I]) { |
1399 | MachineInstr *Def = It.first; |
1400 | MachineBasicBlock::iterator InsertPos = |
1401 | MachineBasicBlock::iterator(It.second); |
      Register Reg = Def->getOperand(0).getReg();
1403 | // Rematerialize MI to its use block. Since we are only rematerializing |
1404 | // instructions that do not have any virtual reg uses, we do not need to |
1405 | // call LiveRangeEdit::allUsesAvailableAt() and |
1406 | // LiveRangeEdit::canRematerializeAt(). |
      TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                         Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
      MachineInstr *NewMI = &*std::prev(InsertPos);
      LIS->InsertMachineInstrInMaps(*NewMI);
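      // Reg's cached live interval is stale now that its def has moved;
      // recompute it from scratch.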
1411 | LIS->removeInterval(Reg); |
1412 | LIS->createAndComputeVirtRegInterval(Reg); |
1413 | InsertedMIToOldDef[NewMI] = Def; |
1414 | |
      // Update region boundaries in the scheduling region we sank from, since
      // we may sink an instruction that was at the beginning or end of its
      // region.
      DAG.updateRegionBoundaries(NewRegions, Def, /*NewMI =*/nullptr,
                                 /*Removing =*/true);
1419 | |
      // Update region boundaries in the region we sank to.
      DAG.updateRegionBoundaries(NewRegions, InsertPos, NewMI);
1422 | |
1423 | LaneBitmask PrevMask = NewLiveIns[I][Reg]; |
      // FIXME: Also update cached pressure for where the def was sunk from.
      // Update RP for all regions that have this reg as a live-in and remove
      // the reg from all regions as a live-in.
1427 | for (auto Idx : RematDefToLiveInRegions[Def]) { |
        NewLiveIns[Idx].erase(Reg);
1429 | if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) { |
1430 | // Def is live-through and not used in this block. |
          NewPressure[Idx].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
1432 | } else { |
1433 | // Def is used and rematerialized into this block. |
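        // The rematerialized def now lies inside this region, so the cached
        // max pressure cannot be adjusted incrementally; recompute it by
        // walking the region with a downward RP tracker.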
1434 | GCNDownwardRPTracker RPT(*LIS); |
        auto *NonDbgMI = &*skipDebugInstructionsForward(
            NewRegions[Idx].first, NewRegions[Idx].second);
        RPT.reset(*NonDbgMI, &NewLiveIns[Idx]);
        RPT.advance(NewRegions[Idx].second);
1439 | NewPressure[Idx] = RPT.moveMaxPressure(); |
1440 | } |
1441 | } |
1442 | |
      SinkedDefs.push_back(Def);
1444 | ImproveOccupancy = NewPressure[I].getOccupancy(ST); |
1445 | if (ImproveOccupancy > DAG.MinOccupancy) |
1446 | break; |
1447 | } |
1448 | |
    // Remove defs we just sank from all regions' lists of sinkable defs.
1450 | for (auto &Def : SinkedDefs) |
1451 | for (auto TrackedIdx : RematDefToLiveInRegions[Def]) |
        RematerializableInsts[TrackedIdx].erase(Def);
1453 | |
1454 | if (ImproveOccupancy <= DAG.MinOccupancy) |
1455 | break; |
1456 | |
1457 | NewRescheduleRegions[I] = true; |
1458 | Improved = true; |
1459 | } |
1460 | |
1461 | if (!Improved) { |
    // Occupancy was not improved for every region that was at MinOccupancy.
    // Undo sinking and remove newly rematerialized instructions.
1464 | for (auto &Entry : InsertedMIToOldDef) { |
1465 | MachineInstr *MI = Entry.first; |
1466 | MachineInstr *OldMI = Entry.second; |
      Register Reg = MI->getOperand(0).getReg();
      LIS->RemoveMachineInstrFromMaps(*MI);
1469 | MI->eraseFromParent(); |
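      // The original def may have been marked dead while its rematerialized
      // copy existed; clear the dead flag before recomputing the interval.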
1470 | OldMI->clearRegisterDeads(Reg); |
1471 | LIS->removeInterval(Reg); |
1472 | LIS->createAndComputeVirtRegInterval(Reg); |
1473 | } |
1474 | return false; |
1475 | } |
1476 | |
1477 | // Occupancy was improved for all regions. |
1478 | for (auto &Entry : InsertedMIToOldDef) { |
1479 | MachineInstr *MI = Entry.first; |
1480 | MachineInstr *OldMI = Entry.second; |
1481 | |
1482 | // Remove OldMI from BBLiveInMap since we are sinking it from its MBB. |
    DAG.BBLiveInMap.erase(OldMI);
1484 | |
    // Remove OldMI and update LIS.
    Register Reg = MI->getOperand(0).getReg();
    LIS->RemoveMachineInstrFromMaps(*OldMI);
1488 | OldMI->eraseFromParent(); |
1489 | LIS->removeInterval(Reg); |
1490 | LIS->createAndComputeVirtRegInterval(Reg); |
1491 | } |
1492 | |
1493 | // Update live-ins, register pressure, and regions caches. |
1494 | for (auto Idx : ImpactedRegions) { |
1495 | DAG.LiveIns[Idx] = NewLiveIns[Idx]; |
1496 | DAG.Pressure[Idx] = NewPressure[Idx]; |
    DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent());
1498 | } |
1499 | DAG.Regions = NewRegions; |
1500 | DAG.RescheduleRegions = NewRescheduleRegions; |
1501 | |
1502 | SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>(); |
  MFI.increaseOccupancy(MF, ++DAG.MinOccupancy);
1504 | |
1505 | return true; |
1506 | } |
1507 | |
1508 | // Copied from MachineLICM |
1509 | bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) { |
1510 | if (!DAG.TII->isTriviallyReMaterializable(MI)) |
1511 | return false; |
1512 | |
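  // Reject any virtual register uses: without LiveRangeEdit we cannot verify
  // that those values are still available at the insertion point.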
1513 | for (const MachineOperand &MO : MI.all_uses()) |
1514 | if (MO.getReg().isVirtual()) |
1515 | return false; |
1516 | |
1517 | return true; |
1518 | } |
1519 | |
1520 | // When removing, we will have to check both beginning and ending of the region. |
1521 | // When inserting, we will only have to check if we are inserting NewMI in front |
1522 | // of a scheduling region and do not need to check the ending since we will only |
1523 | // ever be inserting before an already existing MI. |
1524 | void GCNScheduleDAGMILive::updateRegionBoundaries( |
1525 | SmallVectorImpl<std::pair<MachineBasicBlock::iterator, |
1526 | MachineBasicBlock::iterator>> &RegionBoundaries, |
1527 | MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) { |
1528 | unsigned I = 0, E = RegionBoundaries.size(); |
  // Search for the first region of the block where MI is located.
1530 | while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent()) |
1531 | ++I; |
1532 | |
1533 | for (; I != E; ++I) { |
1534 | if (MI->getParent() != RegionBoundaries[I].first->getParent()) |
1535 | return; |
1536 | |
1537 | if (Removing && MI == RegionBoundaries[I].first && |
1538 | MI == RegionBoundaries[I].second) { |
      // MI is in a region of size 1; after removing it, the region will have
      // size 0. Set RegionBegin and RegionEnd to the past-the-end iterator of
      // the block.
1541 | RegionBoundaries[I] = |
1542 | std::pair(MI->getParent()->end(), MI->getParent()->end()); |
1543 | return; |
1544 | } |
1545 | if (MI == RegionBoundaries[I].first) { |
1546 | if (Removing) |
        RegionBoundaries[I] =
            std::pair(std::next(MI), RegionBoundaries[I].second);
1549 | else |
        // Inserted NewMI in front of the region; set new RegionBegin to NewMI.
1551 | RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI), |
1552 | RegionBoundaries[I].second); |
1553 | return; |
1554 | } |
1555 | if (Removing && MI == RegionBoundaries[I].second) { |
      RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(MI));
1557 | return; |
1558 | } |
1559 | } |
1560 | } |
1561 | |
1562 | static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) { |
1563 | return std::any_of( |
      DAG->begin(), DAG->end(), [](MachineBasicBlock::iterator MI) {
1565 | unsigned Opc = MI->getOpcode(); |
1566 | return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT; |
1567 | }); |
1568 | } |
1569 | |
1570 | GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive( |
1571 | MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S, |
1572 | bool RemoveKillFlags) |
1573 | : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {} |
1574 | |
1575 | void GCNPostScheduleDAGMILive::schedule() { |
  HasIGLPInstrs = hasIGLPInstrs(this);
1577 | if (HasIGLPInstrs) { |
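    // Swap out the current mutations and apply the IGLP mutation for this
    // scheduling pass; finalizeSchedule() swaps the originals back in.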
1578 | SavedMutations.clear(); |
    SavedMutations.swap(Mutations);
    addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
1581 | } |
1582 | |
1583 | ScheduleDAGMI::schedule(); |
1584 | } |
1585 | |
1586 | void GCNPostScheduleDAGMILive::finalizeSchedule() { |
1587 | if (HasIGLPInstrs) |
    SavedMutations.swap(Mutations);
1589 | |
1590 | ScheduleDAGMI::finalizeSchedule(); |
1591 | } |
1592 | |