//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This contains a MachineSchedStrategy implementation for maximizing wave
/// occupancy on GCN hardware.
///
/// This pass will apply multiple scheduling stages to the same function.
/// Regions are first recorded in GCNScheduleDAGMILive::schedule. The actual
/// entry point for the scheduling of those regions is
/// GCNScheduleDAGMILive::runSchedStages.
///
/// Generally, the reason for having multiple scheduling stages is to account
/// for the kernel-wide effect of register usage on occupancy. Usually, only a
/// few scheduling regions will have register pressure high enough to limit
/// occupancy for the kernel, so constraints can be relaxed to improve ILP in
/// other regions.
///
//===----------------------------------------------------------------------===//
25
26#include "GCNSchedStrategy.h"
27#include "AMDGPUIGroupLP.h"
28#include "SIMachineFunctionInfo.h"
29#include "llvm/CodeGen/RegisterClassInfo.h"
30
31#define DEBUG_TYPE "machine-scheduler"
32
33using namespace llvm;
34
static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs. latency. Set it to "
        "100 to chase occupancy only."),
    cl::init(10));

static cl::opt<bool>
    RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
               cl::desc("Relax occupancy targets for kernels which are memory "
                        "bound (amdgpu-membound-threshold) or "
                        "wave limited (amdgpu-limit-wave-threshold)."),
               cl::init(false));
60
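// Fixed-point scale used by the schedule metric: the metric reports stall
// ("bubble") cycles as a fraction of the total schedule length, multiplied by
// this factor (see GCNSchedStage::getScheduleMetrics).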
61const unsigned ScheduleMetrics::ScaleFactor = 100;
62
63GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
64 : GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
65 HasHighPressure(false) {}
66
67void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
68 GenericScheduler::initialize(dag: DAG);
69
70 MF = &DAG->MF;
71
72 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
73
74 SGPRExcessLimit =
75 Context->RegClassInfo->getNumAllocatableRegs(RC: &AMDGPU::SGPR_32RegClass);
76 VGPRExcessLimit =
77 Context->RegClassInfo->getNumAllocatableRegs(RC: &AMDGPU::VGPR_32RegClass);
78
79 SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  // Set the initial TargetOccupancy to the maximum occupancy that we can
  // achieve for this function. This effectively sets a lower bound on the
  // 'Critical' register limits in the scheduler.
  // Allow for lower occupancy targets if the kernel is wave limited or memory
  // bound and the relaxed occupancy feature is in use.
85 TargetOccupancy =
86 RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
87 SGPRCriticalLimit =
88 std::min(a: ST.getMaxNumSGPRs(WavesPerEU: TargetOccupancy, Addressable: true), b: SGPRExcessLimit);
89
90 if (!KnownExcessRP) {
91 VGPRCriticalLimit =
92 std::min(a: ST.getMaxNumVGPRs(WavesPerEU: TargetOccupancy), b: VGPRExcessLimit);
93 } else {
    // This is similar to the result of ST.getMaxNumVGPRs(TargetOccupancy),
    // except that it returns a reasonably small number for targets with lots
    // of VGPRs, such as GFX10 and GFX11.
97 LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
98 "VGPRCriticalLimit calculation method.\n");
99
100 unsigned Granule = AMDGPU::IsaInfo::getVGPRAllocGranule(&ST);
101 unsigned Addressable = AMDGPU::IsaInfo::getAddressableNumVGPRs(&ST);
102 unsigned VGPRBudget = alignDown(Value: Addressable / TargetOccupancy, Align: Granule);
103 VGPRBudget = std::max(a: VGPRBudget, b: Granule);
104 VGPRCriticalLimit = std::min(a: VGPRBudget, b: VGPRExcessLimit);
105 }
106
107 // Subtract error margin and bias from register limits and avoid overflow.
108 SGPRCriticalLimit -= std::min(a: SGPRLimitBias + ErrorMargin, b: SGPRCriticalLimit);
109 VGPRCriticalLimit -= std::min(a: VGPRLimitBias + ErrorMargin, b: VGPRCriticalLimit);
110 SGPRExcessLimit -= std::min(a: SGPRLimitBias + ErrorMargin, b: SGPRExcessLimit);
111 VGPRExcessLimit -= std::min(a: VGPRLimitBias + ErrorMargin, b: VGPRExcessLimit);
112
113 LLVM_DEBUG(dbgs() << "VGPRCriticalLimit = " << VGPRCriticalLimit
114 << ", VGPRExcessLimit = " << VGPRExcessLimit
115 << ", SGPRCriticalLimit = " << SGPRCriticalLimit
116 << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n");
117}
118
119void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
120 bool AtTop,
121 const RegPressureTracker &RPTracker,
122 const SIRegisterInfo *SRI,
123 unsigned SGPRPressure,
124 unsigned VGPRPressure) {
125 Cand.SU = SU;
126 Cand.AtTop = AtTop;
127
128 if (!DAG->isTrackingPressure())
129 return;
130
  // getDownwardPressure() and getUpwardPressure() make temporary changes to
  // the tracker, so we need to pass those functions a non-const reference.
133 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
134
135 Pressure.clear();
136 MaxPressure.clear();
137
138 if (AtTop)
139 TempTracker.getDownwardPressure(MI: SU->getInstr(), PressureResult&: Pressure, MaxPressureResult&: MaxPressure);
140 else {
    // FIXME: I think for bottom-up scheduling, the register pressure is cached
    // and can be retrieved by DAG->getPressureDiff(SU).
143 TempTracker.getUpwardPressure(MI: SU->getInstr(), PressureResult&: Pressure, MaxPressureResult&: MaxPressure);
144 }
145
146 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
147 unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
148
149 // If two instructions increase the pressure of different register sets
150 // by the same amount, the generic scheduler will prefer to schedule the
151 // instruction that increases the set with the least amount of registers,
152 // which in our case would be SGPRs. This is rarely what we want, so
153 // when we report excess/critical register pressure, we do it either
154 // only for VGPRs or only for SGPRs.
155
156 // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
157 const unsigned MaxVGPRPressureInc = 16;
158 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
159 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
160
161
162 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
163 // to increase the likelihood we don't go over the limits. We should improve
164 // the analysis to look through dependencies to find the path with the least
165 // register pressure.
166
167 // We only need to update the RPDelta for instructions that increase register
168 // pressure. Instructions that decrease or keep reg pressure the same will be
169 // marked as RegExcess in tryCandidate() when they are compared with
170 // instructions that increase the register pressure.
171 if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
172 HasHighPressure = true;
173 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
174 Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
175 }
176
177 if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
178 HasHighPressure = true;
179 Cand.RPDelta.Excess = PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
180 Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
181 }
182
  // Register pressure is considered 'CRITICAL' if it is approaching a value
  // that would reduce the wave occupancy for the execution unit. When
  // register pressure is 'CRITICAL', increasing SGPR pressure and increasing
  // VGPR pressure have the same cost, so we don't need to prefer one over the
  // other.
187
188 int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
189 int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
190
191 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
192 HasHighPressure = true;
193 if (SGPRDelta > VGPRDelta) {
194 Cand.RPDelta.CriticalMax =
195 PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
196 Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
197 } else {
198 Cand.RPDelta.CriticalMax =
199 PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
200 Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
201 }
202 }
203}
204
205// This function is mostly cut and pasted from
206// GenericScheduler::pickNodeFromQueue()
207void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
208 const CandPolicy &ZonePolicy,
209 const RegPressureTracker &RPTracker,
210 SchedCandidate &Cand) {
211 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
212 ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
213 unsigned SGPRPressure = 0;
214 unsigned VGPRPressure = 0;
215 if (DAG->isTrackingPressure()) {
216 SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
217 VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
218 }
219 ReadyQueue &Q = Zone.Available;
220 for (SUnit *SU : Q) {
221
222 SchedCandidate TryCand(ZonePolicy);
223 initCandidate(Cand&: TryCand, SU, AtTop: Zone.isTop(), RPTracker, SRI,
224 SGPRPressure, VGPRPressure);
225 // Pass SchedBoundary only when comparing nodes from the same boundary.
226 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
227 tryCandidate(Cand, TryCand, Zone: ZoneArg);
228 if (TryCand.Reason != NoCand) {
229 // Initialize resource delta if needed in case future heuristics query it.
230 if (TryCand.ResDelta == SchedResourceDelta())
231 TryCand.initResourceDelta(DAG: Zone.DAG, SchedModel);
232 Cand.setBest(TryCand);
233 LLVM_DEBUG(traceCandidate(Cand));
234 }
235 }
236}
237
238// This function is mostly cut and pasted from
239// GenericScheduler::pickNodeBidirectional()
240SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
241 // Schedule as far as possible in the direction of no choice. This is most
242 // efficient, but also provides the best heuristics for CriticalPSets.
243 if (SUnit *SU = Bot.pickOnlyChoice()) {
244 IsTopNode = false;
245 return SU;
246 }
247 if (SUnit *SU = Top.pickOnlyChoice()) {
248 IsTopNode = true;
249 return SU;
250 }
251 // Set the bottom-up policy based on the state of the current bottom zone and
252 // the instructions outside the zone, including the top zone.
253 CandPolicy BotPolicy;
254 setPolicy(Policy&: BotPolicy, /*IsPostRA=*/false, CurrZone&: Bot, OtherZone: &Top);
255 // Set the top-down policy based on the state of the current top zone and
256 // the instructions outside the zone, including the bottom zone.
257 CandPolicy TopPolicy;
258 setPolicy(Policy&: TopPolicy, /*IsPostRA=*/false, CurrZone&: Top, OtherZone: &Bot);
259
260 // See if BotCand is still valid (because we previously scheduled from Top).
261 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
262 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
263 BotCand.Policy != BotPolicy) {
264 BotCand.reset(NewPolicy: CandPolicy());
265 pickNodeFromQueue(Zone&: Bot, ZonePolicy: BotPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: BotCand);
266 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
267 } else {
268 LLVM_DEBUG(traceCandidate(BotCand));
269#ifndef NDEBUG
270 if (VerifyScheduling) {
271 SchedCandidate TCand;
272 TCand.reset(NewPolicy: CandPolicy());
273 pickNodeFromQueue(Zone&: Bot, ZonePolicy: BotPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: TCand);
274 assert(TCand.SU == BotCand.SU &&
275 "Last pick result should correspond to re-picking right now");
276 }
277#endif
278 }
279
280 // Check if the top Q has a better candidate.
281 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
282 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
283 TopCand.Policy != TopPolicy) {
284 TopCand.reset(NewPolicy: CandPolicy());
285 pickNodeFromQueue(Zone&: Top, ZonePolicy: TopPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TopCand);
286 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
287 } else {
288 LLVM_DEBUG(traceCandidate(TopCand));
289#ifndef NDEBUG
290 if (VerifyScheduling) {
291 SchedCandidate TCand;
292 TCand.reset(NewPolicy: CandPolicy());
293 pickNodeFromQueue(Zone&: Top, ZonePolicy: TopPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TCand);
294 assert(TCand.SU == TopCand.SU &&
295 "Last pick result should correspond to re-picking right now");
296 }
297#endif
298 }
299
300 // Pick best from BotCand and TopCand.
301 LLVM_DEBUG(dbgs() << "Top Cand: "; traceCandidate(TopCand);
302 dbgs() << "Bot Cand: "; traceCandidate(BotCand););
303 SchedCandidate Cand = BotCand;
304 TopCand.Reason = NoCand;
305 tryCandidate(Cand, TryCand&: TopCand, Zone: nullptr);
306 if (TopCand.Reason != NoCand) {
307 Cand.setBest(TopCand);
308 }
309 LLVM_DEBUG(dbgs() << "Picking: "; traceCandidate(Cand););
310
311 IsTopNode = Cand.AtTop;
312 return Cand.SU;
313}
314
315// This function is mostly cut and pasted from
316// GenericScheduler::pickNode()
317SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
318 if (DAG->top() == DAG->bottom()) {
319 assert(Top.Available.empty() && Top.Pending.empty() &&
320 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
321 return nullptr;
322 }
323 SUnit *SU;
324 do {
325 if (RegionPolicy.OnlyTopDown) {
326 SU = Top.pickOnlyChoice();
327 if (!SU) {
328 CandPolicy NoPolicy;
329 TopCand.reset(NewPolicy: NoPolicy);
330 pickNodeFromQueue(Zone&: Top, ZonePolicy: NoPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TopCand);
331 assert(TopCand.Reason != NoCand && "failed to find a candidate");
332 SU = TopCand.SU;
333 }
334 IsTopNode = true;
335 } else if (RegionPolicy.OnlyBottomUp) {
336 SU = Bot.pickOnlyChoice();
337 if (!SU) {
338 CandPolicy NoPolicy;
339 BotCand.reset(NewPolicy: NoPolicy);
340 pickNodeFromQueue(Zone&: Bot, ZonePolicy: NoPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: BotCand);
341 assert(BotCand.Reason != NoCand && "failed to find a candidate");
342 SU = BotCand.SU;
343 }
344 IsTopNode = false;
345 } else {
346 SU = pickNodeBidirectional(IsTopNode);
347 }
348 } while (SU->isScheduled);
349
350 if (SU->isTopReady())
351 Top.removeReady(SU);
352 if (SU->isBottomReady())
353 Bot.removeReady(SU);
354
355 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
356 << *SU->getInstr());
357 return SU;
358}
359
360GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
361 assert(CurrentStage && CurrentStage != SchedStages.end());
362 return *CurrentStage;
363}
364
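// Advance to the next scheduling stage (or to the first one on the initial
// call). Returns false once all stages have been run.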
365bool GCNSchedStrategy::advanceStage() {
366 assert(CurrentStage != SchedStages.end());
367 if (!CurrentStage)
368 CurrentStage = SchedStages.begin();
369 else
370 CurrentStage++;
371
372 return CurrentStage != SchedStages.end();
373}
374
375bool GCNSchedStrategy::hasNextStage() const {
376 assert(CurrentStage);
377 return std::next(x: CurrentStage) != SchedStages.end();
378}
379
380GCNSchedStageID GCNSchedStrategy::getNextStage() const {
381 assert(CurrentStage && std::next(CurrentStage) != SchedStages.end());
382 return *std::next(x: CurrentStage);
383}
384
385GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
386 const MachineSchedContext *C)
387 : GCNSchedStrategy(C) {
388 SchedStages.push_back(Elt: GCNSchedStageID::OccInitialSchedule);
389 SchedStages.push_back(Elt: GCNSchedStageID::UnclusteredHighRPReschedule);
390 SchedStages.push_back(Elt: GCNSchedStageID::ClusteredLowOccupancyReschedule);
391 SchedStages.push_back(Elt: GCNSchedStageID::PreRARematerialize);
392}
393
394GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
395 : GCNSchedStrategy(C) {
396 SchedStages.push_back(Elt: GCNSchedStageID::ILPInitialSchedule);
397}
398
399bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
400 SchedCandidate &TryCand,
401 SchedBoundary *Zone) const {
402 // Initialize the candidate if needed.
403 if (!Cand.isValid()) {
404 TryCand.Reason = NodeOrder;
405 return true;
406 }
407
408 // Avoid spilling by exceeding the register limit.
409 if (DAG->isTrackingPressure() &&
410 tryPressure(TryP: TryCand.RPDelta.Excess, CandP: Cand.RPDelta.Excess, TryCand, Cand,
411 Reason: RegExcess, TRI, MF: DAG->MF))
412 return TryCand.Reason != NoCand;
413
  // Bias PhysReg defs and copies towards their uses and definitions,
  // respectively.
415 if (tryGreater(TryVal: biasPhysReg(SU: TryCand.SU, isTop: TryCand.AtTop),
416 CandVal: biasPhysReg(SU: Cand.SU, isTop: Cand.AtTop), TryCand, Cand, Reason: PhysReg))
417 return TryCand.Reason != NoCand;
418
419 bool SameBoundary = Zone != nullptr;
420 if (SameBoundary) {
421 // Prioritize instructions that read unbuffered resources by stall cycles.
422 if (tryLess(TryVal: Zone->getLatencyStallCycles(SU: TryCand.SU),
423 CandVal: Zone->getLatencyStallCycles(SU: Cand.SU), TryCand, Cand, Reason: Stall))
424 return TryCand.Reason != NoCand;
425
426 // Avoid critical resource consumption and balance the schedule.
427 TryCand.initResourceDelta(DAG, SchedModel);
428 if (tryLess(TryVal: TryCand.ResDelta.CritResources, CandVal: Cand.ResDelta.CritResources,
429 TryCand, Cand, Reason: ResourceReduce))
430 return TryCand.Reason != NoCand;
431 if (tryGreater(TryVal: TryCand.ResDelta.DemandedResources,
432 CandVal: Cand.ResDelta.DemandedResources, TryCand, Cand,
433 Reason: ResourceDemand))
434 return TryCand.Reason != NoCand;
435
436 // Unconditionally try to reduce latency.
437 if (tryLatency(TryCand, Cand, Zone&: *Zone))
438 return TryCand.Reason != NoCand;
439
440 // Weak edges are for clustering and other constraints.
441 if (tryLess(TryVal: getWeakLeft(SU: TryCand.SU, isTop: TryCand.AtTop),
442 CandVal: getWeakLeft(SU: Cand.SU, isTop: Cand.AtTop), TryCand, Cand, Reason: Weak))
443 return TryCand.Reason != NoCand;
444 }
445
446 // Keep clustered nodes together to encourage downstream peephole
447 // optimizations which may reduce resource requirements.
448 //
449 // This is a best effort to set things up for a post-RA pass. Optimizations
450 // like generating loads of multiple registers should ideally be done within
451 // the scheduler pass by combining the loads during DAG postprocessing.
452 const SUnit *CandNextClusterSU =
453 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
454 const SUnit *TryCandNextClusterSU =
455 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
456 if (tryGreater(TryVal: TryCand.SU == TryCandNextClusterSU,
457 CandVal: Cand.SU == CandNextClusterSU, TryCand, Cand, Reason: Cluster))
458 return TryCand.Reason != NoCand;
459
460 // Avoid increasing the max critical pressure in the scheduled region.
461 if (DAG->isTrackingPressure() &&
462 tryPressure(TryP: TryCand.RPDelta.CriticalMax, CandP: Cand.RPDelta.CriticalMax,
463 TryCand, Cand, Reason: RegCritical, TRI, MF: DAG->MF))
464 return TryCand.Reason != NoCand;
465
466 // Avoid increasing the max pressure of the entire region.
467 if (DAG->isTrackingPressure() &&
468 tryPressure(TryP: TryCand.RPDelta.CurrentMax, CandP: Cand.RPDelta.CurrentMax, TryCand,
469 Cand, Reason: RegMax, TRI, MF: DAG->MF))
470 return TryCand.Reason != NoCand;
471
472 if (SameBoundary) {
473 // Fall through to original instruction order.
474 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) ||
475 (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
476 TryCand.Reason = NodeOrder;
477 return true;
478 }
479 }
480 return false;
481}
482
483GCNScheduleDAGMILive::GCNScheduleDAGMILive(
484 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
485 : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
486 MFI(*MF.getInfo<SIMachineFunctionInfo>()),
487 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
488
489 LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
490 if (RelaxedOcc) {
491 MinOccupancy = std::min(a: MFI.getMinAllowedOccupancy(), b: StartingOccupancy);
492 if (MinOccupancy != StartingOccupancy)
493 LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
494 << ".\n");
495 }
496}
497
498std::unique_ptr<GCNSchedStage>
499GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
500 switch (SchedStageID) {
501 case GCNSchedStageID::OccInitialSchedule:
502 return std::make_unique<OccInitialScheduleStage>(args&: SchedStageID, args&: *this);
503 case GCNSchedStageID::UnclusteredHighRPReschedule:
504 return std::make_unique<UnclusteredHighRPStage>(args&: SchedStageID, args&: *this);
505 case GCNSchedStageID::ClusteredLowOccupancyReschedule:
506 return std::make_unique<ClusteredLowOccStage>(args&: SchedStageID, args&: *this);
507 case GCNSchedStageID::PreRARematerialize:
508 return std::make_unique<PreRARematStage>(args&: SchedStageID, args&: *this);
509 case GCNSchedStageID::ILPInitialSchedule:
510 return std::make_unique<ILPInitialScheduleStage>(args&: SchedStageID, args&: *this);
511 }
512
513 llvm_unreachable("Unknown SchedStageID.");
514}
515
516void GCNScheduleDAGMILive::schedule() {
517 // Collect all scheduling regions. The actual scheduling is performed in
518 // GCNScheduleDAGMILive::finalizeSchedule.
519 Regions.push_back(Elt: std::pair(RegionBegin, RegionEnd));
520}
521
522GCNRegPressure
523GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
524 GCNDownwardRPTracker RPTracker(*LIS);
525 RPTracker.advance(Begin: begin(), End: end(), LiveRegsCopy: &LiveIns[RegionIdx]);
526 return RPTracker.moveMaxPressure();
527}
528
529void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
530 const MachineBasicBlock *MBB) {
531 GCNDownwardRPTracker RPTracker(*LIS);
532
  // If the block has only one successor, then the live-ins of that successor
  // are the live-outs of the current block. We can reuse the calculated live
  // set if the successor will be sent to scheduling after the current block.

  // However, due to a bug in LiveInterval analysis it may happen that two
  // predecessors of the same successor block have different lane bitmasks for
  // a live-out register. Work around that by sticking to a one-to-one
  // relationship, i.e. one predecessor with one successor block.
541 const MachineBasicBlock *OnlySucc = nullptr;
542 if (MBB->succ_size() == 1) {
543 auto *Candidate = *MBB->succ_begin();
544 if (!Candidate->empty() && Candidate->pred_size() == 1) {
545 SlotIndexes *Ind = LIS->getSlotIndexes();
546 if (Ind->getMBBStartIdx(mbb: MBB) < Ind->getMBBStartIdx(mbb: Candidate))
547 OnlySucc = Candidate;
548 }
549 }
550
551 // Scheduler sends regions from the end of the block upwards.
552 size_t CurRegion = RegionIdx;
553 for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
554 if (Regions[CurRegion].first->getParent() != MBB)
555 break;
556 --CurRegion;
557
558 auto I = MBB->begin();
559 auto LiveInIt = MBBLiveIns.find(Val: MBB);
560 auto &Rgn = Regions[CurRegion];
561 auto *NonDbgMI = &*skipDebugInstructionsForward(It: Rgn.first, End: Rgn.second);
562 if (LiveInIt != MBBLiveIns.end()) {
563 auto LiveIn = std::move(LiveInIt->second);
564 RPTracker.reset(MI: *MBB->begin(), LiveRegs: &LiveIn);
565 MBBLiveIns.erase(I: LiveInIt);
566 } else {
567 I = Rgn.first;
568 auto LRS = BBLiveInMap.lookup(Val: NonDbgMI);
569#ifdef EXPENSIVE_CHECKS
570 assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
571#endif
572 RPTracker.reset(MI: *I, LiveRegs: &LRS);
573 }
574
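  // Walk the block top-down: record the live-in set whenever we reach the
  // start of a region and its max pressure when we reach its end, then move on
  // to the next region in the block, stopping once the region requested via
  // RegionIdx has been processed.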
575 for (;;) {
576 I = RPTracker.getNext();
577
578 if (Regions[CurRegion].first == I || NonDbgMI == I) {
579 LiveIns[CurRegion] = RPTracker.getLiveRegs();
580 RPTracker.clearMaxPressure();
581 }
582
583 if (Regions[CurRegion].second == I) {
584 Pressure[CurRegion] = RPTracker.moveMaxPressure();
585 if (CurRegion-- == RegionIdx)
586 break;
587 }
588 RPTracker.advanceToNext();
589 RPTracker.advanceBeforeNext();
590 }
591
592 if (OnlySucc) {
593 if (I != MBB->end()) {
594 RPTracker.advanceToNext();
595 RPTracker.advance(End: MBB->end());
596 }
597 RPTracker.advanceBeforeNext();
598 MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
599 }
600}
601
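// Map the first non-debug instruction of each block's first scheduling region
// to the set of registers live before it. computeBlockPressure() uses this to
// seed its per-block RP tracking.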
602DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
603GCNScheduleDAGMILive::getBBLiveInMap() const {
604 assert(!Regions.empty());
605 std::vector<MachineInstr *> BBStarters;
606 BBStarters.reserve(n: Regions.size());
607 auto I = Regions.rbegin(), E = Regions.rend();
608 auto *BB = I->first->getParent();
609 do {
610 auto *MI = &*skipDebugInstructionsForward(It: I->first, End: I->second);
611 BBStarters.push_back(x: MI);
612 do {
613 ++I;
614 } while (I != E && I->first->getParent() == BB);
615 } while (I != E);
616 return getLiveRegMap(R&: BBStarters, After: false /*After*/, LIS&: *LIS);
617}
618
619void GCNScheduleDAGMILive::finalizeSchedule() {
620 // Start actual scheduling here. This function is called by the base
621 // MachineScheduler after all regions have been recorded by
622 // GCNScheduleDAGMILive::schedule().
623 LiveIns.resize(N: Regions.size());
624 Pressure.resize(N: Regions.size());
625 RescheduleRegions.resize(N: Regions.size());
626 RegionsWithHighRP.resize(N: Regions.size());
627 RegionsWithExcessRP.resize(N: Regions.size());
628 RegionsWithMinOcc.resize(N: Regions.size());
629 RegionsWithIGLPInstrs.resize(N: Regions.size());
630 RescheduleRegions.set();
631 RegionsWithHighRP.reset();
632 RegionsWithExcessRP.reset();
633 RegionsWithMinOcc.reset();
634 RegionsWithIGLPInstrs.reset();
635
636 runSchedStages();
637}
638
639void GCNScheduleDAGMILive::runSchedStages() {
640 LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
641
642 if (!Regions.empty())
643 BBLiveInMap = getBBLiveInMap();
644
645 GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
646 while (S.advanceStage()) {
647 auto Stage = createSchedStage(SchedStageID: S.getCurrentStage());
648 if (!Stage->initGCNSchedStage())
649 continue;
650
651 for (auto Region : Regions) {
652 RegionBegin = Region.first;
653 RegionEnd = Region.second;
      // Set up for scheduling the region and check whether it should be
      // skipped.
655 if (!Stage->initGCNRegion()) {
656 Stage->advanceRegion();
657 exitRegion();
658 continue;
659 }
660
661 ScheduleDAGMILive::schedule();
662 Stage->finalizeGCNRegion();
663 }
664
665 Stage->finalizeGCNSchedStage();
666 }
667}
668
669#ifndef NDEBUG
670raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
671 switch (StageID) {
672 case GCNSchedStageID::OccInitialSchedule:
673 OS << "Max Occupancy Initial Schedule";
674 break;
675 case GCNSchedStageID::UnclusteredHighRPReschedule:
676 OS << "Unclustered High Register Pressure Reschedule";
677 break;
678 case GCNSchedStageID::ClusteredLowOccupancyReschedule:
679 OS << "Clustered Low Occupancy Reschedule";
680 break;
681 case GCNSchedStageID::PreRARematerialize:
682 OS << "Pre-RA Rematerialize";
683 break;
684 case GCNSchedStageID::ILPInitialSchedule:
685 OS << "Max ILP Initial Schedule";
686 break;
687 }
688
689 return OS;
690}
691#endif
692
693GCNSchedStage::GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
694 : DAG(DAG), S(static_cast<GCNSchedStrategy &>(*DAG.SchedImpl)), MF(DAG.MF),
695 MFI(DAG.MFI), ST(DAG.ST), StageID(StageID) {}
696
697bool GCNSchedStage::initGCNSchedStage() {
698 if (!DAG.LIS)
699 return false;
700
701 LLVM_DEBUG(dbgs() << "Starting scheduling stage: " << StageID << "\n");
702 return true;
703}
704
705bool UnclusteredHighRPStage::initGCNSchedStage() {
706 if (DisableUnclusterHighRP)
707 return false;
708
709 if (!GCNSchedStage::initGCNSchedStage())
710 return false;
711
712 if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
713 return false;
714
715 SavedMutations.swap(x&: DAG.Mutations);
716 DAG.addMutation(
717 Mutation: createIGroupLPDAGMutation(Phase: AMDGPU::SchedulingPhase::PreRAReentry));
718
719 InitialOccupancy = DAG.MinOccupancy;
  // Aggressively try to reduce register pressure in the unclustered high RP
  // stage. Temporarily increase the occupancy target in the region.
722 S.SGPRLimitBias = S.HighRPSGPRBias;
723 S.VGPRLimitBias = S.HighRPVGPRBias;
724 if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy)
725 MFI.increaseOccupancy(MF, Limit: ++DAG.MinOccupancy);
726
  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << DAG.MinOccupancy << ".\n");
732
733 return true;
734}
735
736bool ClusteredLowOccStage::initGCNSchedStage() {
737 if (DisableClusteredLowOccupancy)
738 return false;
739
740 if (!GCNSchedStage::initGCNSchedStage())
741 return false;
742
743 // Don't bother trying to improve ILP in lower RP regions if occupancy has not
744 // been dropped. All regions will have already been scheduled with the ideal
745 // occupancy targets.
746 if (DAG.StartingOccupancy <= DAG.MinOccupancy)
747 return false;
748
749 LLVM_DEBUG(
750 dbgs() << "Retrying function scheduling with lowest recorded occupancy "
751 << DAG.MinOccupancy << ".\n");
752 return true;
753}
754
755bool PreRARematStage::initGCNSchedStage() {
756 if (!GCNSchedStage::initGCNSchedStage())
757 return false;
758
759 if (DAG.RegionsWithMinOcc.none() || DAG.Regions.size() == 1)
760 return false;
761
762 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
763 // Check maximum occupancy
764 if (ST.computeOccupancy(F: MF.getFunction(), LDSSize: MFI.getLDSSize()) ==
765 DAG.MinOccupancy)
766 return false;
767
  // FIXME: This pass will invalidate the cached MBBLiveIns for regions
  // in between the defs and the region we sink the def to. Cached pressure
  // for regions where a def is sunk from will also be invalidated. This will
  // need to be fixed if there is another pass after this pass.
772 assert(!S.hasNextStage());
773
774 collectRematerializableInstructions();
775 if (RematerializableInsts.empty() || !sinkTriviallyRematInsts(ST, TII))
776 return false;
777
778 LLVM_DEBUG(
779 dbgs() << "Retrying function scheduling with improved occupancy of "
780 << DAG.MinOccupancy << " from rematerializing\n");
781 return true;
782}
783
784void GCNSchedStage::finalizeGCNSchedStage() {
785 DAG.finishBlock();
786 LLVM_DEBUG(dbgs() << "Ending scheduling stage: " << StageID << "\n");
787}
788
789void UnclusteredHighRPStage::finalizeGCNSchedStage() {
790 SavedMutations.swap(x&: DAG.Mutations);
791 S.SGPRLimitBias = S.VGPRLimitBias = 0;
792 if (DAG.MinOccupancy > InitialOccupancy) {
793 for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX)
794 DAG.RegionsWithMinOcc[IDX] =
795 DAG.Pressure[IDX].getOccupancy(ST: DAG.ST) == DAG.MinOccupancy;
796
797 LLVM_DEBUG(dbgs() << StageID
798 << " stage successfully increased occupancy to "
799 << DAG.MinOccupancy << '\n');
800 }
801
802 GCNSchedStage::finalizeGCNSchedStage();
803}
804
805bool GCNSchedStage::initGCNRegion() {
806 // Check whether this new region is also a new block.
807 if (DAG.RegionBegin->getParent() != CurrentMBB)
808 setupNewBlock();
809
810 unsigned NumRegionInstrs = std::distance(first: DAG.begin(), last: DAG.end());
811 DAG.enterRegion(bb: CurrentMBB, begin: DAG.begin(), end: DAG.end(), regioninstrs: NumRegionInstrs);
812
813 // Skip empty scheduling regions (0 or 1 schedulable instructions).
814 if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(x: DAG.end()))
815 return false;
816
817 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
818 LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
819 << " " << CurrentMBB->getName()
820 << "\n From: " << *DAG.begin() << " To: ";
821 if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
822 else dbgs() << "End";
823 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
824
825 // Save original instruction order before scheduling for possible revert.
826 Unsched.clear();
827 Unsched.reserve(n: DAG.NumRegionInstrs);
828 if (StageID == GCNSchedStageID::OccInitialSchedule ||
829 StageID == GCNSchedStageID::ILPInitialSchedule) {
830 for (auto &I : DAG) {
831 Unsched.push_back(x: &I);
832 if (I.getOpcode() == AMDGPU::SCHED_GROUP_BARRIER ||
833 I.getOpcode() == AMDGPU::IGLP_OPT)
834 DAG.RegionsWithIGLPInstrs[RegionIdx] = true;
835 }
836 } else {
837 for (auto &I : DAG)
838 Unsched.push_back(x: &I);
839 }
840
841 PressureBefore = DAG.Pressure[RegionIdx];
842
843 LLVM_DEBUG(
844 dbgs() << "Pressure before scheduling:\nRegion live-ins:"
845 << print(DAG.LiveIns[RegionIdx], DAG.MRI)
846 << "Region live-in pressure: "
847 << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
848 << "Region register pressure: " << print(PressureBefore));
849
850 S.HasHighPressure = false;
851 S.KnownExcessRP = isRegionWithExcessRP();
852
853 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
854 StageID != GCNSchedStageID::UnclusteredHighRPReschedule) {
855 SavedMutations.clear();
856 SavedMutations.swap(x&: DAG.Mutations);
857 bool IsInitialStage = StageID == GCNSchedStageID::OccInitialSchedule ||
858 StageID == GCNSchedStageID::ILPInitialSchedule;
859 DAG.addMutation(Mutation: createIGroupLPDAGMutation(
860 Phase: IsInitialStage ? AMDGPU::SchedulingPhase::Initial
861 : AMDGPU::SchedulingPhase::PreRAReentry));
862 }
863
864 return true;
865}
866
867bool UnclusteredHighRPStage::initGCNRegion() {
868 // Only reschedule regions with the minimum occupancy or regions that may have
869 // spilling (excess register pressure).
870 if ((!DAG.RegionsWithMinOcc[RegionIdx] ||
871 DAG.MinOccupancy <= InitialOccupancy) &&
872 !DAG.RegionsWithExcessRP[RegionIdx])
873 return false;
874
875 return GCNSchedStage::initGCNRegion();
876}
877
878bool ClusteredLowOccStage::initGCNRegion() {
  // We may need to reschedule this region if it wasn't rescheduled in the last
  // stage, or if we found it was testing critical register pressure limits in
  // the unclustered reschedule stage. The latter is because we may not have
  // been able to raise the min occupancy in the previous stage, so the region
  // may be overly constrained even if it was already rescheduled.
884 if (!DAG.RegionsWithHighRP[RegionIdx])
885 return false;
886
887 return GCNSchedStage::initGCNRegion();
888}
889
890bool PreRARematStage::initGCNRegion() {
891 if (!DAG.RescheduleRegions[RegionIdx])
892 return false;
893
894 return GCNSchedStage::initGCNRegion();
895}
896
897void GCNSchedStage::setupNewBlock() {
898 if (CurrentMBB)
899 DAG.finishBlock();
900
901 CurrentMBB = DAG.RegionBegin->getParent();
902 DAG.startBlock(bb: CurrentMBB);
  // Get the real RP for the region if it hasn't been calculated before. After
  // the initial schedule stage, real RP will be collected after scheduling.
905 if (StageID == GCNSchedStageID::OccInitialSchedule ||
906 StageID == GCNSchedStageID::ILPInitialSchedule)
907 DAG.computeBlockPressure(RegionIdx, MBB: CurrentMBB);
908}
909
910void GCNSchedStage::finalizeGCNRegion() {
911 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
912 DAG.RescheduleRegions[RegionIdx] = false;
913 if (S.HasHighPressure)
914 DAG.RegionsWithHighRP[RegionIdx] = true;
915
916 // Revert scheduling if we have dropped occupancy or there is some other
917 // reason that the original schedule is better.
918 checkScheduling();
919
920 if (DAG.RegionsWithIGLPInstrs[RegionIdx] &&
921 StageID != GCNSchedStageID::UnclusteredHighRPReschedule)
922 SavedMutations.swap(x&: DAG.Mutations);
923
924 DAG.exitRegion();
925 RegionIdx++;
926}
927
928void GCNSchedStage::checkScheduling() {
929 // Check the results of scheduling.
930 PressureAfter = DAG.getRealRegPressure(RegionIdx);
931 LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
932 LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
933
934 if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
935 PressureAfter.getVGPRNum(UnifiedVGPRFile: ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) {
936 DAG.Pressure[RegionIdx] = PressureAfter;
937 DAG.RegionsWithMinOcc[RegionIdx] =
938 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
939
940 // Early out if we have achieved the occupancy target.
941 LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
942 return;
943 }
944
945 unsigned TargetOccupancy =
946 std::min(a: S.getTargetOccupancy(), b: ST.getOccupancyWithLocalMemSize(MF));
947 unsigned WavesAfter =
948 std::min(a: TargetOccupancy, b: PressureAfter.getOccupancy(ST));
949 unsigned WavesBefore =
950 std::min(a: TargetOccupancy, b: PressureBefore.getOccupancy(ST));
951 LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
952 << ", after " << WavesAfter << ".\n");
953
  // We may not be able to keep the current target occupancy because of the
  // just-scheduled region. We might still be able to revert scheduling if the
  // occupancy before was higher, or if the current schedule has register
  // pressure higher than the excess limits, which could lead to more spilling.
958 unsigned NewOccupancy = std::max(a: WavesAfter, b: WavesBefore);
959
960 // Allow memory bound functions to drop to 4 waves if not limited by an
961 // attribute.
962 if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
963 WavesAfter >= MFI.getMinAllowedOccupancy()) {
964 LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
965 << MFI.getMinAllowedOccupancy() << " waves\n");
966 NewOccupancy = WavesAfter;
967 }
968
969 if (NewOccupancy < DAG.MinOccupancy) {
970 DAG.MinOccupancy = NewOccupancy;
971 MFI.limitOccupancy(Limit: DAG.MinOccupancy);
972 DAG.RegionsWithMinOcc.reset();
973 LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
974 << DAG.MinOccupancy << ".\n");
975 }
  // The maximum number of arch VGPRs on a non-unified register file, or the
  // maximum VGPR + AGPR count in the unified register file case.
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  // The maximum number of arch VGPRs for both unified and non-unified register
  // files.
981 unsigned MaxArchVGPRs = std::min(a: MaxVGPRs, b: ST.getAddressableNumArchVGPRs());
982 unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
983
984 if (PressureAfter.getVGPRNum(UnifiedVGPRFile: ST.hasGFX90AInsts()) > MaxVGPRs ||
985 PressureAfter.getVGPRNum(UnifiedVGPRFile: false) > MaxArchVGPRs ||
986 PressureAfter.getAGPRNum() > MaxArchVGPRs ||
987 PressureAfter.getSGPRNum() > MaxSGPRs) {
988 DAG.RescheduleRegions[RegionIdx] = true;
989 DAG.RegionsWithHighRP[RegionIdx] = true;
990 DAG.RegionsWithExcessRP[RegionIdx] = true;
991 }
992
993 // Revert if this region's schedule would cause a drop in occupancy or
994 // spilling.
995 if (shouldRevertScheduling(WavesAfter)) {
996 revertScheduling();
997 } else {
998 DAG.Pressure[RegionIdx] = PressureAfter;
999 DAG.RegionsWithMinOcc[RegionIdx] =
1000 PressureAfter.getOccupancy(ST) == DAG.MinOccupancy;
1001 }
1002}
1003
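// Compute the cycle at which SU becomes ready: no earlier than CurrCycle, and
// no earlier than each register-dependence predecessor's ready cycle plus that
// predecessor's latency. The result is also recorded in ReadyCycles.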
1004unsigned
1005GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
1006 DenseMap<unsigned, unsigned> &ReadyCycles,
1007 const TargetSchedModel &SM) {
1008 unsigned ReadyCycle = CurrCycle;
1009 for (auto &D : SU.Preds) {
1010 if (D.isAssignedRegDep()) {
1011 MachineInstr *DefMI = D.getSUnit()->getInstr();
1012 unsigned Latency = SM.computeInstrLatency(MI: DefMI);
1013 unsigned DefReady = ReadyCycles[DAG.getSUnit(MI: DefMI)->NodeNum];
1014 ReadyCycle = std::max(a: ReadyCycle, b: DefReady + Latency);
1015 }
1016 }
1017 ReadyCycles[SU.NodeNum] = ReadyCycle;
1018 return ReadyCycle;
1019}
1020
1021#ifndef NDEBUG
1022struct EarlierIssuingCycle {
1023 bool operator()(std::pair<MachineInstr *, unsigned> A,
1024 std::pair<MachineInstr *, unsigned> B) const {
1025 return A.second < B.second;
1026 }
1027};
1028
1029static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
1030 EarlierIssuingCycle> &ReadyCycles) {
1031 if (ReadyCycles.empty())
1032 return;
1033 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1034 dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
1035 << " ##################\n# Cycle #\t\t\tInstruction "
1036 " "
1037 " \n";
1038 unsigned IPrev = 1;
1039 for (auto &I : ReadyCycles) {
1040 if (I.second > IPrev + 1)
1041 dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
1042 << " CYCLES DETECTED ******************************\n\n";
1043 dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
1044 IPrev = I.second;
1045 }
1046}
1047#endif
1048
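// Estimate schedule quality by simulating in-order issue, one instruction per
// cycle: SumBubbles accumulates the stall cycles spent waiting for operands
// and CurrCycle ends up as the total schedule length; the resulting metric is
// their ratio scaled by ScheduleMetrics::ScaleFactor.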
1049ScheduleMetrics
1050GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
1051#ifndef NDEBUG
1052 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1053 ReadyCyclesSorted;
1054#endif
1055 const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1056 unsigned SumBubbles = 0;
1057 DenseMap<unsigned, unsigned> ReadyCycles;
1058 unsigned CurrCycle = 0;
1059 for (auto &SU : InputSchedule) {
1060 unsigned ReadyCycle =
1061 computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
1062 SumBubbles += ReadyCycle - CurrCycle;
1063#ifndef NDEBUG
1064 ReadyCyclesSorted.insert(x: std::make_pair(x: SU.getInstr(), y&: ReadyCycle));
1065#endif
1066 CurrCycle = ++ReadyCycle;
1067 }
1068#ifndef NDEBUG
1069 LLVM_DEBUG(
1070 printScheduleModel(ReadyCyclesSorted);
1071 dbgs() << "\n\t"
1072 << "Metric: "
1073 << (SumBubbles
1074 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1075 : 1)
1076 << "\n\n");
1077#endif
1078
1079 return ScheduleMetrics(CurrCycle, SumBubbles);
1080}
1081
1082ScheduleMetrics
1083GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
1084#ifndef NDEBUG
1085 std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
1086 ReadyCyclesSorted;
1087#endif
1088 const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
1089 unsigned SumBubbles = 0;
1090 DenseMap<unsigned, unsigned> ReadyCycles;
1091 unsigned CurrCycle = 0;
1092 for (auto &MI : DAG) {
1093 SUnit *SU = DAG.getSUnit(MI: &MI);
1094 if (!SU)
1095 continue;
1096 unsigned ReadyCycle =
1097 computeSUnitReadyCycle(SU: *SU, CurrCycle, ReadyCycles, SM);
1098 SumBubbles += ReadyCycle - CurrCycle;
1099#ifndef NDEBUG
1100 ReadyCyclesSorted.insert(x: std::make_pair(x: SU->getInstr(), y&: ReadyCycle));
1101#endif
1102 CurrCycle = ++ReadyCycle;
1103 }
1104#ifndef NDEBUG
1105 LLVM_DEBUG(
1106 printScheduleModel(ReadyCyclesSorted);
1107 dbgs() << "\n\t"
1108 << "Metric: "
1109 << (SumBubbles
1110 ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1111 : 1)
1112 << "\n\n");
1113#endif
1114
1115 return ScheduleMetrics(CurrCycle, SumBubbles);
1116}
1117
1118bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
1119 if (WavesAfter < DAG.MinOccupancy)
1120 return true;
1121
1122 return false;
1123}
1124
1125bool OccInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1126 if (PressureAfter == PressureBefore)
1127 return false;
1128
1129 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1130 return true;
1131
1132 if (mayCauseSpilling(WavesAfter))
1133 return true;
1134
1135 return false;
1136}
1137
1138bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
1139 // If RP is not reduced in the unclustered reschedule stage, revert to the
1140 // old schedule.
1141 if ((WavesAfter <= PressureBefore.getOccupancy(ST) &&
1142 mayCauseSpilling(WavesAfter)) ||
1143 GCNSchedStage::shouldRevertScheduling(WavesAfter)) {
1144 LLVM_DEBUG(dbgs() << "Unclustered reschedule did not help.\n");
1145 return true;
1146 }
1147
1148 // Do not attempt to relax schedule even more if we are already spilling.
1149 if (isRegionWithExcessRP())
1150 return false;
1151
1152 LLVM_DEBUG(
1153 dbgs()
1154 << "\n\t *** In shouldRevertScheduling ***\n"
1155 << " *********** BEFORE UnclusteredHighRPStage ***********\n");
1156 ScheduleMetrics MBefore =
1157 getScheduleMetrics(InputSchedule: DAG.SUnits);
1158 LLVM_DEBUG(
1159 dbgs()
1160 << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
1161 ScheduleMetrics MAfter = getScheduleMetrics(DAG);
1162 unsigned OldMetric = MBefore.getMetric();
1163 unsigned NewMetric = MAfter.getMetric();
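  // Estimate the combined benefit of the new schedule: the occupancy ratio
  // (WavesAfter / WavesBefore) multiplied by the latency-metric ratio
  // ((OldMetric + ScheduleMetricBias) / NewMetric), computed in fixed point
  // with ScheduleMetrics::ScaleFactor. The bias weights occupancy gains over
  // latency; revert unless the product reaches the break-even value of
  // ScaleFactor.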
1164 unsigned WavesBefore =
1165 std::min(a: S.getTargetOccupancy(), b: PressureBefore.getOccupancy(ST));
1166 unsigned Profit =
1167 ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1168 ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
1169 NewMetric) /
1170 ScheduleMetrics::ScaleFactor;
1171 LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
1172 << MAfter << "Profit: " << Profit << "\n");
1173 return Profit < ScheduleMetrics::ScaleFactor;
1174}
1175
1176bool ClusteredLowOccStage::shouldRevertScheduling(unsigned WavesAfter) {
1177 if (PressureAfter == PressureBefore)
1178 return false;
1179
1180 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1181 return true;
1182
1183 if (mayCauseSpilling(WavesAfter))
1184 return true;
1185
1186 return false;
1187}
1188
1189bool PreRARematStage::shouldRevertScheduling(unsigned WavesAfter) {
1190 if (GCNSchedStage::shouldRevertScheduling(WavesAfter))
1191 return true;
1192
1193 if (mayCauseSpilling(WavesAfter))
1194 return true;
1195
1196 return false;
1197}
1198
1199bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
1200 if (mayCauseSpilling(WavesAfter))
1201 return true;
1202
1203 return false;
1204}
1205
1206bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
1207 if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
1208 !PressureAfter.less(MF, O: PressureBefore)) {
1209 LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
1210 return true;
1211 }
1212
1213 return false;
1214}
1215
1216void GCNSchedStage::revertScheduling() {
1217 DAG.RegionsWithMinOcc[RegionIdx] =
1218 PressureBefore.getOccupancy(ST) == DAG.MinOccupancy;
1219 LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
1220 DAG.RescheduleRegions[RegionIdx] =
1221 S.hasNextStage() &&
1222 S.getNextStage() != GCNSchedStageID::UnclusteredHighRPReschedule;
1223 DAG.RegionEnd = DAG.RegionBegin;
1224 int SkippedDebugInstr = 0;
1225 for (MachineInstr *MI : Unsched) {
1226 if (MI->isDebugInstr()) {
1227 ++SkippedDebugInstr;
1228 continue;
1229 }
1230
1231 if (MI->getIterator() != DAG.RegionEnd) {
1232 DAG.BB->remove(I: MI);
1233 DAG.BB->insert(I: DAG.RegionEnd, MI);
1234 if (!MI->isDebugInstr())
1235 DAG.LIS->handleMove(MI&: *MI, UpdateFlags: true);
1236 }
1237
1238 // Reset read-undef flags and update them later.
1239 for (auto &Op : MI->all_defs())
1240 Op.setIsUndef(false);
1241 RegisterOperands RegOpers;
1242 RegOpers.collect(MI: *MI, TRI: *DAG.TRI, MRI: DAG.MRI, TrackLaneMasks: DAG.ShouldTrackLaneMasks, IgnoreDead: false);
1243 if (!MI->isDebugInstr()) {
1244 if (DAG.ShouldTrackLaneMasks) {
1245 // Adjust liveness and add missing dead+read-undef flags.
1246 SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(Instr: *MI).getRegSlot();
1247 RegOpers.adjustLaneLiveness(LIS: *DAG.LIS, MRI: DAG.MRI, Pos: SlotIdx, AddFlagsMI: MI);
1248 } else {
1249 // Adjust for missing dead-def flags.
1250 RegOpers.detectDeadDefs(MI: *MI, LIS: *DAG.LIS);
1251 }
1252 }
1253 DAG.RegionEnd = MI->getIterator();
1254 ++DAG.RegionEnd;
1255 LLVM_DEBUG(dbgs() << "Scheduling " << *MI);
1256 }
1257
  // After reverting the schedule, debug instrs will now be at the end of the
  // block and RegionEnd will point to the first debug instr. Increment
  // RegionEnd past the debug instrs to the actual end of the scheduling
  // region.
1261 while (SkippedDebugInstr-- > 0)
1262 ++DAG.RegionEnd;
1263
1264 // If Unsched.front() instruction is a debug instruction, this will actually
1265 // shrink the region since we moved all debug instructions to the end of the
1266 // block. Find the first instruction that is not a debug instruction.
1267 DAG.RegionBegin = Unsched.front()->getIterator();
1268 if (DAG.RegionBegin->isDebugInstr()) {
1269 for (MachineInstr *MI : Unsched) {
1270 if (MI->isDebugInstr())
1271 continue;
1272 DAG.RegionBegin = MI->getIterator();
1273 break;
1274 }
1275 }
1276
1277 // Then move the debug instructions back into their correct place and set
1278 // RegionBegin and RegionEnd if needed.
1279 DAG.placeDebugValues();
1280
1281 DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
1282}
1283
1284void PreRARematStage::collectRematerializableInstructions() {
1285 const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
1286 for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
1287 Register Reg = Register::index2VirtReg(Index: I);
1288 if (!DAG.LIS->hasInterval(Reg))
1289 continue;
1290
1291 // TODO: Handle AGPR and SGPR rematerialization
1292 if (!SRI->isVGPRClass(RC: DAG.MRI.getRegClass(Reg)) ||
1293 !DAG.MRI.hasOneDef(RegNo: Reg) || !DAG.MRI.hasOneNonDBGUse(RegNo: Reg))
1294 continue;
1295
1296 MachineOperand *Op = DAG.MRI.getOneDef(Reg);
1297 MachineInstr *Def = Op->getParent();
1298 if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(MI: *Def))
1299 continue;
1300
1301 MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(RegNo: Reg);
1302 if (Def->getParent() == UseI->getParent())
1303 continue;
1304
1305 // We are only collecting defs that are defined in another block and are
1306 // live-through or used inside regions at MinOccupancy. This means that the
1307 // register must be in the live-in set for the region.
1308 bool AddedToRematList = false;
1309 for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
1310 auto It = DAG.LiveIns[I].find(Val: Reg);
1311 if (It != DAG.LiveIns[I].end() && !It->second.none()) {
1312 if (DAG.RegionsWithMinOcc[I]) {
1313 RematerializableInsts[I][Def] = UseI;
1314 AddedToRematList = true;
1315 }
1316
1317 // Collect regions with rematerializable reg as live-in to avoid
1318 // searching later when updating RP.
1319 RematDefToLiveInRegions[Def].push_back(Elt: I);
1320 }
1321 }
1322 if (!AddedToRematList)
1323 RematDefToLiveInRegions.erase(Val: Def);
1324 }
1325}
1326
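// Try to improve occupancy by rematerializing trivially rematerializable defs
// from other blocks next to their single use. Returns true and commits the
// updated region, live-in, and pressure caches only if occupancy was improved;
// otherwise every rematerialization is rolled back.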
1327bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
1328 const TargetInstrInfo *TII) {
1329 // Temporary copies of cached variables we will be modifying and replacing if
1330 // sinking succeeds.
1331 SmallVector<
1332 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>, 32>
1333 NewRegions;
1334 DenseMap<unsigned, GCNRPTracker::LiveRegSet> NewLiveIns;
1335 DenseMap<unsigned, GCNRegPressure> NewPressure;
1336 BitVector NewRescheduleRegions;
1337 LiveIntervals *LIS = DAG.LIS;
1338
1339 NewRegions.resize(N: DAG.Regions.size());
1340 NewRescheduleRegions.resize(N: DAG.Regions.size());
1341
  // Collect only regions that have a rematerializable def as a live-in.
1343 SmallSet<unsigned, 16> ImpactedRegions;
1344 for (const auto &It : RematDefToLiveInRegions)
1345 ImpactedRegions.insert(I: It.second.begin(), E: It.second.end());
1346
1347 // Make copies of register pressure and live-ins cache that will be updated
1348 // as we rematerialize.
1349 for (auto Idx : ImpactedRegions) {
1350 NewPressure[Idx] = DAG.Pressure[Idx];
1351 NewLiveIns[Idx] = DAG.LiveIns[Idx];
1352 }
1353 NewRegions = DAG.Regions;
1354 NewRescheduleRegions.reset();
1355
1356 DenseMap<MachineInstr *, MachineInstr *> InsertedMIToOldDef;
1357 bool Improved = false;
1358 for (auto I : ImpactedRegions) {
1359 if (!DAG.RegionsWithMinOcc[I])
1360 continue;
1361
1362 Improved = false;
1363 int VGPRUsage = NewPressure[I].getVGPRNum(UnifiedVGPRFile: ST.hasGFX90AInsts());
1364 int SGPRUsage = NewPressure[I].getSGPRNum();
1365
1366 // TODO: Handle occupancy drop due to AGPR and SGPR.
1367 // Check if cause of occupancy drop is due to VGPR usage and not SGPR.
1368 if (ST.getOccupancyWithNumSGPRs(SGPRs: SGPRUsage) == DAG.MinOccupancy)
1369 break;
1370
1371 // The occupancy of this region could have been improved by a previous
1372 // iteration's sinking of defs.
1373 if (NewPressure[I].getOccupancy(ST) > DAG.MinOccupancy) {
1374 NewRescheduleRegions[I] = true;
1375 Improved = true;
1376 continue;
1377 }
1378
    // First check if we have enough trivially rematerializable instructions to
    // improve occupancy. Optimistically assume all instructions we are able to
    // sink decrease RP.
1382 int TotalSinkableRegs = 0;
1383 for (const auto &It : RematerializableInsts[I]) {
1384 MachineInstr *Def = It.first;
1385 Register DefReg = Def->getOperand(i: 0).getReg();
1386 TotalSinkableRegs +=
1387 SIRegisterInfo::getNumCoveredRegs(LM: NewLiveIns[I][DefReg]);
1388 }
1389 int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
1390 unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRs: VGPRsAfterSink);
    // If, in the most optimistic scenario, we cannot improve occupancy, then
    // do not attempt to sink any instructions.
1393 if (OptimisticOccupancy <= DAG.MinOccupancy)
1394 break;
1395
1396 unsigned ImproveOccupancy = 0;
1397 SmallVector<MachineInstr *, 4> SinkedDefs;
1398 for (auto &It : RematerializableInsts[I]) {
1399 MachineInstr *Def = It.first;
1400 MachineBasicBlock::iterator InsertPos =
1401 MachineBasicBlock::iterator(It.second);
1402 Register Reg = Def->getOperand(i: 0).getReg();
1403 // Rematerialize MI to its use block. Since we are only rematerializing
1404 // instructions that do not have any virtual reg uses, we do not need to
1405 // call LiveRangeEdit::allUsesAvailableAt() and
1406 // LiveRangeEdit::canRematerializeAt().
1407 TII->reMaterialize(MBB&: *InsertPos->getParent(), MI: InsertPos, DestReg: Reg,
1408 SubIdx: Def->getOperand(i: 0).getSubReg(), Orig: *Def, TRI: *DAG.TRI);
1409 MachineInstr *NewMI = &*std::prev(x: InsertPos);
1410 LIS->InsertMachineInstrInMaps(MI&: *NewMI);
1411 LIS->removeInterval(Reg);
1412 LIS->createAndComputeVirtRegInterval(Reg);
1413 InsertedMIToOldDef[NewMI] = Def;
1414
      // Update region boundaries in the scheduling region we sank from, since
      // we may sink an instruction that was at the beginning or end of its
      // region.
1417 DAG.updateRegionBoundaries(RegionBoundaries&: NewRegions, MI: Def, /*NewMI =*/nullptr,
1418 /*Removing =*/true);
1419
      // Update region boundaries in the region we sank to.
1421 DAG.updateRegionBoundaries(RegionBoundaries&: NewRegions, MI: InsertPos, NewMI);
1422
1423 LaneBitmask PrevMask = NewLiveIns[I][Reg];
      // FIXME: Also update cached pressure for where the def was sunk from.
      // Update RP for all regions that have this reg as a live-in and remove
      // the reg from all regions as a live-in.
1427 for (auto Idx : RematDefToLiveInRegions[Def]) {
1428 NewLiveIns[Idx].erase(Val: Reg);
1429 if (InsertPos->getParent() != DAG.Regions[Idx].first->getParent()) {
1430 // Def is live-through and not used in this block.
1431 NewPressure[Idx].inc(Reg, PrevMask, NewMask: LaneBitmask::getNone(), MRI: DAG.MRI);
1432 } else {
1433 // Def is used and rematerialized into this block.
1434 GCNDownwardRPTracker RPT(*LIS);
1435 auto *NonDbgMI = &*skipDebugInstructionsForward(
1436 It: NewRegions[Idx].first, End: NewRegions[Idx].second);
1437 RPT.reset(MI: *NonDbgMI, LiveRegs: &NewLiveIns[Idx]);
1438 RPT.advance(End: NewRegions[Idx].second);
1439 NewPressure[Idx] = RPT.moveMaxPressure();
1440 }
1441 }
1442
1443 SinkedDefs.push_back(Elt: Def);
1444 ImproveOccupancy = NewPressure[I].getOccupancy(ST);
1445 if (ImproveOccupancy > DAG.MinOccupancy)
1446 break;
1447 }
1448
    // Remove the defs we just sank from all regions' lists of sinkable defs.
1450 for (auto &Def : SinkedDefs)
1451 for (auto TrackedIdx : RematDefToLiveInRegions[Def])
1452 RematerializableInsts[TrackedIdx].erase(Key: Def);
1453
1454 if (ImproveOccupancy <= DAG.MinOccupancy)
1455 break;
1456
1457 NewRescheduleRegions[I] = true;
1458 Improved = true;
1459 }
1460
1461 if (!Improved) {
1462 // Occupancy was not improved for all regions that were at MinOccupancy.
1463 // Undo sinking and remove newly rematerialized instructions.
1464 for (auto &Entry : InsertedMIToOldDef) {
1465 MachineInstr *MI = Entry.first;
1466 MachineInstr *OldMI = Entry.second;
1467 Register Reg = MI->getOperand(i: 0).getReg();
1468 LIS->RemoveMachineInstrFromMaps(MI&: *MI);
1469 MI->eraseFromParent();
1470 OldMI->clearRegisterDeads(Reg);
1471 LIS->removeInterval(Reg);
1472 LIS->createAndComputeVirtRegInterval(Reg);
1473 }
1474 return false;
1475 }
1476
1477 // Occupancy was improved for all regions.
1478 for (auto &Entry : InsertedMIToOldDef) {
1479 MachineInstr *MI = Entry.first;
1480 MachineInstr *OldMI = Entry.second;
1481
1482 // Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1483 DAG.BBLiveInMap.erase(Val: OldMI);
1484
1485 // Remove OldMI and update LIS
1486 Register Reg = MI->getOperand(i: 0).getReg();
1487 LIS->RemoveMachineInstrFromMaps(MI&: *OldMI);
1488 OldMI->eraseFromParent();
1489 LIS->removeInterval(Reg);
1490 LIS->createAndComputeVirtRegInterval(Reg);
1491 }
1492
1493 // Update live-ins, register pressure, and regions caches.
1494 for (auto Idx : ImpactedRegions) {
1495 DAG.LiveIns[Idx] = NewLiveIns[Idx];
1496 DAG.Pressure[Idx] = NewPressure[Idx];
1497 DAG.MBBLiveIns.erase(Val: DAG.Regions[Idx].first->getParent());
1498 }
1499 DAG.Regions = NewRegions;
1500 DAG.RescheduleRegions = NewRescheduleRegions;
1501
1502 SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
1503 MFI.increaseOccupancy(MF, Limit: ++DAG.MinOccupancy);
1504
1505 return true;
1506}
1507
1508// Copied from MachineLICM
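// A def is treated as trivially rematerializable only if TII agrees and it has
// no virtual register uses, so no live-range availability checks are needed at
// the insertion point (see the comment in sinkTriviallyRematInsts()).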
1509bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1510 if (!DAG.TII->isTriviallyReMaterializable(MI))
1511 return false;
1512
1513 for (const MachineOperand &MO : MI.all_uses())
1514 if (MO.getReg().isVirtual())
1515 return false;
1516
1517 return true;
1518}
1519
1520// When removing, we will have to check both beginning and ending of the region.
1521// When inserting, we will only have to check if we are inserting NewMI in front
1522// of a scheduling region and do not need to check the ending since we will only
1523// ever be inserting before an already existing MI.
1524void GCNScheduleDAGMILive::updateRegionBoundaries(
1525 SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
1526 MachineBasicBlock::iterator>> &RegionBoundaries,
1527 MachineBasicBlock::iterator MI, MachineInstr *NewMI, bool Removing) {
1528 unsigned I = 0, E = RegionBoundaries.size();
1529 // Search for first region of the block where MI is located
1530 while (I != E && MI->getParent() != RegionBoundaries[I].first->getParent())
1531 ++I;
1532
1533 for (; I != E; ++I) {
1534 if (MI->getParent() != RegionBoundaries[I].first->getParent())
1535 return;
1536
1537 if (Removing && MI == RegionBoundaries[I].first &&
1538 MI == RegionBoundaries[I].second) {
      // MI is in a region of size 1. After removing it, the region will have
      // size 0; set RegionBegin and RegionEnd to the end-of-block iterator.
1541 RegionBoundaries[I] =
1542 std::pair(MI->getParent()->end(), MI->getParent()->end());
1543 return;
1544 }
1545 if (MI == RegionBoundaries[I].first) {
1546 if (Removing)
1547 RegionBoundaries[I] =
1548 std::pair(std::next(x: MI), RegionBoundaries[I].second);
1549 else
        // NewMI was inserted in front of the region; set the new RegionBegin
        // to NewMI.
1551 RegionBoundaries[I] = std::pair(MachineBasicBlock::iterator(NewMI),
1552 RegionBoundaries[I].second);
1553 return;
1554 }
1555 if (Removing && MI == RegionBoundaries[I].second) {
1556 RegionBoundaries[I] = std::pair(RegionBoundaries[I].first, std::prev(x: MI));
1557 return;
1558 }
1559 }
1560}
1561
1562static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG) {
1563 return std::any_of(
1564 first: DAG->begin(), last: DAG->end(), pred: [](MachineBasicBlock::iterator MI) {
1565 unsigned Opc = MI->getOpcode();
1566 return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
1567 });
1568}
1569
1570GCNPostScheduleDAGMILive::GCNPostScheduleDAGMILive(
1571 MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
1572 bool RemoveKillFlags)
1573 : ScheduleDAGMI(C, std::move(S), RemoveKillFlags) {}
1574
1575void GCNPostScheduleDAGMILive::schedule() {
1576 HasIGLPInstrs = hasIGLPInstrs(DAG: this);
1577 if (HasIGLPInstrs) {
1578 SavedMutations.clear();
1579 SavedMutations.swap(x&: Mutations);
1580 addMutation(Mutation: createIGroupLPDAGMutation(Phase: AMDGPU::SchedulingPhase::PostRA));
1581 }
1582
1583 ScheduleDAGMI::schedule();
1584}
1585
1586void GCNPostScheduleDAGMILive::finalizeSchedule() {
1587 if (HasIGLPInstrs)
1588 SavedMutations.swap(x&: Mutations);
1589
1590 ScheduleDAGMI::finalizeSchedule();
1591}
1592
