1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/CodeGen/MachineScheduler.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/PriorityQueue.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/ADT/iterator_range.h"
23#include "llvm/Analysis/AliasAnalysis.h"
24#include "llvm/CodeGen/LiveInterval.h"
25#include "llvm/CodeGen/LiveIntervals.h"
26#include "llvm/CodeGen/MachineBasicBlock.h"
27#include "llvm/CodeGen/MachineDominators.h"
28#include "llvm/CodeGen/MachineFunction.h"
29#include "llvm/CodeGen/MachineFunctionPass.h"
30#include "llvm/CodeGen/MachineInstr.h"
31#include "llvm/CodeGen/MachineLoopInfo.h"
32#include "llvm/CodeGen/MachineOperand.h"
33#include "llvm/CodeGen/MachinePassRegistry.h"
34#include "llvm/CodeGen/MachineRegisterInfo.h"
35#include "llvm/CodeGen/RegisterClassInfo.h"
36#include "llvm/CodeGen/RegisterPressure.h"
37#include "llvm/CodeGen/ScheduleDAG.h"
38#include "llvm/CodeGen/ScheduleDAGInstrs.h"
39#include "llvm/CodeGen/ScheduleDAGMutation.h"
40#include "llvm/CodeGen/ScheduleDFS.h"
41#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
42#include "llvm/CodeGen/SlotIndexes.h"
43#include "llvm/CodeGen/TargetFrameLowering.h"
44#include "llvm/CodeGen/TargetInstrInfo.h"
45#include "llvm/CodeGen/TargetLowering.h"
46#include "llvm/CodeGen/TargetPassConfig.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/TargetSchedule.h"
49#include "llvm/CodeGen/TargetSubtargetInfo.h"
50#include "llvm/CodeGenTypes/MachineValueType.h"
51#include "llvm/Config/llvm-config.h"
52#include "llvm/InitializePasses.h"
53#include "llvm/MC/LaneBitmask.h"
54#include "llvm/Pass.h"
55#include "llvm/Support/CommandLine.h"
56#include "llvm/Support/Compiler.h"
57#include "llvm/Support/Debug.h"
58#include "llvm/Support/ErrorHandling.h"
59#include "llvm/Support/GraphWriter.h"
60#include "llvm/Support/raw_ostream.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <string>
68#include <tuple>
69#include <utility>
70#include <vector>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "machine-scheduler"
75
76STATISTIC(NumClustered, "Number of load/store pairs clustered");
77
78namespace llvm {
79
80cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
81 cl::desc("Force top-down list scheduling"));
82cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
83 cl::desc("Force bottom-up list scheduling"));
84namespace MISchedPostRASched {
85enum Direction {
86 TopDown,
87 BottomUp,
88 Bidirectional,
89};
90} // end namespace MISchedPostRASched
91cl::opt<MISchedPostRASched::Direction> PostRADirection(
92 "misched-postra-direction", cl::Hidden,
93 cl::desc("Post reg-alloc list scheduling direction"),
94 // Default to top-down because it was implemented first and existing targets
95 // expect that behavior by default.
96 cl::init(Val: MISchedPostRASched::TopDown),
97 cl::values(
98 clEnumValN(MISchedPostRASched::TopDown, "topdown",
99 "Force top-down post reg-alloc list scheduling"),
100 clEnumValN(MISchedPostRASched::BottomUp, "bottomup",
101 "Force bottom-up post reg-alloc list scheduling"),
102 clEnumValN(MISchedPostRASched::Bidirectional, "bidirectional",
103 "Force bidirectional post reg-alloc list scheduling")));
104cl::opt<bool>
105DumpCriticalPathLength("misched-dcpl", cl::Hidden,
106 cl::desc("Print critical path length to stdout"));
107
108cl::opt<bool> VerifyScheduling(
109 "verify-misched", cl::Hidden,
110 cl::desc("Verify machine instrs before and after machine scheduling"));
111
112#ifndef NDEBUG
113cl::opt<bool> ViewMISchedDAGs(
114 "view-misched-dags", cl::Hidden,
115 cl::desc("Pop up a window to show MISched dags after they are processed"));
116cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
117 cl::desc("Print schedule DAGs"));
118cl::opt<bool> MISchedDumpReservedCycles(
119 "misched-dump-reserved-cycles", cl::Hidden, cl::init(Val: false),
120 cl::desc("Dump resource usage at schedule boundary."));
cl::opt<bool> MischedDetailResourceBooking(
    "misched-detail-resource-booking", cl::Hidden, cl::init(false),
    cl::desc("Show details of invoking getNextResourceCycle."));
124#else
125const bool ViewMISchedDAGs = false;
126const bool PrintDAGs = false;
127const bool MischedDetailResourceBooking = false;
128#ifdef LLVM_ENABLE_DUMP
129const bool MISchedDumpReservedCycles = false;
130#endif // LLVM_ENABLE_DUMP
131#endif // NDEBUG
132
133} // end namespace llvm
134
135#ifndef NDEBUG
/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph; provide a cutoff to hide them.
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  cl::desc("Hide nodes with more predecessors/successors than cutoff"));
140
141static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
142 cl::desc("Stop scheduling after N instructions"), cl::init(Val: ~0U));
143
144static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
145 cl::desc("Only schedule this function"));
146static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
147 cl::desc("Only schedule this MBB#"));
148#endif // NDEBUG
149
150/// Avoid quadratic complexity in unusually large basic blocks by limiting the
151/// size of the ready lists.
152static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
153 cl::desc("Limit ready list to N instructions"), cl::init(Val: 256));
154
155static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
156 cl::desc("Enable register pressure scheduling."), cl::init(Val: true));
157
158static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
159 cl::desc("Enable cyclic critical path analysis."), cl::init(Val: true));
160
161static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
162 cl::desc("Enable memop clustering."),
163 cl::init(Val: true));
static cl::opt<bool>
    ForceFastCluster("force-fast-cluster", cl::Hidden,
                     cl::desc("Switch to fast cluster algorithm with the loss "
                              "of some fusion opportunities"),
                     cl::init(false));
169static cl::opt<unsigned>
170 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
171 cl::desc("The threshold for fast cluster"),
172 cl::init(Val: 1000));
173
174#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
175static cl::opt<bool> MISchedDumpScheduleTrace(
176 "misched-dump-schedule-trace", cl::Hidden, cl::init(Val: false),
177 cl::desc("Dump resource usage at schedule boundary."));
178static cl::opt<unsigned>
179 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
180 cl::desc("Set width of the columns with "
181 "the resources and schedule units"),
182 cl::init(Val: 19));
183static cl::opt<unsigned>
184 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
185 cl::desc("Set width of the columns showing resource booking."),
186 cl::init(Val: 5));
187static cl::opt<bool> MISchedSortResourcesInTrace(
188 "misched-sort-resources-in-trace", cl::Hidden, cl::init(Val: true),
189 cl::desc("Sort the resources printed in the dump trace"));
190#endif
191
192static cl::opt<unsigned>
193 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
194 cl::desc("Number of intervals to track"), cl::init(Val: 10));
195
196// DAG subtrees must have at least this many nodes.
197static const unsigned MinSubtreeSize = 8;
198
199// Pin the vtables to this file.
200void MachineSchedStrategy::anchor() {}
201
202void ScheduleDAGMutation::anchor() {}
203
204//===----------------------------------------------------------------------===//
205// Machine Instruction Scheduling Pass and Registry
206//===----------------------------------------------------------------------===//
207
208MachineSchedContext::MachineSchedContext() {
209 RegClassInfo = new RegisterClassInfo();
210}
211
212MachineSchedContext::~MachineSchedContext() {
213 delete RegClassInfo;
214}
215
216namespace {
217
218/// Base class for a machine scheduler class that can run at any point.
219class MachineSchedulerBase : public MachineSchedContext,
220 public MachineFunctionPass {
221public:
222 MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
223
224 void print(raw_ostream &O, const Module* = nullptr) const override;
225
226protected:
227 void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
228};
229
230/// MachineScheduler runs after coalescing and before register allocation.
231class MachineScheduler : public MachineSchedulerBase {
232public:
233 MachineScheduler();
234
235 void getAnalysisUsage(AnalysisUsage &AU) const override;
236
237 bool runOnMachineFunction(MachineFunction&) override;
238
239 static char ID; // Class identification, replacement for typeinfo
240
241protected:
242 ScheduleDAGInstrs *createMachineScheduler();
243};
244
/// PostMachineScheduler runs shortly before code emission.
246class PostMachineScheduler : public MachineSchedulerBase {
247public:
248 PostMachineScheduler();
249
250 void getAnalysisUsage(AnalysisUsage &AU) const override;
251
252 bool runOnMachineFunction(MachineFunction&) override;
253
254 static char ID; // Class identification, replacement for typeinfo
255
256protected:
257 ScheduleDAGInstrs *createPostMachineScheduler();
258};
259
260} // end anonymous namespace
261
262char MachineScheduler::ID = 0;
263
264char &llvm::MachineSchedulerID = MachineScheduler::ID;
265
266INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
267 "Machine Instruction Scheduler", false, false)
268INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
269INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
270INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
271INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
272INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
273INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
274 "Machine Instruction Scheduler", false, false)
275
276MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
277 initializeMachineSchedulerPass(Registry&: *PassRegistry::getPassRegistry());
278}
279
280void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
281 AU.setPreservesCFG();
282 AU.addRequired<MachineDominatorTree>();
283 AU.addRequired<MachineLoopInfo>();
284 AU.addRequired<AAResultsWrapperPass>();
285 AU.addRequired<TargetPassConfig>();
286 AU.addRequired<SlotIndexes>();
287 AU.addPreserved<SlotIndexes>();
288 AU.addRequired<LiveIntervals>();
289 AU.addPreserved<LiveIntervals>();
290 MachineFunctionPass::getAnalysisUsage(AU);
291}
292
293char PostMachineScheduler::ID = 0;
294
295char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
296
297INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
298 "PostRA Machine Instruction Scheduler", false, false)
299INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
300INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
301INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
302INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
303 "PostRA Machine Instruction Scheduler", false, false)
304
305PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
306 initializePostMachineSchedulerPass(Registry&: *PassRegistry::getPassRegistry());
307}
308
309void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
310 AU.setPreservesCFG();
311 AU.addRequired<MachineDominatorTree>();
312 AU.addRequired<MachineLoopInfo>();
313 AU.addRequired<AAResultsWrapperPass>();
314 AU.addRequired<TargetPassConfig>();
315 MachineFunctionPass::getAnalysisUsage(AU);
316}
317
318MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
319 MachineSchedRegistry::Registry;
320
321/// A dummy default scheduler factory indicates whether the scheduler
322/// is overridden on the command line.
323static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
324 return nullptr;
325}
326
327/// MachineSchedOpt allows command line selection of the scheduler.
328static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
329 RegisterPassParser<MachineSchedRegistry>>
330MachineSchedOpt("misched",
331 cl::init(Val: &useDefaultMachineSched), cl::Hidden,
332 cl::desc("Machine instruction scheduler to use"));
333
334static MachineSchedRegistry
335DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
336 useDefaultMachineSched);
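
// As a hedged sketch (the names below are illustrative and not part of this
// file), a target or plugin makes an alternative scheduler selectable via
// -misched=<name> by registering a factory:
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<MySchedStrategy>());
//   }
//   static MachineSchedRegistry
//       MySchedRegistry("my-sched", "My custom scheduler.", createMySched);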
337
338static cl::opt<bool> EnableMachineSched(
339 "enable-misched",
340 cl::desc("Enable the machine instruction scheduling pass."), cl::init(Val: true),
341 cl::Hidden);
342
343static cl::opt<bool> EnablePostRAMachineSched(
344 "enable-post-misched",
345 cl::desc("Enable the post-ra machine instruction scheduling pass."),
346 cl::init(Val: true), cl::Hidden);
347
348/// Decrement this iterator until reaching the top or a non-debug instr.
349static MachineBasicBlock::const_iterator
350priorNonDebug(MachineBasicBlock::const_iterator I,
351 MachineBasicBlock::const_iterator Beg) {
352 assert(I != Beg && "reached the top of the region, cannot decrement");
353 while (--I != Beg) {
354 if (!I->isDebugOrPseudoInstr())
355 break;
356 }
357 return I;
358}
359
360/// Non-const version.
361static MachineBasicBlock::iterator
362priorNonDebug(MachineBasicBlock::iterator I,
363 MachineBasicBlock::const_iterator Beg) {
364 return priorNonDebug(I: MachineBasicBlock::const_iterator(I), Beg)
365 .getNonConstIterator();
366}
367
368/// If this iterator is a debug value, increment until reaching the End or a
369/// non-debug instruction.
370static MachineBasicBlock::const_iterator
371nextIfDebug(MachineBasicBlock::const_iterator I,
372 MachineBasicBlock::const_iterator End) {
373 for(; I != End; ++I) {
374 if (!I->isDebugOrPseudoInstr())
375 break;
376 }
377 return I;
378}
379
380/// Non-const version.
381static MachineBasicBlock::iterator
382nextIfDebug(MachineBasicBlock::iterator I,
383 MachineBasicBlock::const_iterator End) {
384 return nextIfDebug(I: MachineBasicBlock::const_iterator(I), End)
385 .getNonConstIterator();
386}
387
388/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
389ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
390 // Select the scheduler, or set the default.
391 MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
392 if (Ctor != useDefaultMachineSched)
393 return Ctor(this);
394
395 // Get the default scheduler set by the target for this function.
396 ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(C: this);
397 if (Scheduler)
398 return Scheduler;
399
400 // Default to GenericScheduler.
401 return createGenericSchedLive(C: this);
402}
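
// Targets usually take the second path above: they override
// TargetPassConfig::createMachineScheduler() and return a ScheduleDAGMILive
// configured with target-specific DAG mutations. A sketch only (the exact
// mutations and the MyPassConfig name are illustrative):
//
//   ScheduleDAGInstrs *
//   MyPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }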
403
404/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
405/// the caller. We don't have a command line option to override the postRA
406/// scheduler. The Target must configure it.
407ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
408 // Get the postRA scheduler set by the target for this function.
409 ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(C: this);
410 if (Scheduler)
411 return Scheduler;
412
413 // Default to GenericScheduler.
414 return createGenericSchedPostRA(C: this);
415}
416
417/// Top-level MachineScheduler pass driver.
418///
419/// Visit blocks in function order. Divide each block into scheduling regions
420/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
421/// consistent with the DAG builder, which traverses the interior of the
422/// scheduling regions bottom-up.
423///
424/// This design avoids exposing scheduling boundaries to the DAG builder,
425/// simplifying the DAG builder's support for "special" target instructions.
426/// At the same time the design allows target schedulers to operate across
427/// scheduling boundaries, for example to bundle the boundary instructions
428/// without reordering them. This creates complexity, because the target
429/// scheduler must update the RegionBegin and RegionEnd positions cached by
430/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
431/// design would be to split blocks at scheduling boundaries, but LLVM has a
432/// general bias against block splitting purely for implementation simplicity.
433bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
434 if (skipFunction(F: mf.getFunction()))
435 return false;
436
437 if (EnableMachineSched.getNumOccurrences()) {
438 if (!EnableMachineSched)
439 return false;
440 } else if (!mf.getSubtarget().enableMachineScheduler())
441 return false;
442
443 LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
444
445 // Initialize the context of the pass.
446 MF = &mf;
447 MLI = &getAnalysis<MachineLoopInfo>();
448 MDT = &getAnalysis<MachineDominatorTree>();
449 PassConfig = &getAnalysis<TargetPassConfig>();
450 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
451
452 LIS = &getAnalysis<LiveIntervals>();
453
454 if (VerifyScheduling) {
455 LLVM_DEBUG(LIS->dump());
456 MF->verify(p: this, Banner: "Before machine scheduling.");
457 }
458 RegClassInfo->runOnMachineFunction(MF: *MF);
459
460 // Instantiate the selected scheduler for this target, function, and
461 // optimization level.
462 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
463 ScheduleDAGMI::DumpDirection D;
464 if (ForceTopDown)
465 D = ScheduleDAGMI::DumpDirection::TopDown;
466 else if (ForceBottomUp)
467 D = ScheduleDAGMI::DumpDirection::BottomUp;
468 else
469 D = ScheduleDAGMI::DumpDirection::Bidirectional;
470 Scheduler->setDumpDirection(D);
471 scheduleRegions(Scheduler&: *Scheduler, FixKillFlags: false);
472
473 LLVM_DEBUG(LIS->dump());
474 if (VerifyScheduling)
475 MF->verify(p: this, Banner: "After machine scheduling.");
476 return true;
477}
478
479bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
480 if (skipFunction(F: mf.getFunction()))
481 return false;
482
483 if (EnablePostRAMachineSched.getNumOccurrences()) {
484 if (!EnablePostRAMachineSched)
485 return false;
486 } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
487 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
488 return false;
489 }
490 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
491
492 // Initialize the context of the pass.
493 MF = &mf;
494 MLI = &getAnalysis<MachineLoopInfo>();
495 PassConfig = &getAnalysis<TargetPassConfig>();
496 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
497
498 if (VerifyScheduling)
499 MF->verify(p: this, Banner: "Before post machine scheduling.");
500
501 // Instantiate the selected scheduler for this target, function, and
502 // optimization level.
503 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
504 ScheduleDAGMI::DumpDirection D;
505 if (PostRADirection == MISchedPostRASched::TopDown)
506 D = ScheduleDAGMI::DumpDirection::TopDown;
507 else if (PostRADirection == MISchedPostRASched::BottomUp)
508 D = ScheduleDAGMI::DumpDirection::BottomUp;
509 else
510 D = ScheduleDAGMI::DumpDirection::Bidirectional;
511 Scheduler->setDumpDirection(D);
512 scheduleRegions(Scheduler&: *Scheduler, FixKillFlags: true);
513
514 if (VerifyScheduling)
515 MF->verify(p: this, Banner: "After post machine scheduling.");
516 return true;
517}
518
/// Return true if the given instruction should not be included in a scheduling
/// region.
///
/// MachineScheduler does not currently support scheduling across calls. To
/// handle calls, the DAG builder needs to be modified to create register
/// anti/output dependencies on the registers clobbered by the call's regmask
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
/// scheduling across calls. In PostRA scheduling, we need the isCall check to
/// enforce the boundary, but there would be no benefit to postRA scheduling
/// across calls this late anyway.
529static bool isSchedBoundary(MachineBasicBlock::iterator MI,
530 MachineBasicBlock *MBB,
531 MachineFunction *MF,
532 const TargetInstrInfo *TII) {
533 return MI->isCall() || TII->isSchedulingBoundary(MI: *MI, MBB, MF: *MF);
534}
535
536/// A region of an MBB for scheduling.
537namespace {
538struct SchedRegion {
539 /// RegionBegin is the first instruction in the scheduling region, and
540 /// RegionEnd is either MBB->end() or the scheduling boundary after the
541 /// last instruction in the scheduling region. These iterators cannot refer
542 /// to instructions outside of the identified scheduling region because
543 /// those may be reordered before scheduling this region.
544 MachineBasicBlock::iterator RegionBegin;
545 MachineBasicBlock::iterator RegionEnd;
546 unsigned NumRegionInstrs;
547
548 SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
549 unsigned N) :
550 RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
551};
552} // end anonymous namespace
553
554using MBBRegionsVector = SmallVector<SchedRegion, 16>;
555
556static void
557getSchedRegions(MachineBasicBlock *MBB,
558 MBBRegionsVector &Regions,
559 bool RegionsTopDown) {
560 MachineFunction *MF = MBB->getParent();
561 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
562
563 MachineBasicBlock::iterator I = nullptr;
564 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
565 RegionEnd != MBB->begin(); RegionEnd = I) {
566
567 // Avoid decrementing RegionEnd for blocks with no terminator.
568 if (RegionEnd != MBB->end() ||
569 isSchedBoundary(MI: &*std::prev(x: RegionEnd), MBB: &*MBB, MF, TII)) {
570 --RegionEnd;
571 }
572
573 // The next region starts above the previous region. Look backward in the
574 // instruction stream until we find the nearest boundary.
575 unsigned NumRegionInstrs = 0;
576 I = RegionEnd;
577 for (;I != MBB->begin(); --I) {
578 MachineInstr &MI = *std::prev(x: I);
579 if (isSchedBoundary(MI: &MI, MBB: &*MBB, MF, TII))
580 break;
581 if (!MI.isDebugOrPseudoInstr()) {
582 // MBB::size() uses instr_iterator to count. Here we need a bundle to
583 // count as a single instruction.
584 ++NumRegionInstrs;
585 }
586 }
587
588 // It's possible we found a scheduling region that only has debug
589 // instructions. Don't bother scheduling these.
590 if (NumRegionInstrs != 0)
591 Regions.push_back(Elt: SchedRegion(I, RegionEnd, NumRegionInstrs));
592 }
593
594 if (RegionsTopDown)
595 std::reverse(first: Regions.begin(), last: Regions.end());
596}
597
598/// Main driver for both MachineScheduler and PostMachineScheduler.
599void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
600 bool FixKillFlags) {
601 // Visit all machine basic blocks.
602 //
603 // TODO: Visit blocks in global postorder or postorder within the bottom-up
604 // loop tree. Then we can optionally compute global RegPressure.
605 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
606 MBB != MBBEnd; ++MBB) {
607
608 Scheduler.startBlock(BB: &*MBB);
609
610#ifndef NDEBUG
611 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
612 continue;
613 if (SchedOnlyBlock.getNumOccurrences()
614 && (int)SchedOnlyBlock != MBB->getNumber())
615 continue;
616#endif
617
618 // Break the block into scheduling regions [I, RegionEnd). RegionEnd
619 // points to the scheduling boundary at the bottom of the region. The DAG
620 // does not include RegionEnd, but the region does (i.e. the next
621 // RegionEnd is above the previous RegionBegin). If the current block has
622 // no terminator then RegionEnd == MBB->end() for the bottom region.
623 //
    // All the regions of MBB are first found and stored in MBBRegions, which
    // are then processed top-down within the MBB when RegionsTopDown is true.
626 //
627 // The Scheduler may insert instructions during either schedule() or
628 // exitRegion(), even for empty regions. So the local iterators 'I' and
629 // 'RegionEnd' are invalid across these calls. Instructions must not be
630 // added to other regions than the current one without updating MBBRegions.
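    //
    // For example (an illustrative sketch), a block of the form
    //   %a = ...; %b = ...; CALL; %c = ...
    // yields two regions: one covering %a and %b with the CALL as its
    // boundary, and one covering %c and ending at MBB->end(); the CALL itself
    // is never reordered.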
631
632 MBBRegionsVector MBBRegions;
633 getSchedRegions(MBB: &*MBB, Regions&: MBBRegions, RegionsTopDown: Scheduler.doMBBSchedRegionsTopDown());
634 for (const SchedRegion &R : MBBRegions) {
635 MachineBasicBlock::iterator I = R.RegionBegin;
636 MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
637 unsigned NumRegionInstrs = R.NumRegionInstrs;
638
639 // Notify the scheduler of the region, even if we may skip scheduling
640 // it. Perhaps it still needs to be bundled.
641 Scheduler.enterRegion(bb: &*MBB, begin: I, end: RegionEnd, regioninstrs: NumRegionInstrs);
642
643 // Skip empty scheduling regions (0 or 1 schedulable instructions).
644 if (I == RegionEnd || I == std::prev(x: RegionEnd)) {
645 // Close the current region. Bundle the terminator if needed.
646 // This invalidates 'RegionEnd' and 'I'.
647 Scheduler.exitRegion();
648 continue;
649 }
650 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
651 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
652 << " " << MBB->getName() << "\n From: " << *I
653 << " To: ";
654 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
655 else dbgs() << "End\n";
656 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
657 if (DumpCriticalPathLength) {
658 errs() << MF->getName();
659 errs() << ":%bb. " << MBB->getNumber();
660 errs() << " " << MBB->getName() << " \n";
661 }
662
663 // Schedule a region: possibly reorder instructions.
664 // This invalidates the original region iterators.
665 Scheduler.schedule();
666
667 // Close the current region.
668 Scheduler.exitRegion();
669 }
670 Scheduler.finishBlock();
671 // FIXME: Ideally, no further passes should rely on kill flags. However,
672 // thumb2 size reduction is currently an exception, so the PostMIScheduler
673 // needs to do this.
674 if (FixKillFlags)
675 Scheduler.fixupKills(MBB&: *MBB);
676 }
677 Scheduler.finalizeSchedule();
678}
679
680void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
681 // unimplemented
682}
683
684#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
685LLVM_DUMP_METHOD void ReadyQueue::dump() const {
686 dbgs() << "Queue " << Name << ": ";
687 for (const SUnit *SU : Queue)
688 dbgs() << SU->NodeNum << " ";
689 dbgs() << "\n";
690}
691#endif
692
693//===----------------------------------------------------------------------===//
694// ScheduleDAGMI - Basic machine instruction scheduling. This is
695// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
696// virtual registers.
//===----------------------------------------------------------------------===//
698
699// Provide a vtable anchor.
700ScheduleDAGMI::~ScheduleDAGMI() = default;
701
702/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
703/// NumPredsLeft reaches zero, release the successor node.
704///
705/// FIXME: Adjust SuccSU height based on MinLatency.
706void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
707 SUnit *SuccSU = SuccEdge->getSUnit();
708
709 if (SuccEdge->isWeak()) {
710 --SuccSU->WeakPredsLeft;
711 if (SuccEdge->isCluster())
712 NextClusterSucc = SuccSU;
713 return;
714 }
715#ifndef NDEBUG
716 if (SuccSU->NumPredsLeft == 0) {
717 dbgs() << "*** Scheduling failed! ***\n";
718 dumpNode(SU: *SuccSU);
719 dbgs() << " has been released too many times!\n";
720 llvm_unreachable(nullptr);
721 }
722#endif
723 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
724 // CurrCycle may have advanced since then.
725 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
726 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
727
728 --SuccSU->NumPredsLeft;
729 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
730 SchedImpl->releaseTopNode(SU: SuccSU);
731}
732
733/// releaseSuccessors - Call releaseSucc on each of SU's successors.
734void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
735 for (SDep &Succ : SU->Succs)
736 releaseSucc(SU, SuccEdge: &Succ);
737}
738
739/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
740/// NumSuccsLeft reaches zero, release the predecessor node.
741///
742/// FIXME: Adjust PredSU height based on MinLatency.
743void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
744 SUnit *PredSU = PredEdge->getSUnit();
745
746 if (PredEdge->isWeak()) {
747 --PredSU->WeakSuccsLeft;
748 if (PredEdge->isCluster())
749 NextClusterPred = PredSU;
750 return;
751 }
752#ifndef NDEBUG
753 if (PredSU->NumSuccsLeft == 0) {
754 dbgs() << "*** Scheduling failed! ***\n";
755 dumpNode(SU: *PredSU);
756 dbgs() << " has been released too many times!\n";
757 llvm_unreachable(nullptr);
758 }
759#endif
760 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
761 // CurrCycle may have advanced since then.
762 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
763 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
764
765 --PredSU->NumSuccsLeft;
766 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
767 SchedImpl->releaseBottomNode(SU: PredSU);
768}
769
770/// releasePredecessors - Call releasePred on each of SU's predecessors.
771void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
772 for (SDep &Pred : SU->Preds)
773 releasePred(SU, PredEdge: &Pred);
774}
775
776void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
777 ScheduleDAGInstrs::startBlock(BB: bb);
778 SchedImpl->enterMBB(MBB: bb);
779}
780
781void ScheduleDAGMI::finishBlock() {
782 SchedImpl->leaveMBB();
783 ScheduleDAGInstrs::finishBlock();
784}
785
786/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
787/// after crossing a scheduling boundary. [begin, end) includes all instructions
788/// in the region, including the boundary itself and single-instruction regions
789/// that don't get scheduled.
790void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
791 MachineBasicBlock::iterator begin,
792 MachineBasicBlock::iterator end,
793 unsigned regioninstrs)
794{
795 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
796
797 SchedImpl->initPolicy(Begin: begin, End: end, NumRegionInstrs: regioninstrs);
798}
799
800/// This is normally called from the main scheduler loop but may also be invoked
801/// by the scheduling strategy to perform additional code motion.
802void ScheduleDAGMI::moveInstruction(
803 MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
804 // Advance RegionBegin if the first instruction moves down.
805 if (&*RegionBegin == MI)
806 ++RegionBegin;
807
808 // Update the instruction stream.
809 BB->splice(Where: InsertPos, Other: BB, From: MI);
810
811 // Update LiveIntervals
812 if (LIS)
813 LIS->handleMove(MI&: *MI, /*UpdateFlags=*/true);
814
815 // Recede RegionBegin if an instruction moves above the first.
816 if (RegionBegin == InsertPos)
817 RegionBegin = MI;
818}
819
820bool ScheduleDAGMI::checkSchedLimit() {
821#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
822 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
823 CurrentTop = CurrentBottom;
824 return false;
825 }
826 ++NumInstrsScheduled;
827#endif
828 return true;
829}
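
// As a usage note (asserts builds only, since MISchedCutoff is compiled out
// otherwise): passing -misched-cutoff=<N> stops scheduling after N
// instructions, which makes it possible to bisect a suspected scheduling
// problem down to the single decision that introduced it.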
830
831/// Per-region scheduling driver, called back from
832/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
833/// that does not consider liveness or register pressure. It is useful for
834/// PostRA scheduling and potentially other custom schedulers.
835void ScheduleDAGMI::schedule() {
836 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
837 LLVM_DEBUG(SchedImpl->dumpPolicy());
838
839 // Build the DAG.
840 buildSchedGraph(AA);
841
842 postProcessDAG();
843
844 SmallVector<SUnit*, 8> TopRoots, BotRoots;
845 findRootsAndBiasEdges(TopRoots, BotRoots);
846
847 LLVM_DEBUG(dump());
848 if (PrintDAGs) dump();
849 if (ViewMISchedDAGs) viewGraph();
850
851 // Initialize the strategy before modifying the DAG.
852 // This may initialize a DFSResult to be used for queue priority.
853 SchedImpl->initialize(DAG: this);
854
855 // Initialize ready queues now that the DAG and priority data are finalized.
856 initQueues(TopRoots, BotRoots);
857
858 bool IsTopNode = false;
859 while (true) {
860 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
861 SUnit *SU = SchedImpl->pickNode(IsTopNode);
862 if (!SU) break;
863
864 assert(!SU->isScheduled && "Node already scheduled");
865 if (!checkSchedLimit())
866 break;
867
868 MachineInstr *MI = SU->getInstr();
869 if (IsTopNode) {
870 assert(SU->isTopReady() && "node still has unscheduled dependencies");
871 if (&*CurrentTop == MI)
872 CurrentTop = nextIfDebug(I: ++CurrentTop, End: CurrentBottom);
873 else
874 moveInstruction(MI, InsertPos: CurrentTop);
875 } else {
876 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
877 MachineBasicBlock::iterator priorII =
878 priorNonDebug(I: CurrentBottom, Beg: CurrentTop);
879 if (&*priorII == MI)
880 CurrentBottom = priorII;
881 else {
882 if (&*CurrentTop == MI)
883 CurrentTop = nextIfDebug(I: ++CurrentTop, End: priorII);
884 moveInstruction(MI, InsertPos: CurrentBottom);
885 CurrentBottom = MI;
886 }
887 }
888 // Notify the scheduling strategy before updating the DAG.
889 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
890 // runs, it can then use the accurate ReadyCycle time to determine whether
891 // newly released nodes can move to the readyQ.
892 SchedImpl->schedNode(SU, IsTopNode);
893
894 updateQueues(SU, IsTopNode);
895 }
896 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
897
898 placeDebugValues();
899
900 LLVM_DEBUG({
901 dbgs() << "*** Final schedule for "
902 << printMBBReference(*begin()->getParent()) << " ***\n";
903 dumpSchedule();
904 dbgs() << '\n';
905 });
906}
907
908/// Apply each ScheduleDAGMutation step in order.
909void ScheduleDAGMI::postProcessDAG() {
910 for (auto &m : Mutations)
911 m->apply(DAG: this);
912}
913
914void ScheduleDAGMI::
915findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
916 SmallVectorImpl<SUnit*> &BotRoots) {
917 for (SUnit &SU : SUnits) {
918 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
919
920 // Order predecessors so DFSResult follows the critical path.
921 SU.biasCriticalPath();
922
923 // A SUnit is ready to top schedule if it has no predecessors.
924 if (!SU.NumPredsLeft)
925 TopRoots.push_back(Elt: &SU);
926 // A SUnit is ready to bottom schedule if it has no successors.
927 if (!SU.NumSuccsLeft)
928 BotRoots.push_back(Elt: &SU);
929 }
930 ExitSU.biasCriticalPath();
931}
932
/// Identify DAG roots and set up scheduler queues.
934void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
935 ArrayRef<SUnit*> BotRoots) {
936 NextClusterSucc = nullptr;
937 NextClusterPred = nullptr;
938
939 // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
940 //
941 // Nodes with unreleased weak edges can still be roots.
942 // Release top roots in forward order.
943 for (SUnit *SU : TopRoots)
944 SchedImpl->releaseTopNode(SU);
945
946 // Release bottom roots in reverse order so the higher priority nodes appear
947 // first. This is more natural and slightly more efficient.
948 for (SmallVectorImpl<SUnit*>::const_reverse_iterator
949 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
950 SchedImpl->releaseBottomNode(SU: *I);
951 }
952
953 releaseSuccessors(SU: &EntrySU);
954 releasePredecessors(SU: &ExitSU);
955
956 SchedImpl->registerRoots();
957
958 // Advance past initial DebugValues.
959 CurrentTop = nextIfDebug(I: RegionBegin, End: RegionEnd);
960 CurrentBottom = RegionEnd;
961}
962
963/// Update scheduler queues after scheduling an instruction.
964void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
965 // Release dependent instructions for scheduling.
966 if (IsTopNode)
967 releaseSuccessors(SU);
968 else
969 releasePredecessors(SU);
970
971 SU->isScheduled = true;
972}
973
974/// Reinsert any remaining debug_values, just like the PostRA scheduler.
975void ScheduleDAGMI::placeDebugValues() {
976 // If first instruction was a DBG_VALUE then put it back.
977 if (FirstDbgValue) {
978 BB->splice(Where: RegionBegin, Other: BB, From: FirstDbgValue);
979 RegionBegin = FirstDbgValue;
980 }
981
982 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
983 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
984 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(x: DI);
985 MachineInstr *DbgValue = P.first;
986 MachineBasicBlock::iterator OrigPrevMI = P.second;
987 if (&*RegionBegin == DbgValue)
988 ++RegionBegin;
989 BB->splice(Where: std::next(x: OrigPrevMI), Other: BB, From: DbgValue);
990 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
991 RegionEnd = DbgValue;
992 }
993}
994
995#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
996static const char *scheduleTableLegend = " i: issue\n x: resource booked";
997
998LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
  // Bail out when there is no schedule model to query.
  if (!SchedModel.hasInstrSchedModel())
    return;

  // Nothing to show if there are fewer than two instructions.
  if (BB->size() < 2)
    return;
1006
1007 dbgs() << " * Schedule table (TopDown):\n";
1008 dbgs() << scheduleTableLegend << "\n";
1009 const unsigned FirstCycle = getSUnit(MI: &*(std::begin(cont: *this)))->TopReadyCycle;
1010 unsigned LastCycle = getSUnit(MI: &*(std::prev(x: std::end(cont: *this))))->TopReadyCycle;
1011 for (MachineInstr &MI : *this) {
1012 SUnit *SU = getSUnit(MI: &MI);
1013 if (!SU)
1014 continue;
1015 const MCSchedClassDesc *SC = getSchedClass(SU);
1016 for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1017 PE = SchedModel.getWriteProcResEnd(SC);
1018 PI != PE; ++PI) {
1019 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1020 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1021 }
1022 }
1023 // Print the header with the cycles
1024 dbgs() << llvm::left_justify(Str: "Cycle", Width: HeaderColWidth);
1025 for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1026 dbgs() << llvm::left_justify(Str: "| " + std::to_string(val: C), Width: ColWidth);
1027 dbgs() << "|\n";
1028
1029 for (MachineInstr &MI : *this) {
1030 SUnit *SU = getSUnit(MI: &MI);
1031 if (!SU) {
1032 dbgs() << "Missing SUnit\n";
1033 continue;
1034 }
1035 std::string NodeName("SU(");
1036 NodeName += std::to_string(val: SU->NodeNum) + ")";
1037 dbgs() << llvm::left_justify(Str: NodeName, Width: HeaderColWidth);
1038 unsigned C = FirstCycle;
1039 for (; C <= LastCycle; ++C) {
1040 if (C == SU->TopReadyCycle)
1041 dbgs() << llvm::left_justify(Str: "| i", Width: ColWidth);
1042 else
1043 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1044 }
1045 dbgs() << "|\n";
1046 const MCSchedClassDesc *SC = getSchedClass(SU);
1047
1048 SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
1049 make_range(x: SchedModel.getWriteProcResBegin(SC),
1050 y: SchedModel.getWriteProcResEnd(SC)));
1051
1052 if (MISchedSortResourcesInTrace)
1053 llvm::stable_sort(Range&: ResourcesIt,
1054 C: [](const MCWriteProcResEntry &LHS,
1055 const MCWriteProcResEntry &RHS) -> bool {
1056 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1057 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1058 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1059 });
1060 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1061 C = FirstCycle;
1062 const std::string ResName =
1063 SchedModel.getResourceName(PIdx: PI.ProcResourceIdx);
1064 dbgs() << llvm::right_justify(Str: ResName + " ", Width: HeaderColWidth);
1065 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1066 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1067 }
1068 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1069 ++I, ++C)
1070 dbgs() << llvm::left_justify(Str: "| x", Width: ColWidth);
1071 while (C++ <= LastCycle)
1072 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1073 // Place end char
1074 dbgs() << "| \n";
1075 }
1076 }
1077}
1078
1079LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
  // Bail out when there is no schedule model to query.
  if (!SchedModel.hasInstrSchedModel())
    return;

  // Nothing to show if there are fewer than two instructions.
  if (BB->size() < 2)
    return;
1087
1088 dbgs() << " * Schedule table (BottomUp):\n";
1089 dbgs() << scheduleTableLegend << "\n";
1090
1091 const int FirstCycle = getSUnit(MI: &*(std::begin(cont: *this)))->BotReadyCycle;
1092 int LastCycle = getSUnit(MI: &*(std::prev(x: std::end(cont: *this))))->BotReadyCycle;
1093 for (MachineInstr &MI : *this) {
1094 SUnit *SU = getSUnit(MI: &MI);
1095 if (!SU)
1096 continue;
1097 const MCSchedClassDesc *SC = getSchedClass(SU);
1098 for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1099 PE = SchedModel.getWriteProcResEnd(SC);
1100 PI != PE; ++PI) {
1101 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1102 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1103 }
1104 }
1105 // Print the header with the cycles
1106 dbgs() << llvm::left_justify(Str: "Cycle", Width: HeaderColWidth);
1107 for (int C = FirstCycle; C >= LastCycle; --C)
1108 dbgs() << llvm::left_justify(Str: "| " + std::to_string(val: C), Width: ColWidth);
1109 dbgs() << "|\n";
1110
1111 for (MachineInstr &MI : *this) {
1112 SUnit *SU = getSUnit(MI: &MI);
1113 if (!SU) {
1114 dbgs() << "Missing SUnit\n";
1115 continue;
1116 }
1117 std::string NodeName("SU(");
1118 NodeName += std::to_string(val: SU->NodeNum) + ")";
1119 dbgs() << llvm::left_justify(Str: NodeName, Width: HeaderColWidth);
1120 int C = FirstCycle;
1121 for (; C >= LastCycle; --C) {
1122 if (C == (int)SU->BotReadyCycle)
1123 dbgs() << llvm::left_justify(Str: "| i", Width: ColWidth);
1124 else
1125 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1126 }
1127 dbgs() << "|\n";
1128 const MCSchedClassDesc *SC = getSchedClass(SU);
1129 SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
1130 make_range(x: SchedModel.getWriteProcResBegin(SC),
1131 y: SchedModel.getWriteProcResEnd(SC)));
1132
1133 if (MISchedSortResourcesInTrace)
1134 llvm::stable_sort(Range&: ResourcesIt,
1135 C: [](const MCWriteProcResEntry &LHS,
1136 const MCWriteProcResEntry &RHS) -> bool {
1137 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1138 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1139 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1140 });
1141 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1142 C = FirstCycle;
1143 const std::string ResName =
1144 SchedModel.getResourceName(PIdx: PI.ProcResourceIdx);
1145 dbgs() << llvm::right_justify(Str: ResName + " ", Width: HeaderColWidth);
1146 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1147 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1148 }
1149 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1150 ++I, --C)
1151 dbgs() << llvm::left_justify(Str: "| x", Width: ColWidth);
1152 while (C-- >= LastCycle)
1153 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1154 // Place end char
1155 dbgs() << "| \n";
1156 }
1157 }
1158}
1159#endif
1160
1161#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1162LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
1163 if (MISchedDumpScheduleTrace) {
1164 if (DumpDir == DumpDirection::TopDown)
1165 dumpScheduleTraceTopDown();
1166 else if (DumpDir == DumpDirection::BottomUp)
1167 dumpScheduleTraceBottomUp();
1168 else if (DumpDir == DumpDirection::Bidirectional) {
1169 dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1170 } else {
1171 dbgs() << "* Schedule table: DumpDirection not set.\n";
1172 }
1173 }
1174
1175 for (MachineInstr &MI : *this) {
1176 if (SUnit *SU = getSUnit(MI: &MI))
1177 dumpNode(SU: *SU);
1178 else
1179 dbgs() << "Missing SUnit\n";
1180 }
1181}
1182#endif
1183
1184//===----------------------------------------------------------------------===//
1185// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1186// preservation.
1187//===----------------------------------------------------------------------===//
1188
1189ScheduleDAGMILive::~ScheduleDAGMILive() {
1190 delete DFSResult;
1191}
1192
1193void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
1194 const MachineInstr &MI = *SU.getInstr();
1195 for (const MachineOperand &MO : MI.operands()) {
1196 if (!MO.isReg())
1197 continue;
1198 if (!MO.readsReg())
1199 continue;
1200 if (TrackLaneMasks && !MO.isUse())
1201 continue;
1202
1203 Register Reg = MO.getReg();
1204 if (!Reg.isVirtual())
1205 continue;
1206
1207 // Ignore re-defs.
1208 if (TrackLaneMasks) {
1209 bool FoundDef = false;
1210 for (const MachineOperand &MO2 : MI.all_defs()) {
1211 if (MO2.getReg() == Reg && !MO2.isDead()) {
1212 FoundDef = true;
1213 break;
1214 }
1215 }
1216 if (FoundDef)
1217 continue;
1218 }
1219
1220 // Record this local VReg use.
1221 VReg2SUnitMultiMap::iterator UI = VRegUses.find(Key: Reg);
1222 for (; UI != VRegUses.end(); ++UI) {
1223 if (UI->SU == &SU)
1224 break;
1225 }
1226 if (UI == VRegUses.end())
1227 VRegUses.insert(Val: VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
1228 }
1229}
1230
1231/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1232/// crossing a scheduling boundary. [begin, end) includes all instructions in
1233/// the region, including the boundary itself and single-instruction regions
1234/// that don't get scheduled.
1235void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
1236 MachineBasicBlock::iterator begin,
1237 MachineBasicBlock::iterator end,
1238 unsigned regioninstrs)
1239{
1240 // ScheduleDAGMI initializes SchedImpl's per-region policy.
1241 ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1242
1243 // For convenience remember the end of the liveness region.
1244 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(x: RegionEnd);
1245
1246 SUPressureDiffs.clear();
1247
1248 ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1249 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1250
1251 assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1252 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1253}
1254
// Set up the register pressure trackers for the top scheduled and bottom
// scheduled regions.
1257void ScheduleDAGMILive::initRegPressure() {
1258 VRegUses.clear();
1259 VRegUses.setUniverse(MRI.getNumVirtRegs());
1260 for (SUnit &SU : SUnits)
1261 collectVRegUses(SU);
1262
1263 TopRPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: RegionBegin,
1264 TrackLaneMasks: ShouldTrackLaneMasks, TrackUntiedDefs: false);
1265 BotRPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: LiveRegionEnd,
1266 TrackLaneMasks: ShouldTrackLaneMasks, TrackUntiedDefs: false);
1267
1268 // Close the RPTracker to finalize live ins.
1269 RPTracker.closeRegion();
1270
1271 LLVM_DEBUG(RPTracker.dump());
1272
1273 // Initialize the live ins and live outs.
1274 TopRPTracker.addLiveRegs(Regs: RPTracker.getPressure().LiveInRegs);
1275 BotRPTracker.addLiveRegs(Regs: RPTracker.getPressure().LiveOutRegs);
1276
1277 // Close one end of the tracker so we can call
1278 // getMaxUpward/DownwardPressureDelta before advancing across any
1279 // instructions. This converts currently live regs into live ins/outs.
1280 TopRPTracker.closeTop();
1281 BotRPTracker.closeBottom();
1282
1283 BotRPTracker.initLiveThru(RPTracker);
1284 if (!BotRPTracker.getLiveThru().empty()) {
1285 TopRPTracker.initLiveThru(PressureSet: BotRPTracker.getLiveThru());
1286 LLVM_DEBUG(dbgs() << "Live Thru: ";
1287 dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  }
1289
1290 // For each live out vreg reduce the pressure change associated with other
1291 // uses of the same vreg below the live-out reaching def.
1292 updatePressureDiffs(LiveUses: RPTracker.getPressure().LiveOutRegs);
1293
1294 // Account for liveness generated by the region boundary.
1295 if (LiveRegionEnd != RegionEnd) {
1296 SmallVector<RegisterMaskPair, 8> LiveUses;
1297 BotRPTracker.recede(LiveUses: &LiveUses);
1298 updatePressureDiffs(LiveUses);
1299 }
1300
1301 LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1302 dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1303 dbgs() << "Bottom Pressure:\n";
1304 dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1305
1306 assert((BotRPTracker.getPos() == RegionEnd ||
1307 (RegionEnd->isDebugInstr() &&
1308 BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1309 "Can't find the region bottom");
1310
1311 // Cache the list of excess pressure sets in this region. This will also track
1312 // the max pressure in the scheduled code for these sets.
1313 RegionCriticalPSets.clear();
1314 const std::vector<unsigned> &RegionPressure =
1315 RPTracker.getPressure().MaxSetPressure;
1316 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1317 unsigned Limit = RegClassInfo->getRegPressureSetLimit(Idx: i);
1318 if (RegionPressure[i] > Limit) {
1319 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1320 << " Actual " << RegionPressure[i] << "\n");
1321 RegionCriticalPSets.push_back(x: PressureChange(i));
1322 }
1323 }
1324 LLVM_DEBUG(dbgs() << "Excess PSets: ";
1325 for (const PressureChange &RCPS
1326 : RegionCriticalPSets) dbgs()
1327 << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1328 dbgs() << "\n");
1329}
1330
1331void ScheduleDAGMILive::
1332updateScheduledPressure(const SUnit *SU,
1333 const std::vector<unsigned> &NewMaxPressure) {
1334 const PressureDiff &PDiff = getPressureDiff(SU);
1335 unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1336 for (const PressureChange &PC : PDiff) {
1337 if (!PC.isValid())
1338 break;
1339 unsigned ID = PC.getPSet();
1340 while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1341 ++CritIdx;
1342 if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1343 if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1344 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1345 RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1346 }
1347 unsigned Limit = RegClassInfo->getRegPressureSetLimit(Idx: ID);
1348 if (NewMaxPressure[ID] >= Limit - 2) {
1349 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1350 << NewMaxPressure[ID]
1351 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1352 << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1353 << " livethru)\n");
1354 }
1355 }
1356}
1357
1358/// Update the PressureDiff array for liveness after scheduling this
1359/// instruction.
1360void ScheduleDAGMILive::updatePressureDiffs(
1361 ArrayRef<RegisterMaskPair> LiveUses) {
1362 for (const RegisterMaskPair &P : LiveUses) {
1363 Register Reg = P.RegUnit;
1364 /// FIXME: Currently assuming single-use physregs.
1365 if (!Reg.isVirtual())
1366 continue;
1367
1368 if (ShouldTrackLaneMasks) {
1369 // If the register has just become live then other uses won't change
1370 // this fact anymore => decrement pressure.
1371 // If the register has just become dead then other uses make it come
1372 // back to life => increment pressure.
1373 bool Decrement = P.LaneMask.any();
1374
1375 for (const VReg2SUnit &V2SU
1376 : make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) {
1377 SUnit &SU = *V2SU.SU;
1378 if (SU.isScheduled || &SU == &ExitSU)
1379 continue;
1380
1381 PressureDiff &PDiff = getPressureDiff(SU: &SU);
1382 PDiff.addPressureChange(RegUnit: Reg, IsDec: Decrement, MRI: &MRI);
1383 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1384 << printReg(Reg, TRI) << ':'
1385 << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1386 dbgs() << " to "; PDiff.dump(*TRI););
1387 }
1388 } else {
1389 assert(P.LaneMask.any());
1390 LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1391 // This may be called before CurrentBottom has been initialized. However,
1392 // BotRPTracker must have a valid position. We want the value live into the
1393 // instruction or live out of the block, so ask for the previous
1394 // instruction's live-out.
1395 const LiveInterval &LI = LIS->getInterval(Reg);
1396 VNInfo *VNI;
1397 MachineBasicBlock::const_iterator I =
1398 nextIfDebug(I: BotRPTracker.getPos(), End: BB->end());
1399 if (I == BB->end())
1400 VNI = LI.getVNInfoBefore(Idx: LIS->getMBBEndIdx(mbb: BB));
1401 else {
1402 LiveQueryResult LRQ = LI.Query(Idx: LIS->getInstructionIndex(Instr: *I));
1403 VNI = LRQ.valueIn();
1404 }
1405 // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1406 assert(VNI && "No live value at use.");
1407 for (const VReg2SUnit &V2SU
1408 : make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) {
1409 SUnit *SU = V2SU.SU;
1410 // If this use comes before the reaching def, it cannot be a last use,
1411 // so decrease its pressure change.
1412 if (!SU->isScheduled && SU != &ExitSU) {
1413 LiveQueryResult LRQ =
1414 LI.Query(Idx: LIS->getInstructionIndex(Instr: *SU->getInstr()));
1415 if (LRQ.valueIn() == VNI) {
1416 PressureDiff &PDiff = getPressureDiff(SU);
1417 PDiff.addPressureChange(RegUnit: Reg, IsDec: true, MRI: &MRI);
1418 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1419 << *SU->getInstr();
1420 dbgs() << " to "; PDiff.dump(*TRI););
1421 }
1422 }
1423 }
1424 }
1425 }
1426}
1427
1428void ScheduleDAGMILive::dump() const {
1429#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1430 if (EntrySU.getInstr() != nullptr)
1431 dumpNodeAll(SU: EntrySU);
1432 for (const SUnit &SU : SUnits) {
1433 dumpNodeAll(SU);
1434 if (ShouldTrackPressure) {
1435 dbgs() << " Pressure Diff : ";
1436 getPressureDiff(SU: &SU).dump(TRI: *TRI);
1437 }
1438 dbgs() << " Single Issue : ";
1439 if (SchedModel.mustBeginGroup(MI: SU.getInstr()) &&
1440 SchedModel.mustEndGroup(MI: SU.getInstr()))
1441 dbgs() << "true;";
1442 else
1443 dbgs() << "false;";
1444 dbgs() << '\n';
1445 }
1446 if (ExitSU.getInstr() != nullptr)
1447 dumpNodeAll(SU: ExitSU);
1448#endif
1449}
1450
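// A new scheduling algorithm normally only needs a MachineSchedStrategy. As a
// sketch (MyStrategy is illustrative), these are the key hooks exercised by
// the driver below; see MachineScheduler.h for the full interface:
//
//   struct MyStrategy : MachineSchedStrategy {
//     void initialize(ScheduleDAGMI *DAG) override;
//     void registerRoots() override;
//     SUnit *pickNode(bool &IsTopNode) override;
//     void schedNode(SUnit *SU, bool IsTopNode) override;
//     void releaseTopNode(SUnit *SU) override;
//     void releaseBottomNode(SUnit *SU) override;
//   };
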
1451/// schedule - Called back from MachineScheduler::runOnMachineFunction
1452/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1453/// only includes instructions that have DAG nodes, not scheduling boundaries.
1454///
1455/// This is a skeletal driver, with all the functionality pushed into helpers,
1456/// so that it can be easily extended by experimental schedulers. Generally,
1457/// implementing MachineSchedStrategy should be sufficient to implement a new
1458/// scheduling algorithm. However, if a scheduler further subclasses
1459/// ScheduleDAGMILive then it will want to override this virtual method in order
1460/// to update any specialized state.
1461void ScheduleDAGMILive::schedule() {
1462 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1463 LLVM_DEBUG(SchedImpl->dumpPolicy());
1464 buildDAGWithRegPressure();
1465
1466 postProcessDAG();
1467
1468 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1469 findRootsAndBiasEdges(TopRoots, BotRoots);
1470
1471 // Initialize the strategy before modifying the DAG.
1472 // This may initialize a DFSResult to be used for queue priority.
1473 SchedImpl->initialize(DAG: this);
1474
1475 LLVM_DEBUG(dump());
1476 if (PrintDAGs) dump();
1477 if (ViewMISchedDAGs) viewGraph();
1478
1479 // Initialize ready queues now that the DAG and priority data are finalized.
1480 initQueues(TopRoots, BotRoots);
1481
1482 bool IsTopNode = false;
1483 while (true) {
1484 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1485 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1486 if (!SU) break;
1487
1488 assert(!SU->isScheduled && "Node already scheduled");
1489 if (!checkSchedLimit())
1490 break;
1491
1492 scheduleMI(SU, IsTopNode);
1493
1494 if (DFSResult) {
1495 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1496 if (!ScheduledTrees.test(Idx: SubtreeID)) {
1497 ScheduledTrees.set(SubtreeID);
1498 DFSResult->scheduleTree(SubtreeID);
1499 SchedImpl->scheduleTree(SubtreeID);
1500 }
1501 }
1502
1503 // Notify the scheduling strategy after updating the DAG.
1504 SchedImpl->schedNode(SU, IsTopNode);
1505
1506 updateQueues(SU, IsTopNode);
1507 }
1508 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1509
1510 placeDebugValues();
1511
1512 LLVM_DEBUG({
1513 dbgs() << "*** Final schedule for "
1514 << printMBBReference(*begin()->getParent()) << " ***\n";
1515 dumpSchedule();
1516 dbgs() << '\n';
1517 });
1518}
1519
1520/// Build the DAG and set up three register pressure trackers.
1521void ScheduleDAGMILive::buildDAGWithRegPressure() {
1522 if (!ShouldTrackPressure) {
1523 RPTracker.reset();
1524 RegionCriticalPSets.clear();
1525 buildSchedGraph(AA);
1526 return;
1527 }
1528
1529 // Initialize the register pressure tracker used by buildSchedGraph.
1530 RPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: LiveRegionEnd,
1531 TrackLaneMasks: ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1532
1533  // Account for liveness generated by the region boundary.
1534 if (LiveRegionEnd != RegionEnd)
1535 RPTracker.recede();
1536
1537 // Build the DAG, and compute current register pressure.
1538 buildSchedGraph(AA, RPTracker: &RPTracker, PDiffs: &SUPressureDiffs, LIS, TrackLaneMasks: ShouldTrackLaneMasks);
1539
1540 // Initialize top/bottom trackers after computing region pressure.
1541 initRegPressure();
1542}
1543
1544void ScheduleDAGMILive::computeDFSResult() {
1545 if (!DFSResult)
1546    DFSResult = new SchedDFSResult(/*BottomUp=*/true, MinSubtreeSize);
1547 DFSResult->clear();
1548 ScheduledTrees.clear();
1549 DFSResult->resize(NumSUnits: SUnits.size());
1550 DFSResult->compute(SUnits);
1551 ScheduledTrees.resize(N: DFSResult->getNumSubtrees());
1552}
1553
1554/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1555/// only provides the critical path for single block loops. To handle loops that
1556/// span blocks, we could use the vreg path latencies provided by
1557/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1558/// available for use in the scheduler.
1559///
1560/// The cyclic path estimation identifies a def-use pair that crosses the back
1561/// edge and considers the depth and height of the nodes. For example, consider
1562/// the following instruction sequence where each instruction has unit latency
1563/// and defines an eponymous virtual register:
1564///
1565/// a->b(a,c)->c(b)->d(c)->exit
1566///
1567/// The cyclic critical path is two cycles: b->c->b
1568/// The acyclic critical path is four cycles: a->b->c->d->exit
1569/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1570/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1571/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1572/// LiveInDepth = depth(b) = len(a->b) = 1
1573///
1574/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1575/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1576/// CyclicCriticalPath = min(2, 2) = 2
1577///
1578/// This could be relevant to PostRA scheduling, but is currently implemented
1579/// assuming LiveIntervals.
1580unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1581  // This only applies to single-block loops.
1582 if (!BB->isSuccessor(MBB: BB))
1583 return 0;
1584
1585 unsigned MaxCyclicLatency = 0;
1586 // Visit each live out vreg def to find def/use pairs that cross iterations.
1587 for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1588 Register Reg = P.RegUnit;
1589 if (!Reg.isVirtual())
1590 continue;
1591 const LiveInterval &LI = LIS->getInterval(Reg);
1592 const VNInfo *DefVNI = LI.getVNInfoBefore(Idx: LIS->getMBBEndIdx(mbb: BB));
1593 if (!DefVNI)
1594 continue;
1595
1596 MachineInstr *DefMI = LIS->getInstructionFromIndex(index: DefVNI->def);
1597 const SUnit *DefSU = getSUnit(MI: DefMI);
1598 if (!DefSU)
1599 continue;
1600
1601 unsigned LiveOutHeight = DefSU->getHeight();
1602 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1603 // Visit all local users of the vreg def.
1604 for (const VReg2SUnit &V2SU
1605 : make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) {
1606 SUnit *SU = V2SU.SU;
1607 if (SU == &ExitSU)
1608 continue;
1609
1610 // Only consider uses of the phi.
1611 LiveQueryResult LRQ = LI.Query(Idx: LIS->getInstructionIndex(Instr: *SU->getInstr()));
1612 if (!LRQ.valueIn()->isPHIDef())
1613 continue;
1614
1615 // Assume that a path spanning two iterations is a cycle, which could
1616 // overestimate in strange cases. This allows cyclic latency to be
1617 // estimated as the minimum slack of the vreg's depth or height.
1618 unsigned CyclicLatency = 0;
1619 if (LiveOutDepth > SU->getDepth())
1620 CyclicLatency = LiveOutDepth - SU->getDepth();
1621
1622 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1623 if (LiveInHeight > LiveOutHeight) {
1624 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1625 CyclicLatency = LiveInHeight - LiveOutHeight;
1626 } else
1627 CyclicLatency = 0;
1628
1629 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1630 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1631 if (CyclicLatency > MaxCyclicLatency)
1632 MaxCyclicLatency = CyclicLatency;
1633 }
1634 }
1635 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1636 return MaxCyclicLatency;
1637}
1638
1639/// Release ExitSU predecessors and set up scheduler queues. Re-position
1640/// the Top RP tracker in case the region beginning has changed.
1641void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1642 ArrayRef<SUnit*> BotRoots) {
1643 ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1644 if (ShouldTrackPressure) {
1645 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1646 TopRPTracker.setPos(CurrentTop);
1647 }
1648}
1649
1650/// Move an instruction and update register pressure.
1651void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1652 // Move the instruction to its new location in the instruction stream.
1653 MachineInstr *MI = SU->getInstr();
1654
1655 if (IsTopNode) {
1656 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1657 if (&*CurrentTop == MI)
1658 CurrentTop = nextIfDebug(I: ++CurrentTop, End: CurrentBottom);
1659 else {
1660 moveInstruction(MI, InsertPos: CurrentTop);
1661 TopRPTracker.setPos(MI);
1662 }
1663
1664 if (ShouldTrackPressure) {
1665 // Update top scheduled pressure.
1666 RegisterOperands RegOpers;
1667 RegOpers.collect(MI: *MI, TRI: *TRI, MRI, TrackLaneMasks: ShouldTrackLaneMasks, IgnoreDead: false);
1668 if (ShouldTrackLaneMasks) {
1669 // Adjust liveness and add missing dead+read-undef flags.
1670 SlotIndex SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot();
1671 RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI);
1672 } else {
1673 // Adjust for missing dead-def flags.
1674 RegOpers.detectDeadDefs(MI: *MI, LIS: *LIS);
1675 }
1676
1677 TopRPTracker.advance(RegOpers);
1678 assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1679 LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1680 TopRPTracker.getRegSetPressureAtPos(), TRI););
1681
1682 updateScheduledPressure(SU, NewMaxPressure: TopRPTracker.getPressure().MaxSetPressure);
1683 }
1684 } else {
1685 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1686 MachineBasicBlock::iterator priorII =
1687 priorNonDebug(I: CurrentBottom, Beg: CurrentTop);
1688 if (&*priorII == MI)
1689 CurrentBottom = priorII;
1690 else {
1691 if (&*CurrentTop == MI) {
1692 CurrentTop = nextIfDebug(I: ++CurrentTop, End: priorII);
1693 TopRPTracker.setPos(CurrentTop);
1694 }
1695 moveInstruction(MI, InsertPos: CurrentBottom);
1696 CurrentBottom = MI;
1697 BotRPTracker.setPos(CurrentBottom);
1698 }
1699 if (ShouldTrackPressure) {
1700 RegisterOperands RegOpers;
1701 RegOpers.collect(MI: *MI, TRI: *TRI, MRI, TrackLaneMasks: ShouldTrackLaneMasks, IgnoreDead: false);
1702 if (ShouldTrackLaneMasks) {
1703 // Adjust liveness and add missing dead+read-undef flags.
1704 SlotIndex SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot();
1705 RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI);
1706 } else {
1707 // Adjust for missing dead-def flags.
1708 RegOpers.detectDeadDefs(MI: *MI, LIS: *LIS);
1709 }
1710
1711 if (BotRPTracker.getPos() != CurrentBottom)
1712 BotRPTracker.recedeSkipDebugValues();
1713 SmallVector<RegisterMaskPair, 8> LiveUses;
1714 BotRPTracker.recede(RegOpers, LiveUses: &LiveUses);
1715 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1716 LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1717 BotRPTracker.getRegSetPressureAtPos(), TRI););
1718
1719 updateScheduledPressure(SU, NewMaxPressure: BotRPTracker.getPressure().MaxSetPressure);
1720 updatePressureDiffs(LiveUses);
1721 }
1722 }
1723}
1724
1725//===----------------------------------------------------------------------===//
1726// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1727//===----------------------------------------------------------------------===//
1728
1729namespace {
1730
1731/// Post-process the DAG to create cluster edges between neighboring
1732/// loads or between neighboring stores.
1733class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1734 struct MemOpInfo {
1735 SUnit *SU;
1736 SmallVector<const MachineOperand *, 4> BaseOps;
1737 int64_t Offset;
1738 LocationSize Width;
1739 bool OffsetIsScalable;
1740
1741 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1742 int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1743 : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1744 Width(Width), OffsetIsScalable(OffsetIsScalable) {}
1745
1746 static bool Compare(const MachineOperand *const &A,
1747 const MachineOperand *const &B) {
1748 if (A->getType() != B->getType())
1749 return A->getType() < B->getType();
1750 if (A->isReg())
1751 return A->getReg() < B->getReg();
1752 if (A->isFI()) {
1753 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1754 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1755 bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1756 TargetFrameLowering::StackGrowsDown;
1757 return StackGrowsDown ? A->getIndex() > B->getIndex()
1758 : A->getIndex() < B->getIndex();
1759 }
1760
1761 llvm_unreachable("MemOpClusterMutation only supports register or frame "
1762 "index bases.");
1763 }
1764
1765 bool operator<(const MemOpInfo &RHS) const {
1766 // FIXME: Don't compare everything twice. Maybe use C++20 three way
1767 // comparison instead when it's available.
1768 if (std::lexicographical_compare(first1: BaseOps.begin(), last1: BaseOps.end(),
1769 first2: RHS.BaseOps.begin(), last2: RHS.BaseOps.end(),
1770 comp: Compare))
1771 return true;
1772 if (std::lexicographical_compare(first1: RHS.BaseOps.begin(), last1: RHS.BaseOps.end(),
1773 first2: BaseOps.begin(), last2: BaseOps.end(), comp: Compare))
1774 return false;
1775 if (Offset != RHS.Offset)
1776 return Offset < RHS.Offset;
1777 return SU->NodeNum < RHS.SU->NodeNum;
1778 }
1779 };
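
  // A possible shape for the C++20 three-way comparison mentioned in the FIXME
  // in MemOpInfo::operator< above (untested sketch; it would require <compare>
  // and a comparator that maps Compare onto std::strong_ordering):
  //
  //   std::strong_ordering operator<=>(const MemOpInfo &RHS) const {
  //     auto Cmp3 = [](const MachineOperand *A, const MachineOperand *B) {
  //       if (Compare(A, B))
  //         return std::strong_ordering::less;
  //       if (Compare(B, A))
  //         return std::strong_ordering::greater;
  //       return std::strong_ordering::equal;
  //     };
  //     if (auto C = std::lexicographical_compare_three_way(
  //             BaseOps.begin(), BaseOps.end(), RHS.BaseOps.begin(),
  //             RHS.BaseOps.end(), Cmp3);
  //         C != 0)
  //       return C;
  //     if (auto C = Offset <=> RHS.Offset; C != 0)
  //       return C;
  //     return SU->NodeNum <=> RHS.SU->NodeNum;
  //   }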
1780
1781 const TargetInstrInfo *TII;
1782 const TargetRegisterInfo *TRI;
1783 bool IsLoad;
1784 bool ReorderWhileClustering;
1785
1786public:
1787 BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1788 const TargetRegisterInfo *tri, bool IsLoad,
1789 bool ReorderWhileClustering)
1790 : TII(tii), TRI(tri), IsLoad(IsLoad),
1791 ReorderWhileClustering(ReorderWhileClustering) {}
1792
1793 void apply(ScheduleDAGInstrs *DAGInstrs) override;
1794
1795protected:
1796 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1797 ScheduleDAGInstrs *DAG);
1798 void collectMemOpRecords(std::vector<SUnit> &SUnits,
1799 SmallVectorImpl<MemOpInfo> &MemOpRecords);
1800 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1801 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
1802};
1803
1804class StoreClusterMutation : public BaseMemOpClusterMutation {
1805public:
1806 StoreClusterMutation(const TargetInstrInfo *tii,
1807 const TargetRegisterInfo *tri,
1808 bool ReorderWhileClustering)
1809 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
1810};
1811
1812class LoadClusterMutation : public BaseMemOpClusterMutation {
1813public:
1814 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
1815 bool ReorderWhileClustering)
1816 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
1817};
1818
1819} // end anonymous namespace
1820
1821namespace llvm {
1822
1823std::unique_ptr<ScheduleDAGMutation>
1824createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1825 const TargetRegisterInfo *TRI,
1826 bool ReorderWhileClustering) {
1827 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
1828 args&: TII, args&: TRI, args&: ReorderWhileClustering)
1829 : nullptr;
1830}
1831
1832std::unique_ptr<ScheduleDAGMutation>
1833createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1834 const TargetRegisterInfo *TRI,
1835 bool ReorderWhileClustering) {
1836 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
1837 args&: TII, args&: TRI, args&: ReorderWhileClustering)
1838 : nullptr;
1839}
1840
1841} // end namespace llvm
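
// For reference, targets typically enable these mutations where they construct
// their scheduler instance, e.g. in a TargetPassConfig::createMachineScheduler
// override. A rough sketch ("MyTargetPassConfig" is a placeholder, not a real
// class):
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }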
1842
1843// Sort all the loads/stores first. Then, for each load/store, check the
1844// following loads/stores one by one until the first non-dependent one is
1845// reached, and call the target hook to see if they can be clustered.
1846// If FastCluster is enabled, we assume all the loads/stores have already been
1847// preprocessed and have no dependencies on each other.
1848void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1849 ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1850 ScheduleDAGInstrs *DAG) {
1851 // Keep track of the current cluster length and bytes for each SUnit.
1852 DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
1853
1854  // At this point, the `MemOpRecords` array must hold at least two mem ops.
1855  // Try to cluster the mem ops collected within the `MemOpRecords` array.
1856 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1857    // The decision to cluster mem ops is based on target-dependent logic.
1858 auto MemOpa = MemOpRecords[Idx];
1859
1860 // Seek for the next load/store to do the cluster.
1861 unsigned NextIdx = Idx + 1;
1862 for (; NextIdx < End; ++NextIdx)
1863      // Skip if MemOpb has already been clustered or has a dependency on
1864      // MemOpa.
1865 if (!SUnit2ClusterInfo.count(Val: MemOpRecords[NextIdx].SU->NodeNum) &&
1866 (FastCluster ||
1867 (!DAG->IsReachable(SU: MemOpRecords[NextIdx].SU, TargetSU: MemOpa.SU) &&
1868 !DAG->IsReachable(SU: MemOpa.SU, TargetSU: MemOpRecords[NextIdx].SU))))
1869 break;
1870 if (NextIdx == End)
1871 continue;
1872
1873 auto MemOpb = MemOpRecords[NextIdx];
1874 unsigned ClusterLength = 2;
1875 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
1876 MemOpb.Width.getValue().getKnownMinValue();
1877 if (SUnit2ClusterInfo.count(Val: MemOpa.SU->NodeNum)) {
1878 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1879 CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
1880 MemOpb.Width.getValue().getKnownMinValue();
1881 }
1882
1883 if (!TII->shouldClusterMemOps(BaseOps1: MemOpa.BaseOps, Offset1: MemOpa.Offset,
1884 OffsetIsScalable1: MemOpa.OffsetIsScalable, BaseOps2: MemOpb.BaseOps,
1885 Offset2: MemOpb.Offset, OffsetIsScalable2: MemOpb.OffsetIsScalable,
1886 ClusterSize: ClusterLength, NumBytes: CurrentClusterBytes))
1887 continue;
1888
1889 SUnit *SUa = MemOpa.SU;
1890 SUnit *SUb = MemOpb.SU;
1891 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
1892 std::swap(a&: SUa, b&: SUb);
1893
1894 // FIXME: Is this check really required?
1895 if (!DAG->addEdge(SuccSU: SUb, PredDep: SDep(SUa, SDep::Cluster)))
1896 continue;
1897
1898 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1899 << SUb->NodeNum << ")\n");
1900 ++NumClustered;
1901
1902 if (IsLoad) {
1903 // Copy successor edges from SUa to SUb. Interleaving computation
1904 // dependent on SUa can prevent load combining due to register reuse.
1905 // Predecessor edges do not need to be copied from SUb to SUa since
1906 // nearby loads should have effectively the same inputs.
1907 for (const SDep &Succ : SUa->Succs) {
1908 if (Succ.getSUnit() == SUb)
1909 continue;
1910 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1911 << ")\n");
1912 DAG->addEdge(SuccSU: Succ.getSUnit(), PredDep: SDep(SUb, SDep::Artificial));
1913 }
1914 } else {
1915      // Copy predecessor edges from SUb to SUa so that the SUnits that SUb
1916      // depends on cannot be scheduled in between SUb and SUa. Successor
1917      // edges do not need to be copied from SUa to SUb since nothing will
1918      // depend on stores.
1919      // Note that we don't need to worry about memory dependencies: we never
1920      // try to cluster mem ops that have a memory dependency.
1921 for (const SDep &Pred : SUb->Preds) {
1922 if (Pred.getSUnit() == SUa)
1923 continue;
1924 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1925 << ")\n");
1926 DAG->addEdge(SuccSU: SUa, PredDep: SDep(Pred.getSUnit(), SDep::Artificial));
1927 }
1928 }
1929
1930 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1931 CurrentClusterBytes};
1932
1933 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1934 << ", Curr cluster bytes: " << CurrentClusterBytes
1935 << "\n");
1936 }
1937}
1938
1939void BaseMemOpClusterMutation::collectMemOpRecords(
1940 std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1941 for (auto &SU : SUnits) {
1942 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1943 (!IsLoad && !SU.getInstr()->mayStore()))
1944 continue;
1945
1946 const MachineInstr &MI = *SU.getInstr();
1947 SmallVector<const MachineOperand *, 4> BaseOps;
1948 int64_t Offset;
1949 bool OffsetIsScalable;
1950 LocationSize Width = 0;
1951 if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1952 OffsetIsScalable, Width, TRI)) {
1953 MemOpRecords.push_back(
1954 Elt: MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
1955
1956 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1957 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1958 << ", Width: " << Width << "\n");
1959 }
1960#ifndef NDEBUG
1961 for (const auto *Op : BaseOps)
1962 assert(Op);
1963#endif
1964 }
1965}
1966
1967bool BaseMemOpClusterMutation::groupMemOps(
1968 ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1969 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1970 bool FastCluster =
1971 ForceFastCluster ||
1972 MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1973
1974 for (const auto &MemOp : MemOps) {
1975 unsigned ChainPredID = DAG->SUnits.size();
1976 if (FastCluster) {
1977 for (const SDep &Pred : MemOp.SU->Preds) {
1978        // We only want to cluster mem ops that share the same ctrl (non-data)
1979        // pred, so that they have no ctrl dependency on each other. For store
1980        // instrs, however, we can still cluster them if the pred is a load instr.
1981 if ((Pred.isCtrl() &&
1982 (IsLoad ||
1983 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1984 !Pred.isArtificial()) {
1985 ChainPredID = Pred.getSUnit()->NodeNum;
1986 break;
1987 }
1988 }
1989 } else
1990 ChainPredID = 0;
1991
1992 Groups[ChainPredID].push_back(Elt: MemOp);
1993 }
1994 return FastCluster;
1995}
1996
1997/// Callback from DAG postProcessing to create cluster edges for loads/stores.
1998void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1999 // Collect all the clusterable loads/stores
2000 SmallVector<MemOpInfo, 32> MemOpRecords;
2001 collectMemOpRecords(SUnits&: DAG->SUnits, MemOpRecords);
2002
2003 if (MemOpRecords.size() < 2)
2004 return;
2005
2006  // If the DAG is too complex, use a heuristic to put loads/stores that have
2007  // no dependency into the same group, to avoid a compile-time blowup.
2008  // Note that some fusion pairs may be lost this way.
2009 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
2010 bool FastCluster = groupMemOps(MemOps: MemOpRecords, DAG, Groups);
2011
2012 for (auto &Group : Groups) {
2013    // Sort the loads/stores so that we can stop clustering as early as
2014    // possible.
2015 llvm::sort(C&: Group.second);
2016
2017    // Try to cluster all the neighboring loads/stores.
2018 clusterNeighboringMemOps(MemOpRecords: Group.second, FastCluster, DAG);
2019 }
2020}
2021
2022//===----------------------------------------------------------------------===//
2023// CopyConstrain - DAG post-processing to encourage copy elimination.
2024//===----------------------------------------------------------------------===//
2025
2026namespace {
2027
2028/// Post-process the DAG to create weak edges from all uses of a copy to
2029/// the one use that defines the copy's source vreg, most likely an induction
2030/// variable increment.
2031class CopyConstrain : public ScheduleDAGMutation {
2032 // Transient state.
2033 SlotIndex RegionBeginIdx;
2034
2035 // RegionEndIdx is the slot index of the last non-debug instruction in the
2036 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2037 SlotIndex RegionEndIdx;
2038
2039public:
2040 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2041
2042 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2043
2044protected:
2045 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2046};
2047
2048} // end anonymous namespace
2049
2050namespace llvm {
2051
2052std::unique_ptr<ScheduleDAGMutation>
2053createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
2054 const TargetRegisterInfo *TRI) {
2055 return std::make_unique<CopyConstrain>(args&: TII, args&: TRI);
2056}
2057
2058} // end namespace llvm
2059
2060/// constrainLocalCopy handles two possibilities:
2061/// 1) Local src:
2062/// I0: = dst
2063/// I1: src = ...
2064/// I2: = dst
2065/// I3: dst = src (copy)
2066/// (create pred->succ edges I0->I1, I2->I1)
2067///
2068/// 2) Local copy:
2069/// I0: dst = src (copy)
2070/// I1: = dst
2071/// I2: src = ...
2072/// I3: = dst
2073/// (create pred->succ edges I1->I2, I3->I2)
2074///
2075/// Although the MachineScheduler is currently constrained to single blocks,
2076/// this algorithm should handle extended blocks. An EBB is a set of
2077/// contiguously numbered blocks such that the previous block in the EBB is
2078/// always the single predecessor.
2079void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2080 LiveIntervals *LIS = DAG->getLIS();
2081 MachineInstr *Copy = CopySU->getInstr();
2082
2083 // Check for pure vreg copies.
2084 const MachineOperand &SrcOp = Copy->getOperand(i: 1);
2085 Register SrcReg = SrcOp.getReg();
2086 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2087 return;
2088
2089 const MachineOperand &DstOp = Copy->getOperand(i: 0);
2090 Register DstReg = DstOp.getReg();
2091 if (!DstReg.isVirtual() || DstOp.isDead())
2092 return;
2093
2094 // Check if either the dest or source is local. If it's live across a back
2095 // edge, it's not local. Note that if both vregs are live across the back
2096  // edge, we cannot successfully constrain the copy without cyclic scheduling.
2097 // If both the copy's source and dest are local live intervals, then we
2098 // should treat the dest as the global for the purpose of adding
2099 // constraints. This adds edges from source's other uses to the copy.
2100 unsigned LocalReg = SrcReg;
2101 unsigned GlobalReg = DstReg;
2102 LiveInterval *LocalLI = &LIS->getInterval(Reg: LocalReg);
2103 if (!LocalLI->isLocal(Start: RegionBeginIdx, End: RegionEndIdx)) {
2104 LocalReg = DstReg;
2105 GlobalReg = SrcReg;
2106 LocalLI = &LIS->getInterval(Reg: LocalReg);
2107 if (!LocalLI->isLocal(Start: RegionBeginIdx, End: RegionEndIdx))
2108 return;
2109 }
2110 LiveInterval *GlobalLI = &LIS->getInterval(Reg: GlobalReg);
2111
2112 // Find the global segment after the start of the local LI.
2113 LiveInterval::iterator GlobalSegment = GlobalLI->find(Pos: LocalLI->beginIndex());
2114 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2115 // local live range. We could create edges from other global uses to the local
2116 // start, but the coalescer should have already eliminated these cases, so
2117 // don't bother dealing with it.
2118 if (GlobalSegment == GlobalLI->end())
2119 return;
2120
2121 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2122 // returned the next global segment. But if GlobalSegment overlaps with
2123 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2124 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2125 if (GlobalSegment->contains(I: LocalLI->beginIndex()))
2126 ++GlobalSegment;
2127
2128 if (GlobalSegment == GlobalLI->end())
2129 return;
2130
2131 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2132 if (GlobalSegment != GlobalLI->begin()) {
2133 // Two address defs have no hole.
2134 if (SlotIndex::isSameInstr(A: std::prev(x: GlobalSegment)->end,
2135 B: GlobalSegment->start)) {
2136 return;
2137 }
2138    // If the prior global segment may be defined by the same two-address
2139    // instruction that also defines LocalLI, then we cannot make a hole here.
2140 if (SlotIndex::isSameInstr(A: std::prev(x: GlobalSegment)->start,
2141 B: LocalLI->beginIndex())) {
2142 return;
2143 }
2144 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2145 // it would be a disconnected component in the live range.
2146 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2147 "Disconnected LRG within the scheduling region.");
2148 }
2149 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(index: GlobalSegment->start);
2150 if (!GlobalDef)
2151 return;
2152
2153 SUnit *GlobalSU = DAG->getSUnit(MI: GlobalDef);
2154 if (!GlobalSU)
2155 return;
2156
2157 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2158 // constraining the uses of the last local def to precede GlobalDef.
2159 SmallVector<SUnit*,8> LocalUses;
2160 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(Idx: LocalLI->endIndex());
2161 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(index: LastLocalVN->def);
2162 SUnit *LastLocalSU = DAG->getSUnit(MI: LastLocalDef);
2163 for (const SDep &Succ : LastLocalSU->Succs) {
2164 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2165 continue;
2166 if (Succ.getSUnit() == GlobalSU)
2167 continue;
2168 if (!DAG->canAddEdge(SuccSU: GlobalSU, PredSU: Succ.getSUnit()))
2169 return;
2170 LocalUses.push_back(Elt: Succ.getSUnit());
2171 }
2172 // Open the top of the GlobalLI hole by constraining any earlier global uses
2173 // to precede the start of LocalLI.
2174 SmallVector<SUnit*,8> GlobalUses;
2175 MachineInstr *FirstLocalDef =
2176 LIS->getInstructionFromIndex(index: LocalLI->beginIndex());
2177 SUnit *FirstLocalSU = DAG->getSUnit(MI: FirstLocalDef);
2178 for (const SDep &Pred : GlobalSU->Preds) {
2179 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2180 continue;
2181 if (Pred.getSUnit() == FirstLocalSU)
2182 continue;
2183 if (!DAG->canAddEdge(SuccSU: FirstLocalSU, PredSU: Pred.getSUnit()))
2184 return;
2185 GlobalUses.push_back(Elt: Pred.getSUnit());
2186 }
2187 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2188 // Add the weak edges.
2189 for (SUnit *LU : LocalUses) {
2190 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2191 << GlobalSU->NodeNum << ")\n");
2192 DAG->addEdge(SuccSU: GlobalSU, PredDep: SDep(LU, SDep::Weak));
2193 }
2194 for (SUnit *GU : GlobalUses) {
2195 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2196 << FirstLocalSU->NodeNum << ")\n");
2197 DAG->addEdge(SuccSU: FirstLocalSU, PredDep: SDep(GU, SDep::Weak));
2198 }
2199}
2200
2201/// Callback from DAG postProcessing to create weak edges to encourage
2202/// copy elimination.
2203void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2204 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2205 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2206
2207 MachineBasicBlock::iterator FirstPos = nextIfDebug(I: DAG->begin(), End: DAG->end());
2208 if (FirstPos == DAG->end())
2209 return;
2210 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(Instr: *FirstPos);
2211 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2212 Instr: *priorNonDebug(I: DAG->end(), Beg: DAG->begin()));
2213
2214 for (SUnit &SU : DAG->SUnits) {
2215 if (!SU.getInstr()->isCopy())
2216 continue;
2217
2218 constrainLocalCopy(CopySU: &SU, DAG: static_cast<ScheduleDAGMILive*>(DAG));
2219 }
2220}
2221
2222//===----------------------------------------------------------------------===//
2223// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2224// and possibly other custom schedulers.
2225//===----------------------------------------------------------------------===//
2226
2227static const unsigned InvalidCycle = ~0U;
2228
2229SchedBoundary::~SchedBoundary() { delete HazardRec; }
2230
2231/// Given a Count of resource usage and a Latency value, return true if a
2232/// SchedBoundary becomes resource limited.
2233/// If we are checking after scheduling a node, we should return true when
2234/// we just reach the resource limit.
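///
/// For example, with LFactor == 2, Count == 10 and Latency == 4:
///   ResCntFactor = 10 - (4 * 2) = 2
/// After scheduling a node (AfterSchedNode == true) the zone already counts as
/// resource limited (2 >= 2); before scheduling a node it does not (2 > 2 is
/// false).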
2235static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2236 unsigned Latency, bool AfterSchedNode) {
2237 int ResCntFactor = (int)(Count - (Latency * LFactor));
2238 if (AfterSchedNode)
2239 return ResCntFactor >= (int)LFactor;
2240 else
2241 return ResCntFactor > (int)LFactor;
2242}
2243
2244void SchedBoundary::reset() {
2245 // A new HazardRec is created for each DAG and owned by SchedBoundary.
2246 // Destroying and reconstructing it is very expensive though. So keep
2247 // invalid, placeholder HazardRecs.
2248 if (HazardRec && HazardRec->isEnabled()) {
2249 delete HazardRec;
2250 HazardRec = nullptr;
2251 }
2252 Available.clear();
2253 Pending.clear();
2254 CheckPending = false;
2255 CurrCycle = 0;
2256 CurrMOps = 0;
2257 MinReadyCycle = std::numeric_limits<unsigned>::max();
2258 ExpectedLatency = 0;
2259 DependentLatency = 0;
2260 RetiredMOps = 0;
2261 MaxExecutedResCount = 0;
2262 ZoneCritResIdx = 0;
2263 IsResourceLimited = false;
2264 ReservedCycles.clear();
2265 ReservedResourceSegments.clear();
2266 ReservedCyclesIndex.clear();
2267 ResourceGroupSubUnitMasks.clear();
2268#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2269 // Track the maximum number of stall cycles that could arise either from the
2270 // latency of a DAG edge or the number of cycles that a processor resource is
2271 // reserved (SchedBoundary::ReservedCycles).
2272 MaxObservedStall = 0;
2273#endif
2274 // Reserve a zero-count for invalid CritResIdx.
2275 ExecutedResCounts.resize(N: 1);
2276 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2277}
2278
2279void SchedRemainder::
2280init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2281 reset();
2282 if (!SchedModel->hasInstrSchedModel())
2283 return;
2284 RemainingCounts.resize(N: SchedModel->getNumProcResourceKinds());
2285 for (SUnit &SU : DAG->SUnits) {
2286 const MCSchedClassDesc *SC = DAG->getSchedClass(SU: &SU);
2287 RemIssueCount += SchedModel->getNumMicroOps(MI: SU.getInstr(), SC)
2288 * SchedModel->getMicroOpFactor();
2289 for (TargetSchedModel::ProcResIter
2290 PI = SchedModel->getWriteProcResBegin(SC),
2291 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2292 unsigned PIdx = PI->ProcResourceIdx;
2293 unsigned Factor = SchedModel->getResourceFactor(ResIdx: PIdx);
2294 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2295 RemainingCounts[PIdx] +=
2296 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2297 }
2298 }
2299}
2300
2301void SchedBoundary::
2302init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2303 reset();
2304 DAG = dag;
2305 SchedModel = smodel;
2306 Rem = rem;
2307 if (SchedModel->hasInstrSchedModel()) {
2308 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2309 ReservedCyclesIndex.resize(N: ResourceCount);
2310 ExecutedResCounts.resize(N: ResourceCount);
2311 ResourceGroupSubUnitMasks.resize(N: ResourceCount, NV: APInt(ResourceCount, 0));
2312 unsigned NumUnits = 0;
2313
2314 for (unsigned i = 0; i < ResourceCount; ++i) {
2315 ReservedCyclesIndex[i] = NumUnits;
2316 NumUnits += SchedModel->getProcResource(PIdx: i)->NumUnits;
2317 if (isUnbufferedGroup(PIdx: i)) {
2318 auto SubUnits = SchedModel->getProcResource(PIdx: i)->SubUnitsIdxBegin;
2319 for (unsigned U = 0, UE = SchedModel->getProcResource(PIdx: i)->NumUnits;
2320 U != UE; ++U)
2321 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2322 }
2323 }
2324
2325 ReservedCycles.resize(new_size: NumUnits, x: InvalidCycle);
2326 }
2327}
2328
2329/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2330/// these "soft stalls" differently than the hard stall cycles based on CPU
2331/// resources and computed by checkHazard(). A fully in-order model
2332/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2333/// available for scheduling until they are ready. However, a weaker in-order
2334/// model may use this for heuristics. For example, if a processor has in-order
2335/// behavior when reading certain resources, this may come into play.
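///
/// For example, an unbuffered SU whose ready cycle is 7 while CurrCycle is 5
/// reports a 2-cycle soft stall.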
2336unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2337 if (!SU->isUnbuffered)
2338 return 0;
2339
2340 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2341 if (ReadyCycle > CurrCycle)
2342 return ReadyCycle - CurrCycle;
2343 return 0;
2344}
2345
2346/// Compute the next cycle at which the given processor resource unit
2347/// can be scheduled.
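///
/// For example, in the non-interval (ReservedCycles) mode with bottom-up
/// scheduling, an instance last recorded as unreserved at cycle 3, with
/// ReleaseAtCycle == 2 and CurrCycle == 4, is next usable at
/// max(4, 3 + 2) == 5.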
2348unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2349 unsigned ReleaseAtCycle,
2350 unsigned AcquireAtCycle) {
2351 if (SchedModel && SchedModel->enableIntervals()) {
2352 if (isTop())
2353 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2354 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2355
2356 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2357 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2358 }
2359
2360 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2361 // If this resource has never been used, always return cycle zero.
2362 if (NextUnreserved == InvalidCycle)
2363 return CurrCycle;
2364 // For bottom-up scheduling add the cycles needed for the current operation.
2365 if (!isTop())
2366 NextUnreserved = std::max(a: CurrCycle, b: NextUnreserved + ReleaseAtCycle);
2367 return NextUnreserved;
2368}
2369
2370/// Compute the next cycle at which the given processor resource can be
2371/// scheduled. Returns the next cycle and the index of the processor resource
2372/// instance in the reserved cycles vector.
2373std::pair<unsigned, unsigned>
2374SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2375 unsigned ReleaseAtCycle,
2376 unsigned AcquireAtCycle) {
2377 if (MischedDetailResourceBooking) {
2378 LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
2379 LLVM_DEBUG(dumpReservedCycles());
2380 LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
2381 }
2382 unsigned MinNextUnreserved = InvalidCycle;
2383 unsigned InstanceIdx = 0;
2384 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2385 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2386 assert(NumberOfInstances > 0 &&
2387 "Cannot have zero instances of a ProcResource");
2388
2389 if (isUnbufferedGroup(PIdx)) {
2390 // If any subunits are used by the instruction, report that the
2391 // subunits of the resource group are available at the first cycle
2392 // in which the unit is available, effectively removing the group
2393 // record from hazarding and basing the hazarding decisions on the
2394 // subunit records. Otherwise, choose the first available instance
2395 // from among the subunits. Specifications which assign cycles to
2396 // both the subunits and the group or which use an unbuffered
2397 // group with buffered subunits will appear to schedule
2398 // strangely. In the first case, the additional cycles for the
2399 // group will be ignored. In the second, the group will be
2400 // ignored entirely.
2401 for (const MCWriteProcResEntry &PE :
2402 make_range(x: SchedModel->getWriteProcResBegin(SC),
2403 y: SchedModel->getWriteProcResEnd(SC)))
2404 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2405 return std::make_pair(x: getNextResourceCycleByInstance(
2406 InstanceIdx: StartIndex, ReleaseAtCycle, AcquireAtCycle),
2407 y&: StartIndex);
2408
2409 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2410 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2411 unsigned NextUnreserved, NextInstanceIdx;
2412 std::tie(args&: NextUnreserved, args&: NextInstanceIdx) =
2413 getNextResourceCycle(SC, PIdx: SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2414 if (MinNextUnreserved > NextUnreserved) {
2415 InstanceIdx = NextInstanceIdx;
2416 MinNextUnreserved = NextUnreserved;
2417 }
2418 }
2419 return std::make_pair(x&: MinNextUnreserved, y&: InstanceIdx);
2420 }
2421
2422 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2423 ++I) {
2424 unsigned NextUnreserved =
2425 getNextResourceCycleByInstance(InstanceIdx: I, ReleaseAtCycle, AcquireAtCycle);
2426 if (MischedDetailResourceBooking)
2427 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2428 << NextUnreserved << "c\n");
2429 if (MinNextUnreserved > NextUnreserved) {
2430 InstanceIdx = I;
2431 MinNextUnreserved = NextUnreserved;
2432 }
2433 }
2434 if (MischedDetailResourceBooking)
2435 LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
2436 << "[" << InstanceIdx - StartIndex << "]"
2437 << " available @" << MinNextUnreserved << "c"
2438 << "\n");
2439 return std::make_pair(x&: MinNextUnreserved, y&: InstanceIdx);
2440}
2441
2442/// Does this SU have a hazard within the current instruction group.
2443///
2444/// The scheduler supports two modes of hazard recognition. The first is the
2445/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2446/// supports highly complicated in-order reservation tables
2447/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2448///
2449/// The second is a streamlined mechanism that checks for hazards based on
2450/// simple counters that the scheduler itself maintains. It explicitly checks
2451/// for instruction dispatch limitations, including the number of micro-ops that
2452/// can dispatch per cycle.
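///
/// For example, with an issue width of 4, CurrMOps == 3 and an SU that expands
/// to 2 micro-ops, 3 + 2 > 4, so the SU is a hazard for the current group and
/// must wait for the next cycle.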
2453///
2454/// TODO: Also check whether the SU must start a new group.
2455bool SchedBoundary::checkHazard(SUnit *SU) {
2456 if (HazardRec->isEnabled()
2457 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2458 return true;
2459 }
2460
2461 unsigned uops = SchedModel->getNumMicroOps(MI: SU->getInstr());
2462 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2463 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2464 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2465 return true;
2466 }
2467
2468 if (CurrMOps > 0 &&
2469 ((isTop() && SchedModel->mustBeginGroup(MI: SU->getInstr())) ||
2470 (!isTop() && SchedModel->mustEndGroup(MI: SU->getInstr())))) {
2471 LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2472 << (isTop() ? "begin" : "end") << " group\n");
2473 return true;
2474 }
2475
2476 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2477 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2478 for (const MCWriteProcResEntry &PE :
2479 make_range(x: SchedModel->getWriteProcResBegin(SC),
2480 y: SchedModel->getWriteProcResEnd(SC))) {
2481 unsigned ResIdx = PE.ProcResourceIdx;
2482 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2483 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2484 unsigned NRCycle, InstanceIdx;
2485 std::tie(args&: NRCycle, args&: InstanceIdx) =
2486 getNextResourceCycle(SC, PIdx: ResIdx, ReleaseAtCycle, AcquireAtCycle);
2487 if (NRCycle > CurrCycle) {
2488#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2489 MaxObservedStall = std::max(a: ReleaseAtCycle, b: MaxObservedStall);
2490#endif
2491 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2492 << SchedModel->getResourceName(ResIdx)
2493 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2494 << "=" << NRCycle << "c\n");
2495 return true;
2496 }
2497 }
2498 }
2499 return false;
2500}
2501
2502// Find the unscheduled node in ReadySUs with the highest latency.
2503unsigned SchedBoundary::
2504findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2505 SUnit *LateSU = nullptr;
2506 unsigned RemLatency = 0;
2507 for (SUnit *SU : ReadySUs) {
2508 unsigned L = getUnscheduledLatency(SU);
2509 if (L > RemLatency) {
2510 RemLatency = L;
2511 LateSU = SU;
2512 }
2513 }
2514 if (LateSU) {
2515 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2516 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2517 }
2518 return RemLatency;
2519}
2520
2521// Count resources in this zone and the remaining unscheduled
2522// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2523// resource index, or zero if the zone is issue limited.
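//
// For example, with a micro-op factor of 2, RemIssueCount == 6 and
// RetiredMOps == 3, the baseline count is 6 + 3 * 2 == 12; a resource whose
// executed-plus-remaining count exceeds 12 becomes the reported OtherCritIdx.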
2524unsigned SchedBoundary::
2525getOtherResourceCount(unsigned &OtherCritIdx) {
2526 OtherCritIdx = 0;
2527 if (!SchedModel->hasInstrSchedModel())
2528 return 0;
2529
2530 unsigned OtherCritCount = Rem->RemIssueCount
2531 + (RetiredMOps * SchedModel->getMicroOpFactor());
2532 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2533 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2534 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2535 PIdx != PEnd; ++PIdx) {
2536 unsigned OtherCount = getResourceCount(ResIdx: PIdx) + Rem->RemainingCounts[PIdx];
2537 if (OtherCount > OtherCritCount) {
2538 OtherCritCount = OtherCount;
2539 OtherCritIdx = PIdx;
2540 }
2541 }
2542 if (OtherCritIdx) {
2543 LLVM_DEBUG(
2544 dbgs() << " " << Available.getName() << " + Remain CritRes: "
2545 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2546 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2547 }
2548 return OtherCritCount;
2549}
2550
2551void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2552 unsigned Idx) {
2553 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2554
2555#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2556  // ReadyCycle was bumped up to CurrCycle when this node was
2557 // scheduled, but CurrCycle may have been eagerly advanced immediately after
2558 // scheduling, so may now be greater than ReadyCycle.
2559 if (ReadyCycle > CurrCycle)
2560 MaxObservedStall = std::max(a: ReadyCycle - CurrCycle, b: MaxObservedStall);
2561#endif
2562
2563 if (ReadyCycle < MinReadyCycle)
2564 MinReadyCycle = ReadyCycle;
2565
2566 // Check for interlocks first. For the purpose of other heuristics, an
2567 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2568 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2569 bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2570 checkHazard(SU) || (Available.size() >= ReadyListLimit);
2571
2572 if (!HazardDetected) {
2573 Available.push(SU);
2574
2575 if (InPQueue)
2576 Pending.remove(I: Pending.begin() + Idx);
2577 return;
2578 }
2579
2580 if (!InPQueue)
2581 Pending.push(SU);
2582}
2583
2584/// Move the boundary of scheduled code by one cycle.
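///
/// For example, with an issue width of 4, advancing from cycle 10 to cycle 12
/// retires up to 4 * (12 - 10) == 8 pending micro-ops from CurrMOps.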
2585void SchedBoundary::bumpCycle(unsigned NextCycle) {
2586 if (SchedModel->getMicroOpBufferSize() == 0) {
2587 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2588 "MinReadyCycle uninitialized");
2589 if (MinReadyCycle > NextCycle)
2590 NextCycle = MinReadyCycle;
2591 }
2592 // Update the current micro-ops, which will issue in the next cycle.
2593 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2594 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2595
2596 // Decrement DependentLatency based on the next cycle.
2597 if ((NextCycle - CurrCycle) > DependentLatency)
2598 DependentLatency = 0;
2599 else
2600 DependentLatency -= (NextCycle - CurrCycle);
2601
2602 if (!HazardRec->isEnabled()) {
2603 // Bypass HazardRec virtual calls.
2604 CurrCycle = NextCycle;
2605 } else {
2606 // Bypass getHazardType calls in case of long latency.
2607 for (; CurrCycle != NextCycle; ++CurrCycle) {
2608 if (isTop())
2609 HazardRec->AdvanceCycle();
2610 else
2611 HazardRec->RecedeCycle();
2612 }
2613 }
2614 CheckPending = true;
2615 IsResourceLimited =
2616 checkResourceLimit(LFactor: SchedModel->getLatencyFactor(), Count: getCriticalCount(),
2617 Latency: getScheduledLatency(), AfterSchedNode: true);
2618
2619 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2620 << '\n');
2621}
2622
2623void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2624 ExecutedResCounts[PIdx] += Count;
2625 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2626 MaxExecutedResCount = ExecutedResCounts[PIdx];
2627}
2628
2629/// Add the given processor resource to this scheduled zone.
2630///
2631/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2632/// cycles during which this resource is released.
2633///
2634/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2635/// cycles at which the resource is acquired after issue (assuming no stalls).
2636///
2637/// \return the next cycle at which the instruction may execute without
2638/// oversubscribing resources.
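///
/// For example, a resource with factor 3 that is held from AcquireAtCycle == 1
/// to ReleaseAtCycle == 4 adds 3 * (4 - 1) == 9 to its executed count.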
2639unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2640 unsigned ReleaseAtCycle,
2641 unsigned NextCycle,
2642 unsigned AcquireAtCycle) {
2643 unsigned Factor = SchedModel->getResourceFactor(ResIdx: PIdx);
2644  unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
2645 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2646 << ReleaseAtCycle << "x" << Factor << "u\n");
2647
2648 // Update Executed resources counts.
2649 incExecutedResources(PIdx, Count);
2650 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2651 Rem->RemainingCounts[PIdx] -= Count;
2652
2653 // Check if this resource exceeds the current critical resource. If so, it
2654 // becomes the critical resource.
2655 if (ZoneCritResIdx != PIdx && (getResourceCount(ResIdx: PIdx) > getCriticalCount())) {
2656 ZoneCritResIdx = PIdx;
2657 LLVM_DEBUG(dbgs() << " *** Critical resource "
2658 << SchedModel->getResourceName(PIdx) << ": "
2659 << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2660 << "c\n");
2661 }
2662 // For reserved resources, record the highest cycle using the resource.
2663 unsigned NextAvailable, InstanceIdx;
2664 std::tie(args&: NextAvailable, args&: InstanceIdx) =
2665 getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2666 if (NextAvailable > CurrCycle) {
2667 LLVM_DEBUG(dbgs() << " Resource conflict: "
2668 << SchedModel->getResourceName(PIdx)
2669 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2670 << " reserved until @" << NextAvailable << "\n");
2671 }
2672 return NextAvailable;
2673}
2674
2675/// Move the boundary of scheduled code by one SUnit.
2676void SchedBoundary::bumpNode(SUnit *SU) {
2677 // Update the reservation table.
2678 if (HazardRec->isEnabled()) {
2679 if (!isTop() && SU->isCall) {
2680 // Calls are scheduled with their preceding instructions. For bottom-up
2681 // scheduling, clear the pipeline state before emitting.
2682 HazardRec->Reset();
2683 }
2684 HazardRec->EmitInstruction(SU);
2685 // Scheduling an instruction may have made pending instructions available.
2686 CheckPending = true;
2687 }
2688 // checkHazard should prevent scheduling multiple instructions per cycle that
2689 // exceed the issue width.
2690 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2691 unsigned IncMOps = SchedModel->getNumMicroOps(MI: SU->getInstr());
2692 assert(
2693 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2694 "Cannot schedule this instruction's MicroOps in the current cycle.");
2695
2696 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2697 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2698
2699 unsigned NextCycle = CurrCycle;
2700 switch (SchedModel->getMicroOpBufferSize()) {
2701 case 0:
2702 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2703 break;
2704 case 1:
2705 if (ReadyCycle > NextCycle) {
2706 NextCycle = ReadyCycle;
2707 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2708 }
2709 break;
2710 default:
2711 // We don't currently model the OOO reorder buffer, so consider all
2712 // scheduled MOps to be "retired". We do loosely model in-order resource
2713 // latency. If this instruction uses an in-order resource, account for any
2714 // likely stall cycles.
2715 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2716 NextCycle = ReadyCycle;
2717 break;
2718 }
2719 RetiredMOps += IncMOps;
2720
2721 // Update resource counts and critical resource.
2722 if (SchedModel->hasInstrSchedModel()) {
2723 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2724 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2725 Rem->RemIssueCount -= DecRemIssue;
2726 if (ZoneCritResIdx) {
2727 // Scale scheduled micro-ops for comparing with the critical resource.
2728 unsigned ScaledMOps =
2729 RetiredMOps * SchedModel->getMicroOpFactor();
2730
2731 // If scaled micro-ops are now more than the previous critical resource by
2732 // a full cycle, then micro-ops issue becomes critical.
2733 if ((int)(ScaledMOps - getResourceCount(ResIdx: ZoneCritResIdx))
2734 >= (int)SchedModel->getLatencyFactor()) {
2735 ZoneCritResIdx = 0;
2736 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2737 << ScaledMOps / SchedModel->getLatencyFactor()
2738 << "c\n");
2739 }
2740 }
2741 for (TargetSchedModel::ProcResIter
2742 PI = SchedModel->getWriteProcResBegin(SC),
2743 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2744 unsigned RCycle =
2745 countResource(SC, PIdx: PI->ProcResourceIdx, ReleaseAtCycle: PI->ReleaseAtCycle, NextCycle,
2746 AcquireAtCycle: PI->AcquireAtCycle);
2747 if (RCycle > NextCycle)
2748 NextCycle = RCycle;
2749 }
2750 if (SU->hasReservedResource) {
2751 // For reserved resources, record the highest cycle using the resource.
2752 // For top-down scheduling, this is the cycle in which we schedule this
2753      // instruction plus the number of cycles the operation reserves the
2754      // resource. For bottom-up, it is simply the instruction's cycle.
2755 for (TargetSchedModel::ProcResIter
2756 PI = SchedModel->getWriteProcResBegin(SC),
2757 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2758 unsigned PIdx = PI->ProcResourceIdx;
2759 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2760
2761 if (SchedModel && SchedModel->enableIntervals()) {
2762 unsigned ReservedUntil, InstanceIdx;
2763 std::tie(args&: ReservedUntil, args&: InstanceIdx) = getNextResourceCycle(
2764 SC, PIdx, ReleaseAtCycle: PI->ReleaseAtCycle, AcquireAtCycle: PI->AcquireAtCycle);
2765 if (isTop()) {
2766 ReservedResourceSegments[InstanceIdx].add(
2767 A: ResourceSegments::getResourceIntervalTop(
2768 C: NextCycle, AcquireAtCycle: PI->AcquireAtCycle, ReleaseAtCycle: PI->ReleaseAtCycle),
2769 CutOff: MIResourceCutOff);
2770 } else {
2771 ReservedResourceSegments[InstanceIdx].add(
2772 A: ResourceSegments::getResourceIntervalBottom(
2773 C: NextCycle, AcquireAtCycle: PI->AcquireAtCycle, ReleaseAtCycle: PI->ReleaseAtCycle),
2774 CutOff: MIResourceCutOff);
2775 }
2776 } else {
2777
2778 unsigned ReservedUntil, InstanceIdx;
2779 std::tie(args&: ReservedUntil, args&: InstanceIdx) = getNextResourceCycle(
2780 SC, PIdx, ReleaseAtCycle: PI->ReleaseAtCycle, AcquireAtCycle: PI->AcquireAtCycle);
2781 if (isTop()) {
2782 ReservedCycles[InstanceIdx] =
2783 std::max(a: ReservedUntil, b: NextCycle + PI->ReleaseAtCycle);
2784 } else
2785 ReservedCycles[InstanceIdx] = NextCycle;
2786 }
2787 }
2788 }
2789 }
2790 }
2791 // Update ExpectedLatency and DependentLatency.
2792 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2793 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2794 if (SU->getDepth() > TopLatency) {
2795 TopLatency = SU->getDepth();
2796 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2797 << SU->NodeNum << ") " << TopLatency << "c\n");
2798 }
2799 if (SU->getHeight() > BotLatency) {
2800 BotLatency = SU->getHeight();
2801 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2802 << SU->NodeNum << ") " << BotLatency << "c\n");
2803 }
2804 // If we stall for any reason, bump the cycle.
2805 if (NextCycle > CurrCycle)
2806 bumpCycle(NextCycle);
2807 else
2808 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2809 // resource limited. If a stall occurred, bumpCycle does this.
2810 IsResourceLimited =
2811 checkResourceLimit(LFactor: SchedModel->getLatencyFactor(), Count: getCriticalCount(),
2812 Latency: getScheduledLatency(), AfterSchedNode: true);
2813
2814 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2815  // resets CurrMOps. Loop to handle instructions with more MOps than can issue in
2816 // one cycle. Since we commonly reach the max MOps here, opportunistically
2817 // bump the cycle to avoid uselessly checking everything in the readyQ.
2818 CurrMOps += IncMOps;
2819
2820 // Bump the cycle count for issue group constraints.
2821  // This must be done after NextCycle has been adjusted for all other stalls.
2822  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2823  // CurrCycle to X.
2824 if ((isTop() && SchedModel->mustEndGroup(MI: SU->getInstr())) ||
2825 (!isTop() && SchedModel->mustBeginGroup(MI: SU->getInstr()))) {
2826 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2827 << " group\n");
2828 bumpCycle(NextCycle: ++NextCycle);
2829 }
2830
2831 while (CurrMOps >= SchedModel->getIssueWidth()) {
2832 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2833 << CurrCycle << '\n');
2834 bumpCycle(NextCycle: ++NextCycle);
2835 }
2836 LLVM_DEBUG(dumpScheduledState());
2837}
2838
2839/// Release pending ready nodes into the available queue. This makes them
2840/// visible to heuristics.
2841void SchedBoundary::releasePending() {
2842 // If the available queue is empty, it is safe to reset MinReadyCycle.
2843 if (Available.empty())
2844 MinReadyCycle = std::numeric_limits<unsigned>::max();
2845
2846 // Check to see if any of the pending instructions are ready to issue. If
2847 // so, add them to the available queue.
2848 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2849 SUnit *SU = *(Pending.begin() + I);
2850 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2851
2852 if (ReadyCycle < MinReadyCycle)
2853 MinReadyCycle = ReadyCycle;
2854
2855 if (Available.size() >= ReadyListLimit)
2856 break;
2857
2858 releaseNode(SU, ReadyCycle, InPQueue: true, Idx: I);
2859 if (E != Pending.size()) {
2860 --I;
2861 --E;
2862 }
2863 }
2864 CheckPending = false;
2865}
2866
2867/// Remove SU from the ready set for this boundary.
2868void SchedBoundary::removeReady(SUnit *SU) {
2869 if (Available.isInQueue(SU))
2870 Available.remove(I: Available.find(SU));
2871 else {
2872 assert(Pending.isInQueue(SU) && "bad ready count");
2873 Pending.remove(I: Pending.find(SU));
2874 }
2875}
2876
2877/// If this queue only has one ready candidate, return it. As a side effect,
2878/// defer any nodes that now hit a hazard, and advance the cycle until at least
2879/// one node is ready. If multiple instructions are ready, return NULL.
2880SUnit *SchedBoundary::pickOnlyChoice() {
2881 if (CheckPending)
2882 releasePending();
2883
2884 // Defer any ready instrs that now have a hazard.
2885 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2886 if (checkHazard(SU: *I)) {
2887 Pending.push(SU: *I);
2888 I = Available.remove(I);
2889 continue;
2890 }
2891 ++I;
2892 }
2893 for (unsigned i = 0; Available.empty(); ++i) {
2894// FIXME: Re-enable assert once PR20057 is resolved.
2895// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2896// "permanent hazard");
2897 (void)i;
2898 bumpCycle(NextCycle: CurrCycle + 1);
2899 releasePending();
2900 }
2901
2902 LLVM_DEBUG(Pending.dump());
2903 LLVM_DEBUG(Available.dump());
2904
2905 if (Available.size() == 1)
2906 return *Available.begin();
2907 return nullptr;
2908}
2909
2910#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2911
2912/// Dump the content of the \ref ReservedCycles vector for the
2913/// resources that are used in the basic block.
2914///
2915LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
2916 if (!SchedModel->hasInstrSchedModel())
2917 return;
2918
2919 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2920 unsigned StartIdx = 0;
2921
2922 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
2923 const unsigned NumUnits = SchedModel->getProcResource(PIdx: ResIdx)->NumUnits;
2924 std::string ResName = SchedModel->getResourceName(PIdx: ResIdx);
2925 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
2926 dbgs() << ResName << "(" << UnitIdx << ") = ";
2927 if (SchedModel && SchedModel->enableIntervals()) {
2928 if (ReservedResourceSegments.count(x: StartIdx + UnitIdx))
2929 dbgs() << ReservedResourceSegments.at(k: StartIdx + UnitIdx);
2930 else
2931 dbgs() << "{ }\n";
2932 } else
2933 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
2934 }
2935 StartIdx += NumUnits;
2936 }
2937}
2938
2939// This is useful information to dump after bumpNode.
2940// Note that the Queue contents are more useful before pickNodeFromQueue.
2941LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2942 unsigned ResFactor;
2943 unsigned ResCount;
2944 if (ZoneCritResIdx) {
2945 ResFactor = SchedModel->getResourceFactor(ResIdx: ZoneCritResIdx);
2946 ResCount = getResourceCount(ResIdx: ZoneCritResIdx);
2947 } else {
2948 ResFactor = SchedModel->getMicroOpFactor();
2949 ResCount = RetiredMOps * ResFactor;
2950 }
2951 unsigned LFactor = SchedModel->getLatencyFactor();
2952 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2953 << " Retired: " << RetiredMOps;
2954 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2955 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2956 << ResCount / ResFactor << " "
2957 << SchedModel->getResourceName(PIdx: ZoneCritResIdx)
2958 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2959 << (IsResourceLimited ? " - Resource" : " - Latency")
2960 << " limited.\n";
2961 if (MISchedDumpReservedCycles)
2962 dumpReservedCycles();
2963}
2964#endif
2965
2966//===----------------------------------------------------------------------===//
2967// GenericScheduler - Generic implementation of MachineSchedStrategy.
2968//===----------------------------------------------------------------------===//
2969
2970void GenericSchedulerBase::SchedCandidate::
2971initResourceDelta(const ScheduleDAGMI *DAG,
2972 const TargetSchedModel *SchedModel) {
2973 if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2974 return;
2975
2976 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2977 for (TargetSchedModel::ProcResIter
2978 PI = SchedModel->getWriteProcResBegin(SC),
2979 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2980 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2981 ResDelta.CritResources += PI->ReleaseAtCycle;
2982 if (PI->ProcResourceIdx == Policy.DemandResIdx)
2983 ResDelta.DemandedResources += PI->ReleaseAtCycle;
2984 }
2985}
2986
2987/// Compute remaining latency. We need this both to determine whether the
2988/// overall schedule has become latency-limited and whether the instructions
2989/// outside this zone are resource or latency limited.
2990///
2991/// The "dependent" latency is updated incrementally during scheduling as the
2992/// max height/depth of scheduled nodes minus the cycles since it was
2993/// scheduled:
2994/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2995///
2996/// The "independent" latency is the max ready queue depth:
2997/// ILat = max N.depth for N in Available|Pending
2998///
2999/// RemainingLatency is the greater of independent and dependent latency.
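///
/// For example (hypothetical values): if the deepest scheduled node in the
/// zone still has DLat = 12 cycles of dependent latency outstanding and the
/// deepest node in the ready queues gives ILat = 15, then RemainingLatency
/// is 15.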
3000///
3001/// These computations are expensive, especially in DAGs with many edges, so
3002/// only do them if necessary.
3003static unsigned computeRemLatency(SchedBoundary &CurrZone) {
3004 unsigned RemLatency = CurrZone.getDependentLatency();
3005 RemLatency = std::max(a: RemLatency,
3006 b: CurrZone.findMaxLatency(ReadySUs: CurrZone.Available.elements()));
3007 RemLatency = std::max(a: RemLatency,
3008 b: CurrZone.findMaxLatency(ReadySUs: CurrZone.Pending.elements()));
3009 return RemLatency;
3010}
3011
3012/// Returns true if the current cycle plus remaining latency is greater than
3013/// the critical path in the scheduling region.
3014bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3015 SchedBoundary &CurrZone,
3016 bool ComputeRemLatency,
3017 unsigned &RemLatency) const {
3018 // The current cycle is already greater than the critical path, so we are
3019 // already latency limited and don't need to compute the remaining latency.
3020 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3021 return true;
3022
3023 // If we haven't scheduled anything yet, then we aren't latency limited.
3024 if (CurrZone.getCurrCycle() == 0)
3025 return false;
3026
3027 if (ComputeRemLatency)
3028 RemLatency = computeRemLatency(CurrZone);
3029
3030 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3031}
3032
3033/// Set the CandPolicy for a scheduling zone, given the current resources and
3034/// latencies inside and outside the zone.
3035void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
3036 SchedBoundary &CurrZone,
3037 SchedBoundary *OtherZone) {
3038 // Apply preemptive heuristics based on the total latency and resources
3039 // inside and outside this zone. Potential stalls should be considered before
3040 // following this policy.
3041
3042 // Compute the critical resource outside the zone.
3043 unsigned OtherCritIdx = 0;
3044 unsigned OtherCount =
3045 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3046
3047 bool OtherResLimited = false;
3048 unsigned RemLatency = 0;
3049 bool RemLatencyComputed = false;
3050 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3051 RemLatency = computeRemLatency(CurrZone);
3052 RemLatencyComputed = true;
3053 OtherResLimited = checkResourceLimit(LFactor: SchedModel->getLatencyFactor(),
3054 Count: OtherCount, Latency: RemLatency, AfterSchedNode: false);
3055 }
3056
3057 // Schedule aggressively for latency in PostRA mode. We don't check for
3058 // acyclic latency during PostRA, and highly out-of-order processors will
3059 // skip PostRA scheduling.
3060 if (!OtherResLimited &&
3061 (IsPostRA || shouldReduceLatency(Policy, CurrZone, ComputeRemLatency: !RemLatencyComputed,
3062 RemLatency))) {
3063 Policy.ReduceLatency |= true;
3064 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3065 << " RemainingLatency " << RemLatency << " + "
3066 << CurrZone.getCurrCycle() << "c > CritPath "
3067 << Rem.CriticalPath << "\n");
3068 }
3069 // If the same resource is limiting inside and outside the zone, do nothing.
3070 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3071 return;
3072
3073 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3074 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3075 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3076 } if (OtherResLimited) dbgs()
3077 << " RemainingLimit: "
3078 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3079 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3080 << " Latency limited both directions.\n");
3081
3082 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3083 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3084
3085 if (OtherResLimited)
3086 Policy.DemandResIdx = OtherCritIdx;
3087}
3088
3089#ifndef NDEBUG
3090const char *GenericSchedulerBase::getReasonStr(
3091 GenericSchedulerBase::CandReason Reason) {
3092 switch (Reason) {
3093 case NoCand: return "NOCAND ";
3094 case Only1: return "ONLY1 ";
3095 case PhysReg: return "PHYS-REG ";
3096 case RegExcess: return "REG-EXCESS";
3097 case RegCritical: return "REG-CRIT ";
3098 case Stall: return "STALL ";
3099 case Cluster: return "CLUSTER ";
3100 case Weak: return "WEAK ";
3101 case RegMax: return "REG-MAX ";
3102 case ResourceReduce: return "RES-REDUCE";
3103 case ResourceDemand: return "RES-DEMAND";
3104 case TopDepthReduce: return "TOP-DEPTH ";
3105 case TopPathReduce: return "TOP-PATH ";
3106 case BotHeightReduce:return "BOT-HEIGHT";
3107 case BotPathReduce: return "BOT-PATH ";
3108 case NextDefUse: return "DEF-USE ";
3109 case NodeOrder: return "ORDER ";
3110 };
3111 llvm_unreachable("Unknown reason!");
3112}
3113
3114void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
3115 PressureChange P;
3116 unsigned ResIdx = 0;
3117 unsigned Latency = 0;
3118 switch (Cand.Reason) {
3119 default:
3120 break;
3121 case RegExcess:
3122 P = Cand.RPDelta.Excess;
3123 break;
3124 case RegCritical:
3125 P = Cand.RPDelta.CriticalMax;
3126 break;
3127 case RegMax:
3128 P = Cand.RPDelta.CurrentMax;
3129 break;
3130 case ResourceReduce:
3131 ResIdx = Cand.Policy.ReduceResIdx;
3132 break;
3133 case ResourceDemand:
3134 ResIdx = Cand.Policy.DemandResIdx;
3135 break;
3136 case TopDepthReduce:
3137 Latency = Cand.SU->getDepth();
3138 break;
3139 case TopPathReduce:
3140 Latency = Cand.SU->getHeight();
3141 break;
3142 case BotHeightReduce:
3143 Latency = Cand.SU->getHeight();
3144 break;
3145 case BotPathReduce:
3146 Latency = Cand.SU->getDepth();
3147 break;
3148 }
3149 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Reason: Cand.Reason);
3150 if (P.isValid())
3151 dbgs() << " " << TRI->getRegPressureSetName(Idx: P.getPSet())
3152 << ":" << P.getUnitInc() << " ";
3153 else
3154 dbgs() << " ";
3155 if (ResIdx)
3156 dbgs() << " " << SchedModel->getProcResource(PIdx: ResIdx)->Name << " ";
3157 else
3158 dbgs() << " ";
3159 if (Latency)
3160 dbgs() << " " << Latency << " cycles ";
3161 else
3162 dbgs() << " ";
3163 dbgs() << '\n';
3164}
3165#endif
3166
3167namespace llvm {
3168/// Return true if this heuristic determines order.
3169/// TODO: Consider refactoring the return type of these functions to an integer
3170/// or enum, as we may need to differentiate whether TryCand is better than Cand.
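///
/// For example (hypothetical values), tryLess(2, 5, TryCand, Cand, Stall)
/// declares TryCand the winner and records Stall as its reason, while
/// tryLess(5, 2, ...) keeps Cand and only updates Cand.Reason to Stall if
/// Stall is a stronger (numerically smaller) reason; equal values return
/// false so later heuristics can break the tie.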
3171bool tryLess(int TryVal, int CandVal,
3172 GenericSchedulerBase::SchedCandidate &TryCand,
3173 GenericSchedulerBase::SchedCandidate &Cand,
3174 GenericSchedulerBase::CandReason Reason) {
3175 if (TryVal < CandVal) {
3176 TryCand.Reason = Reason;
3177 return true;
3178 }
3179 if (TryVal > CandVal) {
3180 if (Cand.Reason > Reason)
3181 Cand.Reason = Reason;
3182 return true;
3183 }
3184 return false;
3185}
3186
3187bool tryGreater(int TryVal, int CandVal,
3188 GenericSchedulerBase::SchedCandidate &TryCand,
3189 GenericSchedulerBase::SchedCandidate &Cand,
3190 GenericSchedulerBase::CandReason Reason) {
3191 if (TryVal > CandVal) {
3192 TryCand.Reason = Reason;
3193 return true;
3194 }
3195 if (TryVal < CandVal) {
3196 if (Cand.Reason > Reason)
3197 Cand.Reason = Reason;
3198 return true;
3199 }
3200 return false;
3201}
3202
3203bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
3204 GenericSchedulerBase::SchedCandidate &Cand,
3205 SchedBoundary &Zone) {
3206 if (Zone.isTop()) {
3207 // Prefer the candidate with the lesser depth, but only if one of them has
3208 // depth greater than the total latency scheduled so far, otherwise either
3209 // of them could be scheduled now with no stall.
3210 if (std::max(a: TryCand.SU->getDepth(), b: Cand.SU->getDepth()) >
3211 Zone.getScheduledLatency()) {
3212 if (tryLess(TryVal: TryCand.SU->getDepth(), CandVal: Cand.SU->getDepth(),
3213 TryCand, Cand, Reason: GenericSchedulerBase::TopDepthReduce))
3214 return true;
3215 }
3216 if (tryGreater(TryVal: TryCand.SU->getHeight(), CandVal: Cand.SU->getHeight(),
3217 TryCand, Cand, Reason: GenericSchedulerBase::TopPathReduce))
3218 return true;
3219 } else {
3220 // Prefer the candidate with the lesser height, but only if one of them has
3221 // height greater than the total latency scheduled so far, otherwise either
3222 // of them could be scheduled now with no stall.
3223 if (std::max(a: TryCand.SU->getHeight(), b: Cand.SU->getHeight()) >
3224 Zone.getScheduledLatency()) {
3225 if (tryLess(TryVal: TryCand.SU->getHeight(), CandVal: Cand.SU->getHeight(),
3226 TryCand, Cand, Reason: GenericSchedulerBase::BotHeightReduce))
3227 return true;
3228 }
3229 if (tryGreater(TryVal: TryCand.SU->getDepth(), CandVal: Cand.SU->getDepth(),
3230 TryCand, Cand, Reason: GenericSchedulerBase::BotPathReduce))
3231 return true;
3232 }
3233 return false;
3234}
3235} // end namespace llvm
3236
3237static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3238 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3239 << GenericSchedulerBase::getReasonStr(Reason) << '\n');
3240}
3241
3242static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
3243 tracePick(Reason: Cand.Reason, IsTop: Cand.AtTop);
3244}
3245
3246void GenericScheduler::initialize(ScheduleDAGMI *dag) {
3247 assert(dag->hasVRegLiveness() &&
3248 "(PreRA)GenericScheduler needs vreg liveness");
3249 DAG = static_cast<ScheduleDAGMILive*>(dag);
3250 SchedModel = DAG->getSchedModel();
3251 TRI = DAG->TRI;
3252
3253 if (RegionPolicy.ComputeDFSResult)
3254 DAG->computeDFSResult();
3255
3256 Rem.init(DAG, SchedModel);
3257 Top.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3258 Bot.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3259
3260 // Initialize resource counts.
3261
3262 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3263 // are disabled, then these HazardRecs will be disabled.
3264 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3265 if (!Top.HazardRec) {
3266 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3267 }
3268 if (!Bot.HazardRec) {
3269 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3270 }
3271 TopCand.SU = nullptr;
3272 BotCand.SU = nullptr;
3273}
3274
3275/// Initialize the per-region scheduling policy.
3276void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3277 MachineBasicBlock::iterator End,
3278 unsigned NumRegionInstrs) {
3279 const MachineFunction &MF = *Begin->getMF();
3280 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3281
3282 // Avoid setting up the register pressure tracker for small regions to save
3283 // compile time. As a rough heuristic, only track pressure when the number of
3284 // schedulable instructions exceeds half the number of allocatable integer
3285 // registers for the largest legal integer register type.
3286 RegionPolicy.ShouldTrackPressure = true;
3287 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3288 MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
3289 if (TLI->isTypeLegal(VT: LegalIntVT)) {
3290 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3291 RC: TLI->getRegClassFor(VT: LegalIntVT));
3292 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3293 break;
3294 }
3295 }
3296
3297 // For generic targets, we default to bottom-up, because it's simpler and more
3298 // compile-time optimizations have been implemented in that direction.
3299 RegionPolicy.OnlyBottomUp = true;
3300
3301 // Allow the subtarget to override default policy.
3302 MF.getSubtarget().overrideSchedPolicy(Policy&: RegionPolicy, NumRegionInstrs);
3303
3304 // After subtarget overrides, apply command line options.
3305 if (!EnableRegPressure) {
3306 RegionPolicy.ShouldTrackPressure = false;
3307 RegionPolicy.ShouldTrackLaneMasks = false;
3308 }
3309
3310 // Check whether -misched-topdown/bottomup can force or unforce the scheduling
3311 // direction. e.g. -misched-bottomup=false allows scheduling in both directions.
3312 assert((!ForceTopDown || !ForceBottomUp) &&
3313 "-misched-topdown incompatible with -misched-bottomup");
3314 if (ForceBottomUp.getNumOccurrences() > 0) {
3315 RegionPolicy.OnlyBottomUp = ForceBottomUp;
3316 if (RegionPolicy.OnlyBottomUp)
3317 RegionPolicy.OnlyTopDown = false;
3318 }
3319 if (ForceTopDown.getNumOccurrences() > 0) {
3320 RegionPolicy.OnlyTopDown = ForceTopDown;
3321 if (RegionPolicy.OnlyTopDown)
3322 RegionPolicy.OnlyBottomUp = false;
3323 }
3324}
3325
3326void GenericScheduler::dumpPolicy() const {
3327 // Cannot completely remove virtual function even in release mode.
3328#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3329 dbgs() << "GenericScheduler RegionPolicy: "
3330 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3331 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3332 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3333 << "\n";
3334#endif
3335}
3336
3337/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3338/// critical path by more cycles than it takes to drain the instruction buffer.
3339/// We estimate an upper bound on in-flight instructions as:
3340///
3341/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3342/// InFlightIterations = AcyclicPath / CyclesPerIteration
3343/// InFlightResources = InFlightIterations * LoopResources
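///
/// Worked example (hypothetical numbers): with CyclicPath = 4 cycles, an
/// AcyclicPath of 24 cycles, and a loop body that issues 8 micro-ops,
/// InFlightIterations = 24 / 4 = 6 and InFlightResources = 6 * 8 = 48
/// micro-ops, which is then compared against the micro-op buffer size.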
3344///
3345/// TODO: Check execution resources in addition to IssueCount.
3346void GenericScheduler::checkAcyclicLatency() {
3347 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3348 return;
3349
3350 // Scaled number of cycles per loop iteration.
3351 unsigned IterCount =
3352 std::max(a: Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3353 b: Rem.RemIssueCount);
3354 // Scaled acyclic critical path.
3355 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3356 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3357 unsigned InFlightCount =
3358 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3359 unsigned BufferLimit =
3360 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3361
3362 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3363
3364 LLVM_DEBUG(
3365 dbgs() << "IssueCycles="
3366 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3367 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3368 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3369 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3370 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3371 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3372}
3373
3374void GenericScheduler::registerRoots() {
3375 Rem.CriticalPath = DAG->ExitSU.getDepth();
3376
3377 // Some roots may not feed into ExitSU. Check all of them just in case.
3378 for (const SUnit *SU : Bot.Available) {
3379 if (SU->getDepth() > Rem.CriticalPath)
3380 Rem.CriticalPath = SU->getDepth();
3381 }
3382 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3383 if (DumpCriticalPathLength) {
3384 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3385 }
3386
3387 if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3388 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3389 checkAcyclicLatency();
3390 }
3391}
3392
3393namespace llvm {
3394bool tryPressure(const PressureChange &TryP,
3395 const PressureChange &CandP,
3396 GenericSchedulerBase::SchedCandidate &TryCand,
3397 GenericSchedulerBase::SchedCandidate &Cand,
3398 GenericSchedulerBase::CandReason Reason,
3399 const TargetRegisterInfo *TRI,
3400 const MachineFunction &MF) {
3401 // If one candidate decreases and the other increases, go with it.
3402 // Invalid candidates have UnitInc==0.
3403 if (tryGreater(TryVal: TryP.getUnitInc() < 0, CandVal: CandP.getUnitInc() < 0, TryCand, Cand,
3404 Reason)) {
3405 return true;
3406 }
3407 // Do not compare the magnitude of pressure changes between the top and bottom
3408 // boundaries.
3409 if (Cand.AtTop != TryCand.AtTop)
3410 return false;
3411
3412 // If both candidates affect the same set in the same boundary, go with the
3413 // smallest increase.
3414 unsigned TryPSet = TryP.getPSetOrMax();
3415 unsigned CandPSet = CandP.getPSetOrMax();
3416 if (TryPSet == CandPSet) {
3417 return tryLess(TryVal: TryP.getUnitInc(), CandVal: CandP.getUnitInc(), TryCand, Cand,
3418 Reason);
3419 }
3420
3421 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, PSetID: TryPSet) :
3422 std::numeric_limits<int>::max();
3423
3424 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, PSetID: CandPSet) :
3425 std::numeric_limits<int>::max();
3426
3427 // If the candidates are decreasing pressure, reverse priority.
3428 if (TryP.getUnitInc() < 0)
3429 std::swap(a&: TryRank, b&: CandRank);
3430 return tryGreater(TryVal: TryRank, CandVal: CandRank, TryCand, Cand, Reason);
3431}
3432
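/// Return the number of unscheduled weak predecessors (when scheduling
/// top-down) or weak successors (when scheduling bottom-up) of \p SU. Weak
/// edges are used for clustering and other soft ordering constraints.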
3433unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3434 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3435}
3436
3437/// Minimize physical register live ranges. Regalloc wants them adjacent to
3438/// their physreg def/use.
3439///
3440/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3441/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3442/// with the operation that produces or consumes the physreg. We'll do this when
3443/// regalloc has support for parallel copies.
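///
/// Illustrative example (hypothetical MIR): for a copy such as
///   %0:gr32 = COPY $edi
/// top-down scheduling returns 1, so the copy issues right after the
/// already-scheduled producer of $edi; bottom-up scheduling returns -1 when
/// the $edi definition lies outside the region (the copy sits at the
/// boundary), deferring the copy towards the top of the region.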
3444int biasPhysReg(const SUnit *SU, bool isTop) {
3445 const MachineInstr *MI = SU->getInstr();
3446
3447 if (MI->isCopy()) {
3448 unsigned ScheduledOper = isTop ? 1 : 0;
3449 unsigned UnscheduledOper = isTop ? 0 : 1;
3450 // If we have already scheduled the physreg producer/consumer, immediately
3451 // schedule the copy.
3452 if (MI->getOperand(i: ScheduledOper).getReg().isPhysical())
3453 return 1;
3454 // If the physreg is at the boundary, defer it. Otherwise schedule it
3455 // immediately to free the dependent. We can hoist the copy later.
3456 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3457 if (MI->getOperand(i: UnscheduledOper).getReg().isPhysical())
3458 return AtBoundary ? -1 : 1;
3459 }
3460
3461 if (MI->isMoveImmediate()) {
3462 // If we have a move immediate and all successors have been assigned, bias
3463 // towards scheduling this later. Make sure all register defs are to
3464 // physical registers.
3465 bool DoBias = true;
3466 for (const MachineOperand &Op : MI->defs()) {
3467 if (Op.isReg() && !Op.getReg().isPhysical()) {
3468 DoBias = false;
3469 break;
3470 }
3471 }
3472
3473 if (DoBias)
3474 return isTop ? -1 : 1;
3475 }
3476
3477 return 0;
3478}
3479} // end namespace llvm
3480
3481void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3482 bool AtTop,
3483 const RegPressureTracker &RPTracker,
3484 RegPressureTracker &TempTracker) {
3485 Cand.SU = SU;
3486 Cand.AtTop = AtTop;
3487 if (DAG->isTrackingPressure()) {
3488 if (AtTop) {
3489 TempTracker.getMaxDownwardPressureDelta(
3490 MI: Cand.SU->getInstr(),
3491 Delta&: Cand.RPDelta,
3492 CriticalPSets: DAG->getRegionCriticalPSets(),
3493 MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);
3494 } else {
3495 if (VerifyScheduling) {
3496 TempTracker.getMaxUpwardPressureDelta(
3497 MI: Cand.SU->getInstr(),
3498 PDiff: &DAG->getPressureDiff(SU: Cand.SU),
3499 Delta&: Cand.RPDelta,
3500 CriticalPSets: DAG->getRegionCriticalPSets(),
3501 MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);
3502 } else {
3503 RPTracker.getUpwardPressureDelta(
3504 MI: Cand.SU->getInstr(),
3505 PDiff&: DAG->getPressureDiff(SU: Cand.SU),
3506 Delta&: Cand.RPDelta,
3507 CriticalPSets: DAG->getRegionCriticalPSets(),
3508 MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);
3509 }
3510 }
3511 }
3512 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3513 << " Try SU(" << Cand.SU->NodeNum << ") "
3514 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3515 << Cand.RPDelta.Excess.getUnitInc() << "\n");
3516}
3517
3518/// Apply a set of heuristics to a new candidate. Heuristics are currently
3519/// hierarchical. This may be more efficient than a graduated cost model because
3520/// we don't need to evaluate all aspects of the model for each node in the
3521/// queue. But it's really done to make the heuristics easier to debug and
3522/// statistically analyze.
3523///
3524/// \param Cand provides the policy and current best candidate.
3525/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3526/// \param Zone describes the scheduled zone that we are extending, or nullptr
3527/// if Cand is from a different zone than TryCand.
3528/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
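///
/// The checks below are applied in order: physreg bias, excess pressure,
/// critical pressure, acyclic-latency and stall heuristics (same-boundary
/// only), clustering, weak edges (same-boundary only), max pressure, and then
/// resource balance, latency and original instruction order (same-boundary
/// only).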
3529bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3530 SchedCandidate &TryCand,
3531 SchedBoundary *Zone) const {
3532 // Initialize the candidate if needed.
3533 if (!Cand.isValid()) {
3534 TryCand.Reason = NodeOrder;
3535 return true;
3536 }
3537
3538 // Bias physreg defs and copies towards their uses and definitions, respectively.
3539 if (tryGreater(TryVal: biasPhysReg(SU: TryCand.SU, isTop: TryCand.AtTop),
3540 CandVal: biasPhysReg(SU: Cand.SU, isTop: Cand.AtTop), TryCand, Cand, Reason: PhysReg))
3541 return TryCand.Reason != NoCand;
3542
3543 // Avoid exceeding the target's limit.
3544 if (DAG->isTrackingPressure() && tryPressure(TryP: TryCand.RPDelta.Excess,
3545 CandP: Cand.RPDelta.Excess,
3546 TryCand, Cand, Reason: RegExcess, TRI,
3547 MF: DAG->MF))
3548 return TryCand.Reason != NoCand;
3549
3550 // Avoid increasing the max critical pressure in the scheduled region.
3551 if (DAG->isTrackingPressure() && tryPressure(TryP: TryCand.RPDelta.CriticalMax,
3552 CandP: Cand.RPDelta.CriticalMax,
3553 TryCand, Cand, Reason: RegCritical, TRI,
3554 MF: DAG->MF))
3555 return TryCand.Reason != NoCand;
3556
3557 // We only compare a subset of features when comparing nodes between the
3558 // Top and Bottom boundaries. Some properties are simply incomparable; in many
3559 // other instances we should only override the other boundary if something
3560 // is a clearly good pick on one boundary. Skip heuristics that are more
3561 // "tie-breaking" in nature.
3562 bool SameBoundary = Zone != nullptr;
3563 if (SameBoundary) {
3564 // For loops that are acyclic path limited, aggressively schedule for
3565 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3566 // heuristics to take precedence.
3567 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3568 tryLatency(TryCand, Cand, Zone&: *Zone))
3569 return TryCand.Reason != NoCand;
3570
3571 // Prioritize instructions that read unbuffered resources by stall cycles.
3572 if (tryLess(TryVal: Zone->getLatencyStallCycles(SU: TryCand.SU),
3573 CandVal: Zone->getLatencyStallCycles(SU: Cand.SU), TryCand, Cand, Reason: Stall))
3574 return TryCand.Reason != NoCand;
3575 }
3576
3577 // Keep clustered nodes together to encourage downstream peephole
3578 // optimizations which may reduce resource requirements.
3579 //
3580 // This is a best effort to set things up for a post-RA pass. Optimizations
3581 // like generating loads of multiple registers should ideally be done within
3582 // the scheduler pass by combining the loads during DAG postprocessing.
3583 const SUnit *CandNextClusterSU =
3584 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3585 const SUnit *TryCandNextClusterSU =
3586 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3587 if (tryGreater(TryVal: TryCand.SU == TryCandNextClusterSU,
3588 CandVal: Cand.SU == CandNextClusterSU,
3589 TryCand, Cand, Reason: Cluster))
3590 return TryCand.Reason != NoCand;
3591
3592 if (SameBoundary) {
3593 // Weak edges are for clustering and other constraints.
3594 if (tryLess(TryVal: getWeakLeft(SU: TryCand.SU, isTop: TryCand.AtTop),
3595 CandVal: getWeakLeft(SU: Cand.SU, isTop: Cand.AtTop),
3596 TryCand, Cand, Reason: Weak))
3597 return TryCand.Reason != NoCand;
3598 }
3599
3600 // Avoid increasing the max pressure of the entire region.
3601 if (DAG->isTrackingPressure() && tryPressure(TryP: TryCand.RPDelta.CurrentMax,
3602 CandP: Cand.RPDelta.CurrentMax,
3603 TryCand, Cand, Reason: RegMax, TRI,
3604 MF: DAG->MF))
3605 return TryCand.Reason != NoCand;
3606
3607 if (SameBoundary) {
3608 // Avoid critical resource consumption and balance the schedule.
3609 TryCand.initResourceDelta(DAG, SchedModel);
3610 if (tryLess(TryVal: TryCand.ResDelta.CritResources, CandVal: Cand.ResDelta.CritResources,
3611 TryCand, Cand, Reason: ResourceReduce))
3612 return TryCand.Reason != NoCand;
3613 if (tryGreater(TryVal: TryCand.ResDelta.DemandedResources,
3614 CandVal: Cand.ResDelta.DemandedResources,
3615 TryCand, Cand, Reason: ResourceDemand))
3616 return TryCand.Reason != NoCand;
3617
3618 // Avoid serializing long latency dependence chains.
3619 // For acyclic path limited loops, latency was already checked above.
3620 if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3621 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone&: *Zone))
3622 return TryCand.Reason != NoCand;
3623
3624 // Fall through to original instruction order.
3625 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3626 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3627 TryCand.Reason = NodeOrder;
3628 return true;
3629 }
3630 }
3631
3632 return false;
3633}
3634
3635/// Pick the best candidate from the queue.
3636///
3637/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3638/// DAG building. To adjust for the current scheduling location we need to
3639/// maintain the number of vreg uses remaining to be top-scheduled.
3640void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3641 const CandPolicy &ZonePolicy,
3642 const RegPressureTracker &RPTracker,
3643 SchedCandidate &Cand) {
3644 // getMaxPressureDelta temporarily modifies the tracker.
3645 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3646
3647 ReadyQueue &Q = Zone.Available;
3648 for (SUnit *SU : Q) {
3649
3650 SchedCandidate TryCand(ZonePolicy);
3651 initCandidate(Cand&: TryCand, SU, AtTop: Zone.isTop(), RPTracker, TempTracker);
3652 // Pass SchedBoundary only when comparing nodes from the same boundary.
3653 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3654 if (tryCandidate(Cand, TryCand, Zone: ZoneArg)) {
3655 // Initialize resource delta if needed in case future heuristics query it.
3656 if (TryCand.ResDelta == SchedResourceDelta())
3657 TryCand.initResourceDelta(DAG, SchedModel);
3658 Cand.setBest(TryCand);
3659 LLVM_DEBUG(traceCandidate(Cand));
3660 }
3661 }
3662}
3663
3664/// Pick the best candidate node from either the top or bottom queue.
3665SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3666 // Schedule as far as possible in the direction of no choice. This is most
3667 // efficient, but also provides the best heuristics for CriticalPSets.
3668 if (SUnit *SU = Bot.pickOnlyChoice()) {
3669 IsTopNode = false;
3670 tracePick(Reason: Only1, IsTop: false);
3671 return SU;
3672 }
3673 if (SUnit *SU = Top.pickOnlyChoice()) {
3674 IsTopNode = true;
3675 tracePick(Reason: Only1, IsTop: true);
3676 return SU;
3677 }
3678 // Set the bottom-up policy based on the state of the current bottom zone and
3679 // the instructions outside the zone, including the top zone.
3680 CandPolicy BotPolicy;
3681 setPolicy(Policy&: BotPolicy, /*IsPostRA=*/false, CurrZone&: Bot, OtherZone: &Top);
3682 // Set the top-down policy based on the state of the current top zone and
3683 // the instructions outside the zone, including the bottom zone.
3684 CandPolicy TopPolicy;
3685 setPolicy(Policy&: TopPolicy, /*IsPostRA=*/false, CurrZone&: Top, OtherZone: &Bot);
3686
3687 // See if BotCand is still valid (because we previously scheduled from Top).
3688 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3689 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3690 BotCand.Policy != BotPolicy) {
3691 BotCand.reset(NewPolicy: CandPolicy());
3692 pickNodeFromQueue(Zone&: Bot, ZonePolicy: BotPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: BotCand);
3693 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3694 } else {
3695 LLVM_DEBUG(traceCandidate(BotCand));
3696#ifndef NDEBUG
3697 if (VerifyScheduling) {
3698 SchedCandidate TCand;
3699 TCand.reset(NewPolicy: CandPolicy());
3700 pickNodeFromQueue(Zone&: Bot, ZonePolicy: BotPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: TCand);
3701 assert(TCand.SU == BotCand.SU &&
3702 "Last pick result should correspond to re-picking right now");
3703 }
3704#endif
3705 }
3706
3707 // Check if the top Q has a better candidate.
3708 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3709 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3710 TopCand.Policy != TopPolicy) {
3711 TopCand.reset(NewPolicy: CandPolicy());
3712 pickNodeFromQueue(Zone&: Top, ZonePolicy: TopPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TopCand);
3713 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3714 } else {
3715 LLVM_DEBUG(traceCandidate(TopCand));
3716#ifndef NDEBUG
3717 if (VerifyScheduling) {
3718 SchedCandidate TCand;
3719 TCand.reset(NewPolicy: CandPolicy());
3720 pickNodeFromQueue(Zone&: Top, ZonePolicy: TopPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TCand);
3721 assert(TCand.SU == TopCand.SU &&
3722 "Last pick result should correspond to re-picking right now");
3723 }
3724#endif
3725 }
3726
3727 // Pick best from BotCand and TopCand.
3728 assert(BotCand.isValid());
3729 assert(TopCand.isValid());
3730 SchedCandidate Cand = BotCand;
3731 TopCand.Reason = NoCand;
3732 if (tryCandidate(Cand, TryCand&: TopCand, Zone: nullptr)) {
3733 Cand.setBest(TopCand);
3734 LLVM_DEBUG(traceCandidate(Cand));
3735 }
3736
3737 IsTopNode = Cand.AtTop;
3738 tracePick(Cand);
3739 return Cand.SU;
3740}
3741
3742/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3743SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3744 if (DAG->top() == DAG->bottom()) {
3745 assert(Top.Available.empty() && Top.Pending.empty() &&
3746 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3747 return nullptr;
3748 }
3749 SUnit *SU;
3750 do {
3751 if (RegionPolicy.OnlyTopDown) {
3752 SU = Top.pickOnlyChoice();
3753 if (!SU) {
3754 CandPolicy NoPolicy;
3755 TopCand.reset(NewPolicy: NoPolicy);
3756 pickNodeFromQueue(Zone&: Top, ZonePolicy: NoPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TopCand);
3757 assert(TopCand.Reason != NoCand && "failed to find a candidate");
3758 tracePick(Cand: TopCand);
3759 SU = TopCand.SU;
3760 }
3761 IsTopNode = true;
3762 } else if (RegionPolicy.OnlyBottomUp) {
3763 SU = Bot.pickOnlyChoice();
3764 if (!SU) {
3765 CandPolicy NoPolicy;
3766 BotCand.reset(NewPolicy: NoPolicy);
3767 pickNodeFromQueue(Zone&: Bot, ZonePolicy: NoPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: BotCand);
3768 assert(BotCand.Reason != NoCand && "failed to find a candidate");
3769 tracePick(Cand: BotCand);
3770 SU = BotCand.SU;
3771 }
3772 IsTopNode = false;
3773 } else {
3774 SU = pickNodeBidirectional(IsTopNode);
3775 }
3776 } while (SU->isScheduled);
3777
3778 if (SU->isTopReady())
3779 Top.removeReady(SU);
3780 if (SU->isBottomReady())
3781 Bot.removeReady(SU);
3782
3783 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3784 << *SU->getInstr());
3785 return SU;
3786}
3787
3788void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3789 MachineBasicBlock::iterator InsertPos = SU->getInstr();
3790 if (!isTop)
3791 ++InsertPos;
3792 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3793
3794 // Find already scheduled copies with a single physreg dependence and move
3795 // them immediately adjacent to the scheduled instruction.
3796 for (SDep &Dep : Deps) {
3797 if (Dep.getKind() != SDep::Data ||
3798 !Register::isPhysicalRegister(Reg: Dep.getReg()))
3799 continue;
3800 SUnit *DepSU = Dep.getSUnit();
3801 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3802 continue;
3803 MachineInstr *Copy = DepSU->getInstr();
3804 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3805 continue;
3806 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3807 DAG->dumpNode(*Dep.getSUnit()));
3808 DAG->moveInstruction(MI: Copy, InsertPos);
3809 }
3810}
3811
3812/// Update the scheduler's state after scheduling a node. This is the same node
3813/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3814/// update its state based on the current cycle before MachineSchedStrategy
3815/// does.
3816///
3817/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3818/// them here. See comments in biasPhysReg.
3819void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3820 if (IsTopNode) {
3821 SU->TopReadyCycle = std::max(a: SU->TopReadyCycle, b: Top.getCurrCycle());
3822 Top.bumpNode(SU);
3823 if (SU->hasPhysRegUses)
3824 reschedulePhysReg(SU, isTop: true);
3825 } else {
3826 SU->BotReadyCycle = std::max(a: SU->BotReadyCycle, b: Bot.getCurrCycle());
3827 Bot.bumpNode(SU);
3828 if (SU->hasPhysRegDefs)
3829 reschedulePhysReg(SU, isTop: false);
3830 }
3831}
3832
3833/// Create the standard converging machine scheduler. This will be used as the
3834/// default scheduler if the target does not set a default.
3835ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3836 ScheduleDAGMILive *DAG =
3837 new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(args&: C));
3838 // Register DAG post-processors.
3839 //
3840 // FIXME: extend the mutation API to allow earlier mutations to instantiate
3841 // data and pass it to later mutations. Have a single mutation that gathers
3842 // the interesting nodes in one pass.
3843 DAG->addMutation(Mutation: createCopyConstrainDAGMutation(TII: DAG->TII, TRI: DAG->TRI));
3844
3845 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
3846 // Add MacroFusion mutation if fusions are not empty.
3847 const auto &MacroFusions = STI.getMacroFusions();
3848 if (!MacroFusions.empty())
3849 DAG->addMutation(Mutation: createMacroFusionDAGMutation(Predicates: MacroFusions));
3850 return DAG;
3851}
3852
3853static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3854 return createGenericSchedLive(C);
3855}
3856
3857static MachineSchedRegistry
3858GenericSchedRegistry("converge", "Standard converging scheduler.",
3859 createConvergingSched);
3860
3861//===----------------------------------------------------------------------===//
3862// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3863//===----------------------------------------------------------------------===//
3864
3865void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3866 DAG = Dag;
3867 SchedModel = DAG->getSchedModel();
3868 TRI = DAG->TRI;
3869
3870 Rem.init(DAG, SchedModel);
3871 Top.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3872 Bot.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3873
3874 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3875 // or are disabled, then these HazardRecs will be disabled.
3876 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3877 if (!Top.HazardRec) {
3878 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3879 }
3880 if (!Bot.HazardRec) {
3881 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3882 }
3883}
3884
3885void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3886 MachineBasicBlock::iterator End,
3887 unsigned NumRegionInstrs) {
3888 if (PostRADirection == MISchedPostRASched::TopDown) {
3889 RegionPolicy.OnlyTopDown = true;
3890 RegionPolicy.OnlyBottomUp = false;
3891 } else if (PostRADirection == MISchedPostRASched::BottomUp) {
3892 RegionPolicy.OnlyTopDown = false;
3893 RegionPolicy.OnlyBottomUp = true;
3894 } else if (PostRADirection == MISchedPostRASched::Bidirectional) {
3895 RegionPolicy.OnlyBottomUp = false;
3896 RegionPolicy.OnlyTopDown = false;
3897 }
3898}
3899
3900void PostGenericScheduler::registerRoots() {
3901 Rem.CriticalPath = DAG->ExitSU.getDepth();
3902
3903 // Some roots may not feed into ExitSU. Check all of them just in case.
3904 for (const SUnit *SU : Bot.Available) {
3905 if (SU->getDepth() > Rem.CriticalPath)
3906 Rem.CriticalPath = SU->getDepth();
3907 }
3908 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3909 if (DumpCriticalPathLength) {
3910 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3911 }
3912}
3913
3914/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3915///
3916/// \param Cand provides the policy and current best candidate.
3917/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3918/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
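///
/// The checks below mirror a subset of GenericScheduler::tryCandidate:
/// stalls, clustering, resource balance, latency, and finally the original
/// instruction order; register pressure is not considered after regalloc.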
3919bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3920 SchedCandidate &TryCand) {
3921 // Initialize the candidate if needed.
3922 if (!Cand.isValid()) {
3923 TryCand.Reason = NodeOrder;
3924 return true;
3925 }
3926
3927 // Prioritize instructions that read unbuffered resources by stall cycles.
3928 if (tryLess(TryVal: Top.getLatencyStallCycles(SU: TryCand.SU),
3929 CandVal: Top.getLatencyStallCycles(SU: Cand.SU), TryCand, Cand, Reason: Stall))
3930 return TryCand.Reason != NoCand;
3931
3932 // Keep clustered nodes together.
3933 if (tryGreater(TryVal: TryCand.SU == DAG->getNextClusterSucc(),
3934 CandVal: Cand.SU == DAG->getNextClusterSucc(),
3935 TryCand, Cand, Reason: Cluster))
3936 return TryCand.Reason != NoCand;
3937
3938 // Avoid critical resource consumption and balance the schedule.
3939 if (tryLess(TryVal: TryCand.ResDelta.CritResources, CandVal: Cand.ResDelta.CritResources,
3940 TryCand, Cand, Reason: ResourceReduce))
3941 return TryCand.Reason != NoCand;
3942 if (tryGreater(TryVal: TryCand.ResDelta.DemandedResources,
3943 CandVal: Cand.ResDelta.DemandedResources,
3944 TryCand, Cand, Reason: ResourceDemand))
3945 return TryCand.Reason != NoCand;
3946
3947 // Avoid serializing long latency dependence chains.
3948 if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Zone&: Top)) {
3949 return TryCand.Reason != NoCand;
3950 }
3951
3952 // Fall through to original instruction order.
3953 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3954 TryCand.Reason = NodeOrder;
3955 return true;
3956 }
3957
3958 return false;
3959}
3960
3961void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3962 SchedCandidate &Cand) {
3963 ReadyQueue &Q = Zone.Available;
3964 for (SUnit *SU : Q) {
3965 SchedCandidate TryCand(Cand.Policy);
3966 TryCand.SU = SU;
3967 TryCand.AtTop = Zone.isTop();
3968 TryCand.initResourceDelta(DAG, SchedModel);
3969 if (tryCandidate(Cand, TryCand)) {
3970 Cand.setBest(TryCand);
3971 LLVM_DEBUG(traceCandidate(Cand));
3972 }
3973 }
3974}
3975
3976/// Pick the best candidate node from either the top or bottom queue.
3977SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3978 // FIXME: This is similar to GenericScheduler::pickNodeBidirectional. Factor
3979 // out common parts.
3980
3981 // Schedule as far as possible in the direction of no choice. This is most
3982 // efficient, but also provides the best heuristics for CriticalPSets.
3983 if (SUnit *SU = Bot.pickOnlyChoice()) {
3984 IsTopNode = false;
3985 tracePick(Reason: Only1, IsTop: false);
3986 return SU;
3987 }
3988 if (SUnit *SU = Top.pickOnlyChoice()) {
3989 IsTopNode = true;
3990 tracePick(Reason: Only1, IsTop: true);
3991 return SU;
3992 }
3993 // Set the bottom-up policy based on the state of the current bottom zone and
3994 // the instructions outside the zone, including the top zone.
3995 CandPolicy BotPolicy;
3996 setPolicy(Policy&: BotPolicy, /*IsPostRA=*/true, CurrZone&: Bot, OtherZone: &Top);
3997 // Set the top-down policy based on the state of the current top zone and
3998 // the instructions outside the zone, including the bottom zone.
3999 CandPolicy TopPolicy;
4000 setPolicy(Policy&: TopPolicy, /*IsPostRA=*/true, CurrZone&: Top, OtherZone: &Bot);
4001
4002 // See if BotCand is still valid (because we previously scheduled from Top).
4003 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4004 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4005 BotCand.Policy != BotPolicy) {
4006 BotCand.reset(NewPolicy: CandPolicy());
4007 pickNodeFromQueue(Zone&: Bot, Cand&: BotCand);
4008 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4009 } else {
4010 LLVM_DEBUG(traceCandidate(BotCand));
4011#ifndef NDEBUG
4012 if (VerifyScheduling) {
4013 SchedCandidate TCand;
4014 TCand.reset(NewPolicy: CandPolicy());
4015 pickNodeFromQueue(Zone&: Bot, Cand&: TCand);
4016 assert(TCand.SU == BotCand.SU &&
4017 "Last pick result should correspond to re-picking right now");
4018 }
4019#endif
4020 }
4021
4022 // Check if the top Q has a better candidate.
4023 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4024 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4025 TopCand.Policy != TopPolicy) {
4026 TopCand.reset(NewPolicy: CandPolicy());
4027 pickNodeFromQueue(Zone&: Top, Cand&: TopCand);
4028 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4029 } else {
4030 LLVM_DEBUG(traceCandidate(TopCand));
4031#ifndef NDEBUG
4032 if (VerifyScheduling) {
4033 SchedCandidate TCand;
4034 TCand.reset(NewPolicy: CandPolicy());
4035 pickNodeFromQueue(Zone&: Top, Cand&: TCand);
4036 assert(TCand.SU == TopCand.SU &&
4037 "Last pick result should correspond to re-picking right now");
4038 }
4039#endif
4040 }
4041
4042 // Pick best from BotCand and TopCand.
4043 assert(BotCand.isValid());
4044 assert(TopCand.isValid());
4045 SchedCandidate Cand = BotCand;
4046 TopCand.Reason = NoCand;
4047 if (tryCandidate(Cand, TryCand&: TopCand)) {
4048 Cand.setBest(TopCand);
4049 LLVM_DEBUG(traceCandidate(Cand));
4050 }
4051
4052 IsTopNode = Cand.AtTop;
4053 tracePick(Cand);
4054 return Cand.SU;
4055}
4056
4057/// Pick the next node to schedule.
4058SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
4059 if (DAG->top() == DAG->bottom()) {
4060 assert(Top.Available.empty() && Top.Pending.empty() &&
4061 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4062 return nullptr;
4063 }
4064 SUnit *SU;
4065 do {
4066 if (RegionPolicy.OnlyBottomUp) {
4067 SU = Bot.pickOnlyChoice();
4068 if (SU) {
4069 tracePick(Reason: Only1, IsTop: false);
4070 } else {
4071 CandPolicy NoPolicy;
4072 BotCand.reset(NewPolicy: NoPolicy);
4073 // Set the bottom-up policy based on the state of the current bottom
4074 // zone and the instructions outside the zone, including the top zone.
4075 setPolicy(Policy&: BotCand.Policy, /*IsPostRA=*/true, CurrZone&: Bot, OtherZone: nullptr);
4076 pickNodeFromQueue(Zone&: Bot, Cand&: BotCand);
4077 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4078 tracePick(Cand: BotCand);
4079 SU = BotCand.SU;
4080 }
4081 IsTopNode = false;
4082 } else if (RegionPolicy.OnlyTopDown) {
4083 SU = Top.pickOnlyChoice();
4084 if (SU) {
4085 tracePick(Reason: Only1, IsTop: true);
4086 } else {
4087 CandPolicy NoPolicy;
4088 TopCand.reset(NewPolicy: NoPolicy);
4089 // Set the top-down policy based on the state of the current top zone
4090 // and the instructions outside the zone, including the bottom zone.
4091 setPolicy(Policy&: TopCand.Policy, /*IsPostRA=*/true, CurrZone&: Top, OtherZone: nullptr);
4092 pickNodeFromQueue(Zone&: Top, Cand&: TopCand);
4093 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4094 tracePick(Cand: TopCand);
4095 SU = TopCand.SU;
4096 }
4097 IsTopNode = true;
4098 } else {
4099 SU = pickNodeBidirectional(IsTopNode);
4100 }
4101 } while (SU->isScheduled);
4102
4103 if (SU->isTopReady())
4104 Top.removeReady(SU);
4105 if (SU->isBottomReady())
4106 Bot.removeReady(SU);
4107
4108 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4109 << *SU->getInstr());
4110 return SU;
4111}
4112
4113/// Called after ScheduleDAGMI has scheduled an instruction and updated
4114/// scheduled/remaining flags in the DAG nodes.
4115void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4116 if (IsTopNode) {
4117 SU->TopReadyCycle = std::max(a: SU->TopReadyCycle, b: Top.getCurrCycle());
4118 Top.bumpNode(SU);
4119 } else {
4120 SU->BotReadyCycle = std::max(a: SU->BotReadyCycle, b: Bot.getCurrCycle());
4121 Bot.bumpNode(SU);
4122 }
4123}
4124
4125ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
4126 ScheduleDAGMI *DAG =
4127 new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(args&: C),
4128 /*RemoveKillFlags=*/true);
4129 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
4130 // Add MacroFusion mutation if fusions are not empty.
4131 const auto &MacroFusions = STI.getMacroFusions();
4132 if (!MacroFusions.empty())
4133 DAG->addMutation(Mutation: createMacroFusionDAGMutation(Predicates: MacroFusions));
4134 return DAG;
4135}
4136
4137//===----------------------------------------------------------------------===//
4138// ILP Scheduler. Currently for experimental analysis of heuristics.
4139//===----------------------------------------------------------------------===//
4140
4141namespace {
4142
4143/// Order nodes by the ILP metric.
4144struct ILPOrder {
4145 const SchedDFSResult *DFSResult = nullptr;
4146 const BitVector *ScheduledTrees = nullptr;
4147 bool MaximizeILP;
4148
4149 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4150
4151 /// Apply a less-than relation on node priority.
4152 ///
4153 /// (Return true if A comes after B in the Q.)
4154 bool operator()(const SUnit *A, const SUnit *B) const {
4155 unsigned SchedTreeA = DFSResult->getSubtreeID(SU: A);
4156 unsigned SchedTreeB = DFSResult->getSubtreeID(SU: B);
4157 if (SchedTreeA != SchedTreeB) {
4158 // Unscheduled trees have lower priority.
4159 if (ScheduledTrees->test(Idx: SchedTreeA) != ScheduledTrees->test(Idx: SchedTreeB))
4160 return ScheduledTrees->test(Idx: SchedTreeB);
4161
4162 // Trees with shallower connections have lower priority.
4163 if (DFSResult->getSubtreeLevel(SubtreeID: SchedTreeA)
4164 != DFSResult->getSubtreeLevel(SubtreeID: SchedTreeB)) {
4165 return DFSResult->getSubtreeLevel(SubtreeID: SchedTreeA)
4166 < DFSResult->getSubtreeLevel(SubtreeID: SchedTreeB);
4167 }
4168 }
4169 if (MaximizeILP)
4170 return DFSResult->getILP(SU: A) < DFSResult->getILP(SU: B);
4171 else
4172 return DFSResult->getILP(SU: A) > DFSResult->getILP(SU: B);
4173 }
4174};
4175
4176/// Schedule based on the ILP metric.
4177class ILPScheduler : public MachineSchedStrategy {
4178 ScheduleDAGMILive *DAG = nullptr;
4179 ILPOrder Cmp;
4180
4181 std::vector<SUnit*> ReadyQ;
4182
4183public:
4184 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4185
4186 void initialize(ScheduleDAGMI *dag) override {
4187 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4188 DAG = static_cast<ScheduleDAGMILive*>(dag);
4189 DAG->computeDFSResult();
4190 Cmp.DFSResult = DAG->getDFSResult();
4191 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4192 ReadyQ.clear();
4193 }
4194
4195 void registerRoots() override {
4196 // Restore the heap in ReadyQ with the updated DFS results.
4197 std::make_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4198 }
4199
4200 /// Implement MachineSchedStrategy interface.
4201 /// -----------------------------------------
4202
4203 /// Callback to select the highest priority node from the ready Q.
4204 SUnit *pickNode(bool &IsTopNode) override {
4205 if (ReadyQ.empty()) return nullptr;
4206 std::pop_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4207 SUnit *SU = ReadyQ.back();
4208 ReadyQ.pop_back();
4209 IsTopNode = false;
4210 LLVM_DEBUG(dbgs() << "Pick node "
4211 << "SU(" << SU->NodeNum << ") "
4212 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4213 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4214 << " @"
4215 << DAG->getDFSResult()->getSubtreeLevel(
4216 DAG->getDFSResult()->getSubtreeID(SU))
4217 << '\n'
4218 << "Scheduling " << *SU->getInstr());
4219 return SU;
4220 }
4221
4222 /// Scheduler callback to notify that a new subtree is scheduled.
4223 void scheduleTree(unsigned SubtreeID) override {
4224 std::make_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4225 }
4226
4227 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4228 /// DFSResults, and resort the priority Q.
4229 void schedNode(SUnit *SU, bool IsTopNode) override {
4230 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4231 }
4232
4233 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4234
4235 void releaseBottomNode(SUnit *SU) override {
4236 ReadyQ.push_back(x: SU);
4237 std::push_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4238 }
4239};
4240
4241} // end anonymous namespace
4242
4243static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
4244 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(args: true));
4245}
4246static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
4247 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(args: false));
4248}
4249
4250static MachineSchedRegistry ILPMaxRegistry(
4251 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4252static MachineSchedRegistry ILPMinRegistry(
4253 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4254
4255//===----------------------------------------------------------------------===//
4256// Machine Instruction Shuffler for Correctness Testing
4257//===----------------------------------------------------------------------===//
4258
4259#ifndef NDEBUG
4260namespace {
4261
4262/// Apply a less-than relation on the node order, which corresponds to the
4263/// instruction order prior to scheduling. IsReverse implements greater-than.
4264template<bool IsReverse>
4265struct SUnitOrder {
4266 bool operator()(SUnit *A, SUnit *B) const {
4267 if (IsReverse)
4268 return A->NodeNum > B->NodeNum;
4269 else
4270 return A->NodeNum < B->NodeNum;
4271 }
4272};
4273
4274/// Reorder instructions as much as possible.
4275class InstructionShuffler : public MachineSchedStrategy {
4276 bool IsAlternating;
4277 bool IsTopDown;
4278
4279 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4280 // gives nodes with a higher number higher priority causing the latest
4281 // instructions to be scheduled first.
4282 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
4283 TopQ;
4284
4285 // When scheduling bottom-up, use greater-than as the queue priority.
4286 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
4287 BottomQ;
4288
4289public:
4290 InstructionShuffler(bool alternate, bool topdown)
4291 : IsAlternating(alternate), IsTopDown(topdown) {}
4292
4293 void initialize(ScheduleDAGMI*) override {
4294 TopQ.clear();
4295 BottomQ.clear();
4296 }
4297
4298 /// Implement MachineSchedStrategy interface.
4299 /// -----------------------------------------
4300
4301 SUnit *pickNode(bool &IsTopNode) override {
4302 SUnit *SU;
4303 if (IsTopDown) {
4304 do {
4305 if (TopQ.empty()) return nullptr;
4306 SU = TopQ.top();
4307 TopQ.pop();
4308 } while (SU->isScheduled);
4309 IsTopNode = true;
4310 } else {
4311 do {
4312 if (BottomQ.empty()) return nullptr;
4313 SU = BottomQ.top();
4314 BottomQ.pop();
4315 } while (SU->isScheduled);
4316 IsTopNode = false;
4317 }
4318 if (IsAlternating)
4319 IsTopDown = !IsTopDown;
4320 return SU;
4321 }
4322
4323 void schedNode(SUnit *SU, bool IsTopNode) override {}
4324
4325 void releaseTopNode(SUnit *SU) override {
4326 TopQ.push(x: SU);
4327 }
4328 void releaseBottomNode(SUnit *SU) override {
4329 BottomQ.push(x: SU);
4330 }
4331};
4332
4333} // end anonymous namespace
4334
4335static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
4336 bool Alternate = !ForceTopDown && !ForceBottomUp;
4337 bool TopDown = !ForceBottomUp;
4338 assert((TopDown || !ForceTopDown) &&
4339 "-misched-topdown incompatible with -misched-bottomup");
4340 return new ScheduleDAGMILive(
4341 C, std::make_unique<InstructionShuffler>(args&: Alternate, args&: TopDown));
4342}
4343
4344static MachineSchedRegistry ShufflerRegistry(
4345 "shuffle", "Shuffle machine instructions alternating directions",
4346 createInstructionShuffler);
4347#endif // !NDEBUG

//===----------------------------------------------------------------------===//
// GraphWriter support for ScheduleDAGMILive.
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace llvm {

template<> struct GraphTraits<
  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};

template<>
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}

  static std::string getGraphName(const ScheduleDAG *G) {
    return std::string(G->MF.getName());
  }

  static bool renderGraphFromBottomUp() {
    return true;
  }

  static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
    if (ViewMISchedCutoff == 0)
      return false;
    return (Node->Preds.size() > ViewMISchedCutoff
         || Node->Succs.size() > ViewMISchedCutoff);
  }

  /// If you want to override the dot attributes printed for a particular
  /// edge, override this method.
  static std::string getEdgeAttributes(const SUnit *Node,
                                       SUnitIterator EI,
                                       const ScheduleDAG *Graph) {
    if (EI.isArtificialDep())
      return "color=cyan,style=dashed";
    if (EI.isCtrlDep())
      return "color=blue,style=dashed";
    return "";
  }

  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    std::string Str;
    raw_string_ostream SS(Str);
    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    SS << "SU:" << SU->NodeNum;
    if (DFS)
      SS << " I:" << DFS->getNumInstrs(SU);
    return SS.str();
  }

  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    return G->getGraphNodeLabel(SU);
  }

  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
    std::string Str("shape=Mrecord");
    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    if (DFS) {
      Str += ",style=filled,fillcolor=\"#";
      Str += DOT::getColorString(DFS->getSubtreeID(N));
      Str += '"';
    }
    return Str;
  }
};

} // end namespace llvm
#endif // NDEBUG

/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
#ifndef NDEBUG
  ViewGraph(this, Name, false, Title);
#else
  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
         << "systems with Graphviz or gv!\n";
#endif // NDEBUG
}

/// Out-of-line implementation with no arguments is handy for gdb.
void ScheduleDAGMI::viewGraph() {
  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
}
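
// For illustration only: besides being callable from a debugger, viewGraph()
// is typically reached through the hidden -view-misched-dags visualization
// option, which pops up the rendered DAG after a region is scheduled, while
// -view-misched-cutoff (the ViewMISchedCutoff value checked in
// DOTGraphTraits::isNodeHidden above) hides nodes whose predecessor or
// successor lists exceed the given size. The exact option spellings are noted
// here as an aid and should be verified against the cl::opt definitions
// earlier in this file.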

/// Sort predicate for the intervals stored in an instance of
/// ResourceSegments. Intervals are always disjoint (no intersection
/// for any pairs of intervals), therefore we can sort the totality of
/// the intervals by looking only at the left boundary.
static bool sortIntervals(const ResourceSegments::IntervalTy &A,
                          const ResourceSegments::IntervalTy &B) {
  return A.first < B.first;
}

unsigned ResourceSegments::getFirstAvailableAt(
    unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
    std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
        IntervalBuilder) const {
  assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
                        sortIntervals) &&
         "Cannot execute on an un-sorted set of intervals.");

  // Zero resource usage is allowed by TargetSchedule.td but we do not construct
  // a ResourceSegment interval for that situation.
  if (AcquireAtCycle == ReleaseAtCycle)
    return CurrCycle;

  unsigned RetCycle = CurrCycle;
  ResourceSegments::IntervalTy NewInterval =
      IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
  for (auto &Interval : _Intervals) {
    if (!intersects(NewInterval, Interval))
      continue;

    // Move the interval right next to the top of the one it
    // intersects.
    assert(Interval.second > NewInterval.first &&
           "Invalid intervals configuration.");
    RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
    NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
  }
  return RetCycle;
}
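
// Worked example (for illustration only): with booked intervals
// { [2,5), [7,9) }, CurrCycle == 1, AcquireAtCycle == 0, ReleaseAtCycle == 2,
// and an IntervalBuilder that maps cycle C to [C, C+2), the initial candidate
// [1,3) overlaps [2,5), so RetCycle advances by 5 - 1 = 4 to 5; the rebuilt
// candidate [5,7) misses both booked intervals, and 5 is returned as the first
// cycle at which the resource can be booked.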

void ResourceSegments::add(ResourceSegments::IntervalTy A,
                           const unsigned CutOff) {
  assert(A.first <= A.second && "Cannot add negative resource usage");
  assert(CutOff > 0 && "0-size interval history has no use.");
  // Zero resource usage is allowed by TargetSchedule.td, in the case that the
  // instruction needed the resource to be available but does not use it.
  // However, ResourceSegment represents an interval that is closed on the left
  // and open on the right. It is impossible to represent an empty interval when
  // the left is closed. Do not add it to Intervals.
  if (A.first == A.second)
    return;

  assert(all_of(_Intervals,
                [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
                  return !intersects(A, Interval);
                }) &&
         "A resource is being overwritten");
  _Intervals.push_back(A);

  sortAndMerge();

  // Do not keep the full history of the intervals, just the
  // latest #CutOff.
  while (_Intervals.size() > CutOff)
    _Intervals.pop_front();
}
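
// For illustration only: with CutOff == 2 and existing segments
// { [1,3), [5,7) }, adding [8,10) merges nothing, but the history now holds
// three segments, so the oldest one, [1,3), is popped from the front and only
// the latest two segments { [5,7), [8,10) } are kept as booking history.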

bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
                                  ResourceSegments::IntervalTy B) {
  assert(A.first <= A.second && "Invalid interval");
  assert(B.first <= B.second && "Invalid interval");

  // Share one boundary.
  if ((A.first == B.first) || (A.second == B.second))
    return true;

  // full intersect:  [    ***    )  B
  //                      [***)      A
  if ((A.first > B.first) && (A.second < B.second))
    return true;

  // right intersect: [    ***)      B
  //                      [***    )  A
  if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
    return true;

  // left intersect:      [***    )  B
  //                  [    ***)      A
  if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
    return true;

  return false;
}
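
// For illustration only: with the closed-left/open-right convention used here,
// intersects([2,5), [4,8)) and intersects([1,4), [1,2)) return true, while
// intersects([2,4), [5,7)) and the merely touching pair
// intersects([1,3), [3,6)) return false.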

void ResourceSegments::sortAndMerge() {
  if (_Intervals.size() <= 1)
    return;

  // First sort the collection.
  _Intervals.sort(sortIntervals);

  // We can use `next` because the list holds at least two elements.
  auto next = std::next(std::begin(_Intervals));
  auto E = std::end(_Intervals);
  for (; next != E; ++next) {
    if (std::prev(next)->second >= next->first) {
      next->first = std::prev(next)->first;
      _Intervals.erase(std::prev(next));
      continue;
    }
  }
}
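
// For illustration only: starting from { [2,5), [1,3), [7,9) }, sorting yields
// { [1,3), [2,5), [7,9) }; since 3 >= 2, the first two segments are merged
// into [1,5), giving { [1,5), [7,9) }. Touching segments such as [1,3) and
// [3,5) are merged as well because the comparison uses >=.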