1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/CodeGen/MachineScheduler.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/PriorityQueue.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/ADT/iterator_range.h"
23#include "llvm/Analysis/AliasAnalysis.h"
24#include "llvm/CodeGen/LiveInterval.h"
25#include "llvm/CodeGen/LiveIntervals.h"
26#include "llvm/CodeGen/MachineBasicBlock.h"
27#include "llvm/CodeGen/MachineDominators.h"
28#include "llvm/CodeGen/MachineFunction.h"
29#include "llvm/CodeGen/MachineFunctionPass.h"
30#include "llvm/CodeGen/MachineInstr.h"
31#include "llvm/CodeGen/MachineLoopInfo.h"
32#include "llvm/CodeGen/MachineOperand.h"
33#include "llvm/CodeGen/MachinePassRegistry.h"
34#include "llvm/CodeGen/MachineRegisterInfo.h"
35#include "llvm/CodeGen/RegisterClassInfo.h"
36#include "llvm/CodeGen/RegisterPressure.h"
37#include "llvm/CodeGen/ScheduleDAG.h"
38#include "llvm/CodeGen/ScheduleDAGInstrs.h"
39#include "llvm/CodeGen/ScheduleDAGMutation.h"
40#include "llvm/CodeGen/ScheduleDFS.h"
41#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
42#include "llvm/CodeGen/SlotIndexes.h"
43#include "llvm/CodeGen/TargetFrameLowering.h"
44#include "llvm/CodeGen/TargetInstrInfo.h"
45#include "llvm/CodeGen/TargetLowering.h"
46#include "llvm/CodeGen/TargetPassConfig.h"
47#include "llvm/CodeGen/TargetRegisterInfo.h"
48#include "llvm/CodeGen/TargetSchedule.h"
49#include "llvm/CodeGen/TargetSubtargetInfo.h"
50#include "llvm/CodeGenTypes/MachineValueType.h"
51#include "llvm/Config/llvm-config.h"
52#include "llvm/InitializePasses.h"
53#include "llvm/MC/LaneBitmask.h"
54#include "llvm/Pass.h"
55#include "llvm/Support/CommandLine.h"
56#include "llvm/Support/Compiler.h"
57#include "llvm/Support/Debug.h"
58#include "llvm/Support/ErrorHandling.h"
59#include "llvm/Support/GraphWriter.h"
60#include "llvm/Support/raw_ostream.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <string>
68#include <tuple>
69#include <utility>
70#include <vector>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "machine-scheduler"
75
76STATISTIC(NumClustered, "Number of load/store pairs clustered");
77
78namespace llvm {
79
80cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
81 cl::desc("Force top-down list scheduling"));
82cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
83 cl::desc("Force bottom-up list scheduling"));
84namespace MISchedPostRASched {
85enum Direction {
86 TopDown,
87 BottomUp,
88 Bidirectional,
89};
90} // end namespace MISchedPostRASched
91cl::opt<MISchedPostRASched::Direction> PostRADirection(
92 "misched-postra-direction", cl::Hidden,
93 cl::desc("Post reg-alloc list scheduling direction"),
94 // Default to top-down because it was implemented first and existing targets
95 // expect that behavior by default.
96 cl::init(Val: MISchedPostRASched::TopDown),
97 cl::values(
98 clEnumValN(MISchedPostRASched::TopDown, "topdown",
99 "Force top-down post reg-alloc list scheduling"),
100 clEnumValN(MISchedPostRASched::BottomUp, "bottomup",
101 "Force bottom-up post reg-alloc list scheduling"),
102 clEnumValN(MISchedPostRASched::Bidirectional, "bidirectional",
103 "Force bidirectional post reg-alloc list scheduling")));
104cl::opt<bool>
105DumpCriticalPathLength("misched-dcpl", cl::Hidden,
106 cl::desc("Print critical path length to stdout"));
107
108cl::opt<bool> VerifyScheduling(
109 "verify-misched", cl::Hidden,
110 cl::desc("Verify machine instrs before and after machine scheduling"));
111
112#ifndef NDEBUG
113cl::opt<bool> ViewMISchedDAGs(
114 "view-misched-dags", cl::Hidden,
115 cl::desc("Pop up a window to show MISched dags after they are processed"));
116cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
117 cl::desc("Print schedule DAGs"));
118cl::opt<bool> MISchedDumpReservedCycles(
119 "misched-dump-reserved-cycles", cl::Hidden, cl::init(Val: false),
120 cl::desc("Dump resource usage at schedule boundary."));
cl::opt<bool> MischedDetailResourceBooking(
    "misched-detail-resource-booking", cl::Hidden, cl::init(false),
    cl::desc("Show details of invoking getNextResourceCycle."));
124#else
125const bool ViewMISchedDAGs = false;
126const bool PrintDAGs = false;
127const bool MischedDetailResourceBooking = false;
128#ifdef LLVM_ENABLE_DUMP
129const bool MISchedDumpReservedCycles = false;
130#endif // LLVM_ENABLE_DUMP
131#endif // NDEBUG
132
133} // end namespace llvm
134
135#ifndef NDEBUG
/// In some situations a few uninteresting nodes depend on nearly all other
/// nodes in the graph; provide a cutoff to hide them.
static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
  cl::desc("Hide nodes with more predecessors/successors than cutoff"));
140
141static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
142 cl::desc("Stop scheduling after N instructions"), cl::init(Val: ~0U));
143
144static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
145 cl::desc("Only schedule this function"));
146static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
147 cl::desc("Only schedule this MBB#"));
148#endif // NDEBUG
149
150/// Avoid quadratic complexity in unusually large basic blocks by limiting the
151/// size of the ready lists.
152static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
153 cl::desc("Limit ready list to N instructions"), cl::init(Val: 256));
154
155static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
156 cl::desc("Enable register pressure scheduling."), cl::init(Val: true));
157
158static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
159 cl::desc("Enable cyclic critical path analysis."), cl::init(Val: true));
160
161static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
162 cl::desc("Enable memop clustering."),
163 cl::init(Val: true));
static cl::opt<bool>
    ForceFastCluster("force-fast-cluster", cl::Hidden,
                     cl::desc("Switch to fast cluster algorithm with the loss "
                              "of some fusion opportunities"),
                     cl::init(false));
169static cl::opt<unsigned>
170 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
171 cl::desc("The threshold for fast cluster"),
172 cl::init(Val: 1000));
173
174#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
175static cl::opt<bool> MISchedDumpScheduleTrace(
176 "misched-dump-schedule-trace", cl::Hidden, cl::init(Val: false),
177 cl::desc("Dump resource usage at schedule boundary."));
178static cl::opt<unsigned>
179 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
180 cl::desc("Set width of the columns with "
181 "the resources and schedule units"),
182 cl::init(Val: 19));
183static cl::opt<unsigned>
184 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
185 cl::desc("Set width of the columns showing resource booking."),
186 cl::init(Val: 5));
187static cl::opt<bool> MISchedSortResourcesInTrace(
188 "misched-sort-resources-in-trace", cl::Hidden, cl::init(Val: true),
189 cl::desc("Sort the resources printed in the dump trace"));
190#endif
191
192static cl::opt<unsigned>
193 MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
194 cl::desc("Number of intervals to track"), cl::init(Val: 10));
195
196// DAG subtrees must have at least this many nodes.
197static const unsigned MinSubtreeSize = 8;
198
199// Pin the vtables to this file.
200void MachineSchedStrategy::anchor() {}
201
202void ScheduleDAGMutation::anchor() {}
203
204//===----------------------------------------------------------------------===//
205// Machine Instruction Scheduling Pass and Registry
206//===----------------------------------------------------------------------===//
207
208MachineSchedContext::MachineSchedContext() {
209 RegClassInfo = new RegisterClassInfo();
210}
211
212MachineSchedContext::~MachineSchedContext() {
213 delete RegClassInfo;
214}
215
216namespace {
217
218/// Base class for a machine scheduler class that can run at any point.
219class MachineSchedulerBase : public MachineSchedContext,
220 public MachineFunctionPass {
221public:
222 MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
223
224 void print(raw_ostream &O, const Module* = nullptr) const override;
225
226protected:
227 void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
228};
229
230/// MachineScheduler runs after coalescing and before register allocation.
231class MachineScheduler : public MachineSchedulerBase {
232public:
233 MachineScheduler();
234
235 void getAnalysisUsage(AnalysisUsage &AU) const override;
236
237 bool runOnMachineFunction(MachineFunction&) override;
238
239 static char ID; // Class identification, replacement for typeinfo
240
241protected:
242 ScheduleDAGInstrs *createMachineScheduler();
243};
244
/// PostMachineScheduler runs shortly before code emission.
246class PostMachineScheduler : public MachineSchedulerBase {
247public:
248 PostMachineScheduler();
249
250 void getAnalysisUsage(AnalysisUsage &AU) const override;
251
252 bool runOnMachineFunction(MachineFunction&) override;
253
254 static char ID; // Class identification, replacement for typeinfo
255
256protected:
257 ScheduleDAGInstrs *createPostMachineScheduler();
258};
259
260} // end anonymous namespace
261
262char MachineScheduler::ID = 0;
263
264char &llvm::MachineSchedulerID = MachineScheduler::ID;
265
266INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
267 "Machine Instruction Scheduler", false, false)
268INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
269INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
270INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
271INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
272INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
273INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
274 "Machine Instruction Scheduler", false, false)
275
276MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
277 initializeMachineSchedulerPass(Registry&: *PassRegistry::getPassRegistry());
278}
279
280void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
281 AU.setPreservesCFG();
282 AU.addRequired<MachineDominatorTree>();
283 AU.addRequired<MachineLoopInfo>();
284 AU.addRequired<AAResultsWrapperPass>();
285 AU.addRequired<TargetPassConfig>();
286 AU.addRequired<SlotIndexes>();
287 AU.addPreserved<SlotIndexes>();
288 AU.addRequired<LiveIntervals>();
289 AU.addPreserved<LiveIntervals>();
290 MachineFunctionPass::getAnalysisUsage(AU);
291}
292
293char PostMachineScheduler::ID = 0;
294
295char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
296
297INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
298 "PostRA Machine Instruction Scheduler", false, false)
299INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
300INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
301INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
302INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
303 "PostRA Machine Instruction Scheduler", false, false)
304
305PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
306 initializePostMachineSchedulerPass(Registry&: *PassRegistry::getPassRegistry());
307}
308
309void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
310 AU.setPreservesCFG();
311 AU.addRequired<MachineDominatorTree>();
312 AU.addRequired<MachineLoopInfo>();
313 AU.addRequired<AAResultsWrapperPass>();
314 AU.addRequired<TargetPassConfig>();
315 MachineFunctionPass::getAnalysisUsage(AU);
316}
317
318MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
319 MachineSchedRegistry::Registry;
320
321/// A dummy default scheduler factory indicates whether the scheduler
322/// is overridden on the command line.
323static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
324 return nullptr;
325}
326
327/// MachineSchedOpt allows command line selection of the scheduler.
328static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
329 RegisterPassParser<MachineSchedRegistry>>
330MachineSchedOpt("misched",
331 cl::init(Val: &useDefaultMachineSched), cl::Hidden,
332 cl::desc("Machine instruction scheduler to use"));
333
334static MachineSchedRegistry
335DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
336 useDefaultMachineSched);
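
// As a hedged sketch (the names below are illustrative and not part of this
// file), a target or plugin makes an alternative scheduler selectable via
// -misched=<name> by registering a factory:
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<MySchedStrategy>());
//   }
//   static MachineSchedRegistry
//       MySchedRegistry("my-sched", "My custom scheduler.", createMySched);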
337
338static cl::opt<bool> EnableMachineSched(
339 "enable-misched",
340 cl::desc("Enable the machine instruction scheduling pass."), cl::init(Val: true),
341 cl::Hidden);
342
343static cl::opt<bool> EnablePostRAMachineSched(
344 "enable-post-misched",
345 cl::desc("Enable the post-ra machine instruction scheduling pass."),
346 cl::init(Val: true), cl::Hidden);
347
348/// Decrement this iterator until reaching the top or a non-debug instr.
349static MachineBasicBlock::const_iterator
350priorNonDebug(MachineBasicBlock::const_iterator I,
351 MachineBasicBlock::const_iterator Beg) {
352 assert(I != Beg && "reached the top of the region, cannot decrement");
353 while (--I != Beg) {
354 if (!I->isDebugOrPseudoInstr())
355 break;
356 }
357 return I;
358}
359
360/// Non-const version.
361static MachineBasicBlock::iterator
362priorNonDebug(MachineBasicBlock::iterator I,
363 MachineBasicBlock::const_iterator Beg) {
364 return priorNonDebug(I: MachineBasicBlock::const_iterator(I), Beg)
365 .getNonConstIterator();
366}
367
368/// If this iterator is a debug value, increment until reaching the End or a
369/// non-debug instruction.
370static MachineBasicBlock::const_iterator
371nextIfDebug(MachineBasicBlock::const_iterator I,
372 MachineBasicBlock::const_iterator End) {
373 for(; I != End; ++I) {
374 if (!I->isDebugOrPseudoInstr())
375 break;
376 }
377 return I;
378}
379
380/// Non-const version.
381static MachineBasicBlock::iterator
382nextIfDebug(MachineBasicBlock::iterator I,
383 MachineBasicBlock::const_iterator End) {
384 return nextIfDebug(I: MachineBasicBlock::const_iterator(I), End)
385 .getNonConstIterator();
386}
387
388/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
389ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
390 // Select the scheduler, or set the default.
391 MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
392 if (Ctor != useDefaultMachineSched)
393 return Ctor(this);
394
395 // Get the default scheduler set by the target for this function.
396 ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(C: this);
397 if (Scheduler)
398 return Scheduler;
399
400 // Default to GenericScheduler.
401 return createGenericSchedLive(C: this);
402}
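
// Targets usually take the second path above: they override
// TargetPassConfig::createMachineScheduler() and return a ScheduleDAGMILive
// configured with target-specific DAG mutations. A sketch only (the exact
// mutations and the MyPassConfig name are illustrative):
//
//   ScheduleDAGInstrs *
//   MyPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }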
403
404/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
405/// the caller. We don't have a command line option to override the postRA
406/// scheduler. The Target must configure it.
407ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
408 // Get the postRA scheduler set by the target for this function.
409 ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(C: this);
410 if (Scheduler)
411 return Scheduler;
412
413 // Default to GenericScheduler.
414 return createGenericSchedPostRA(C: this);
415}
416
417/// Top-level MachineScheduler pass driver.
418///
419/// Visit blocks in function order. Divide each block into scheduling regions
420/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
421/// consistent with the DAG builder, which traverses the interior of the
422/// scheduling regions bottom-up.
423///
424/// This design avoids exposing scheduling boundaries to the DAG builder,
425/// simplifying the DAG builder's support for "special" target instructions.
426/// At the same time the design allows target schedulers to operate across
427/// scheduling boundaries, for example to bundle the boundary instructions
428/// without reordering them. This creates complexity, because the target
429/// scheduler must update the RegionBegin and RegionEnd positions cached by
430/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
431/// design would be to split blocks at scheduling boundaries, but LLVM has a
432/// general bias against block splitting purely for implementation simplicity.
433bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
434 if (skipFunction(F: mf.getFunction()))
435 return false;
436
437 if (EnableMachineSched.getNumOccurrences()) {
438 if (!EnableMachineSched)
439 return false;
440 } else if (!mf.getSubtarget().enableMachineScheduler())
441 return false;
442
443 LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
444
445 // Initialize the context of the pass.
446 MF = &mf;
447 MLI = &getAnalysis<MachineLoopInfo>();
448 MDT = &getAnalysis<MachineDominatorTree>();
449 PassConfig = &getAnalysis<TargetPassConfig>();
450 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
451
452 LIS = &getAnalysis<LiveIntervals>();
453
454 if (VerifyScheduling) {
455 LLVM_DEBUG(LIS->dump());
456 MF->verify(p: this, Banner: "Before machine scheduling.");
457 }
458 RegClassInfo->runOnMachineFunction(MF: *MF);
459
460 // Instantiate the selected scheduler for this target, function, and
461 // optimization level.
462 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
463 ScheduleDAGMI::DumpDirection D;
464 if (ForceTopDown)
465 D = ScheduleDAGMI::DumpDirection::TopDown;
466 else if (ForceBottomUp)
467 D = ScheduleDAGMI::DumpDirection::BottomUp;
468 else
469 D = ScheduleDAGMI::DumpDirection::Bidirectional;
470 Scheduler->setDumpDirection(D);
471 scheduleRegions(Scheduler&: *Scheduler, FixKillFlags: false);
472
473 LLVM_DEBUG(LIS->dump());
474 if (VerifyScheduling)
475 MF->verify(p: this, Banner: "After machine scheduling.");
476 return true;
477}
478
479bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
480 if (skipFunction(F: mf.getFunction()))
481 return false;
482
483 if (EnablePostRAMachineSched.getNumOccurrences()) {
484 if (!EnablePostRAMachineSched)
485 return false;
486 } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
487 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
488 return false;
489 }
490 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
491
492 // Initialize the context of the pass.
493 MF = &mf;
494 MLI = &getAnalysis<MachineLoopInfo>();
495 PassConfig = &getAnalysis<TargetPassConfig>();
496 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
497
498 if (VerifyScheduling)
499 MF->verify(p: this, Banner: "Before post machine scheduling.");
500
501 // Instantiate the selected scheduler for this target, function, and
502 // optimization level.
503 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
504 ScheduleDAGMI::DumpDirection D;
505 if (PostRADirection == MISchedPostRASched::TopDown)
506 D = ScheduleDAGMI::DumpDirection::TopDown;
507 else if (PostRADirection == MISchedPostRASched::BottomUp)
508 D = ScheduleDAGMI::DumpDirection::BottomUp;
509 else
510 D = ScheduleDAGMI::DumpDirection::Bidirectional;
511 Scheduler->setDumpDirection(D);
512 scheduleRegions(Scheduler&: *Scheduler, FixKillFlags: true);
513
514 if (VerifyScheduling)
515 MF->verify(p: this, Banner: "After post machine scheduling.");
516 return true;
517}
518
/// Return true if the given instruction should not be included in a scheduling
/// region.
///
/// MachineScheduler does not currently support scheduling across calls. To
/// handle calls, the DAG builder needs to be modified to create register
/// anti/output dependencies on the registers clobbered by the call's regmask
/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
/// scheduling across calls. In PostRA scheduling, we need the isCall check to
/// enforce the boundary, but there would be no benefit to postRA scheduling
/// across calls this late anyway.
529static bool isSchedBoundary(MachineBasicBlock::iterator MI,
530 MachineBasicBlock *MBB,
531 MachineFunction *MF,
532 const TargetInstrInfo *TII) {
533 return MI->isCall() || TII->isSchedulingBoundary(MI: *MI, MBB, MF: *MF);
534}
535
536/// A region of an MBB for scheduling.
537namespace {
538struct SchedRegion {
539 /// RegionBegin is the first instruction in the scheduling region, and
540 /// RegionEnd is either MBB->end() or the scheduling boundary after the
541 /// last instruction in the scheduling region. These iterators cannot refer
542 /// to instructions outside of the identified scheduling region because
543 /// those may be reordered before scheduling this region.
544 MachineBasicBlock::iterator RegionBegin;
545 MachineBasicBlock::iterator RegionEnd;
546 unsigned NumRegionInstrs;
547
548 SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
549 unsigned N) :
550 RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
551};
552} // end anonymous namespace
553
554using MBBRegionsVector = SmallVector<SchedRegion, 16>;
555
556static void
557getSchedRegions(MachineBasicBlock *MBB,
558 MBBRegionsVector &Regions,
559 bool RegionsTopDown) {
560 MachineFunction *MF = MBB->getParent();
561 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
562
563 MachineBasicBlock::iterator I = nullptr;
564 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
565 RegionEnd != MBB->begin(); RegionEnd = I) {
566
567 // Avoid decrementing RegionEnd for blocks with no terminator.
568 if (RegionEnd != MBB->end() ||
569 isSchedBoundary(MI: &*std::prev(x: RegionEnd), MBB: &*MBB, MF, TII)) {
570 --RegionEnd;
571 }
572
573 // The next region starts above the previous region. Look backward in the
574 // instruction stream until we find the nearest boundary.
575 unsigned NumRegionInstrs = 0;
576 I = RegionEnd;
577 for (;I != MBB->begin(); --I) {
578 MachineInstr &MI = *std::prev(x: I);
579 if (isSchedBoundary(MI: &MI, MBB: &*MBB, MF, TII))
580 break;
581 if (!MI.isDebugOrPseudoInstr()) {
582 // MBB::size() uses instr_iterator to count. Here we need a bundle to
583 // count as a single instruction.
584 ++NumRegionInstrs;
585 }
586 }
587
588 // It's possible we found a scheduling region that only has debug
589 // instructions. Don't bother scheduling these.
590 if (NumRegionInstrs != 0)
591 Regions.push_back(Elt: SchedRegion(I, RegionEnd, NumRegionInstrs));
592 }
593
594 if (RegionsTopDown)
595 std::reverse(first: Regions.begin(), last: Regions.end());
596}
597
598/// Main driver for both MachineScheduler and PostMachineScheduler.
599void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
600 bool FixKillFlags) {
601 // Visit all machine basic blocks.
602 //
603 // TODO: Visit blocks in global postorder or postorder within the bottom-up
604 // loop tree. Then we can optionally compute global RegPressure.
605 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
606 MBB != MBBEnd; ++MBB) {
607
608 Scheduler.startBlock(BB: &*MBB);
609
610#ifndef NDEBUG
611 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
612 continue;
613 if (SchedOnlyBlock.getNumOccurrences()
614 && (int)SchedOnlyBlock != MBB->getNumber())
615 continue;
616#endif
617
618 // Break the block into scheduling regions [I, RegionEnd). RegionEnd
619 // points to the scheduling boundary at the bottom of the region. The DAG
620 // does not include RegionEnd, but the region does (i.e. the next
621 // RegionEnd is above the previous RegionBegin). If the current block has
622 // no terminator then RegionEnd == MBB->end() for the bottom region.
623 //
    // All the regions of MBB are first found and stored in MBBRegions, which
    // are then processed top-down within the MBB when RegionsTopDown is true.
626 //
627 // The Scheduler may insert instructions during either schedule() or
628 // exitRegion(), even for empty regions. So the local iterators 'I' and
629 // 'RegionEnd' are invalid across these calls. Instructions must not be
630 // added to other regions than the current one without updating MBBRegions.
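    //
    // For example (an illustrative sketch), a block of the form
    //   %a = ...; %b = ...; CALL; %c = ...
    // yields two regions: one covering %a and %b with the CALL as its
    // boundary, and one covering %c and ending at MBB->end(); the CALL itself
    // is never reordered.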
631
632 MBBRegionsVector MBBRegions;
633 getSchedRegions(MBB: &*MBB, Regions&: MBBRegions, RegionsTopDown: Scheduler.doMBBSchedRegionsTopDown());
634 for (const SchedRegion &R : MBBRegions) {
635 MachineBasicBlock::iterator I = R.RegionBegin;
636 MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
637 unsigned NumRegionInstrs = R.NumRegionInstrs;
638
639 // Notify the scheduler of the region, even if we may skip scheduling
640 // it. Perhaps it still needs to be bundled.
641 Scheduler.enterRegion(bb: &*MBB, begin: I, end: RegionEnd, regioninstrs: NumRegionInstrs);
642
643 // Skip empty scheduling regions (0 or 1 schedulable instructions).
644 if (I == RegionEnd || I == std::prev(x: RegionEnd)) {
645 // Close the current region. Bundle the terminator if needed.
646 // This invalidates 'RegionEnd' and 'I'.
647 Scheduler.exitRegion();
648 continue;
649 }
650 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
651 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
652 << " " << MBB->getName() << "\n From: " << *I
653 << " To: ";
654 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
655 else dbgs() << "End\n";
656 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
657 if (DumpCriticalPathLength) {
658 errs() << MF->getName();
659 errs() << ":%bb. " << MBB->getNumber();
660 errs() << " " << MBB->getName() << " \n";
661 }
662
663 // Schedule a region: possibly reorder instructions.
664 // This invalidates the original region iterators.
665 Scheduler.schedule();
666
667 // Close the current region.
668 Scheduler.exitRegion();
669 }
670 Scheduler.finishBlock();
671 // FIXME: Ideally, no further passes should rely on kill flags. However,
672 // thumb2 size reduction is currently an exception, so the PostMIScheduler
673 // needs to do this.
674 if (FixKillFlags)
675 Scheduler.fixupKills(MBB&: *MBB);
676 }
677 Scheduler.finalizeSchedule();
678}
679
680void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
681 // unimplemented
682}
683
684#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
685LLVM_DUMP_METHOD void ReadyQueue::dump() const {
686 dbgs() << "Queue " << Name << ": ";
687 for (const SUnit *SU : Queue)
688 dbgs() << SU->NodeNum << " ";
689 dbgs() << "\n";
690}
691#endif
692
693//===----------------------------------------------------------------------===//
694// ScheduleDAGMI - Basic machine instruction scheduling. This is
695// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
696// virtual registers.
//===----------------------------------------------------------------------===//
698
699// Provide a vtable anchor.
700ScheduleDAGMI::~ScheduleDAGMI() = default;
701
702/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
703/// NumPredsLeft reaches zero, release the successor node.
704///
705/// FIXME: Adjust SuccSU height based on MinLatency.
706void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
707 SUnit *SuccSU = SuccEdge->getSUnit();
708
709 if (SuccEdge->isWeak()) {
710 --SuccSU->WeakPredsLeft;
711 if (SuccEdge->isCluster())
712 NextClusterSucc = SuccSU;
713 return;
714 }
715#ifndef NDEBUG
716 if (SuccSU->NumPredsLeft == 0) {
717 dbgs() << "*** Scheduling failed! ***\n";
718 dumpNode(SU: *SuccSU);
719 dbgs() << " has been released too many times!\n";
720 llvm_unreachable(nullptr);
721 }
722#endif
723 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
724 // CurrCycle may have advanced since then.
725 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
726 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
727
728 --SuccSU->NumPredsLeft;
729 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
730 SchedImpl->releaseTopNode(SU: SuccSU);
731}
732
733/// releaseSuccessors - Call releaseSucc on each of SU's successors.
734void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
735 for (SDep &Succ : SU->Succs)
736 releaseSucc(SU, SuccEdge: &Succ);
737}
738
739/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
740/// NumSuccsLeft reaches zero, release the predecessor node.
741///
742/// FIXME: Adjust PredSU height based on MinLatency.
743void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
744 SUnit *PredSU = PredEdge->getSUnit();
745
746 if (PredEdge->isWeak()) {
747 --PredSU->WeakSuccsLeft;
748 if (PredEdge->isCluster())
749 NextClusterPred = PredSU;
750 return;
751 }
752#ifndef NDEBUG
753 if (PredSU->NumSuccsLeft == 0) {
754 dbgs() << "*** Scheduling failed! ***\n";
755 dumpNode(SU: *PredSU);
756 dbgs() << " has been released too many times!\n";
757 llvm_unreachable(nullptr);
758 }
759#endif
760 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
761 // CurrCycle may have advanced since then.
762 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
763 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
764
765 --PredSU->NumSuccsLeft;
766 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
767 SchedImpl->releaseBottomNode(SU: PredSU);
768}
769
770/// releasePredecessors - Call releasePred on each of SU's predecessors.
771void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
772 for (SDep &Pred : SU->Preds)
773 releasePred(SU, PredEdge: &Pred);
774}
775
776void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
777 ScheduleDAGInstrs::startBlock(BB: bb);
778 SchedImpl->enterMBB(MBB: bb);
779}
780
781void ScheduleDAGMI::finishBlock() {
782 SchedImpl->leaveMBB();
783 ScheduleDAGInstrs::finishBlock();
784}
785
786/// enterRegion - Called back from PostMachineScheduler::runOnMachineFunction
787/// after crossing a scheduling boundary. [begin, end) includes all instructions
788/// in the region, including the boundary itself and single-instruction regions
789/// that don't get scheduled.
790void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
791 MachineBasicBlock::iterator begin,
792 MachineBasicBlock::iterator end,
793 unsigned regioninstrs)
794{
795 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
796
797 SchedImpl->initPolicy(Begin: begin, End: end, NumRegionInstrs: regioninstrs);
798}
799
800/// This is normally called from the main scheduler loop but may also be invoked
801/// by the scheduling strategy to perform additional code motion.
802void ScheduleDAGMI::moveInstruction(
803 MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
804 // Advance RegionBegin if the first instruction moves down.
805 if (&*RegionBegin == MI)
806 ++RegionBegin;
807
808 // Update the instruction stream.
809 BB->splice(Where: InsertPos, Other: BB, From: MI);
810
811 // Update LiveIntervals
812 if (LIS)
813 LIS->handleMove(MI&: *MI, /*UpdateFlags=*/true);
814
815 // Recede RegionBegin if an instruction moves above the first.
816 if (RegionBegin == InsertPos)
817 RegionBegin = MI;
818}
819
820bool ScheduleDAGMI::checkSchedLimit() {
821#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
822 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
823 CurrentTop = CurrentBottom;
824 return false;
825 }
826 ++NumInstrsScheduled;
827#endif
828 return true;
829}
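
// As a usage note (asserts builds only, since MISchedCutoff is compiled out
// otherwise): passing -misched-cutoff=<N> stops scheduling after N
// instructions, which makes it possible to bisect a suspected scheduling
// problem down to the single decision that introduced it.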
830
831/// Per-region scheduling driver, called back from
832/// PostMachineScheduler::runOnMachineFunction. This is a simplified driver
833/// that does not consider liveness or register pressure. It is useful for
834/// PostRA scheduling and potentially other custom schedulers.
835void ScheduleDAGMI::schedule() {
836 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
837 LLVM_DEBUG(SchedImpl->dumpPolicy());
838
839 // Build the DAG.
840 buildSchedGraph(AA);
841
842 postProcessDAG();
843
844 SmallVector<SUnit*, 8> TopRoots, BotRoots;
845 findRootsAndBiasEdges(TopRoots, BotRoots);
846
847 LLVM_DEBUG(dump());
848 if (PrintDAGs) dump();
849 if (ViewMISchedDAGs) viewGraph();
850
851 // Initialize the strategy before modifying the DAG.
852 // This may initialize a DFSResult to be used for queue priority.
853 SchedImpl->initialize(DAG: this);
854
855 // Initialize ready queues now that the DAG and priority data are finalized.
856 initQueues(TopRoots, BotRoots);
857
858 bool IsTopNode = false;
859 while (true) {
860 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
861 SUnit *SU = SchedImpl->pickNode(IsTopNode);
862 if (!SU) break;
863
864 assert(!SU->isScheduled && "Node already scheduled");
865 if (!checkSchedLimit())
866 break;
867
868 MachineInstr *MI = SU->getInstr();
869 if (IsTopNode) {
870 assert(SU->isTopReady() && "node still has unscheduled dependencies");
871 if (&*CurrentTop == MI)
872 CurrentTop = nextIfDebug(I: ++CurrentTop, End: CurrentBottom);
873 else
874 moveInstruction(MI, InsertPos: CurrentTop);
875 } else {
876 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
877 MachineBasicBlock::iterator priorII =
878 priorNonDebug(I: CurrentBottom, Beg: CurrentTop);
879 if (&*priorII == MI)
880 CurrentBottom = priorII;
881 else {
882 if (&*CurrentTop == MI)
883 CurrentTop = nextIfDebug(I: ++CurrentTop, End: priorII);
884 moveInstruction(MI, InsertPos: CurrentBottom);
885 CurrentBottom = MI;
886 }
887 }
888 // Notify the scheduling strategy before updating the DAG.
889 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
890 // runs, it can then use the accurate ReadyCycle time to determine whether
891 // newly released nodes can move to the readyQ.
892 SchedImpl->schedNode(SU, IsTopNode);
893
894 updateQueues(SU, IsTopNode);
895 }
896 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
897
898 placeDebugValues();
899
900 LLVM_DEBUG({
901 dbgs() << "*** Final schedule for "
902 << printMBBReference(*begin()->getParent()) << " ***\n";
903 dumpSchedule();
904 dbgs() << '\n';
905 });
906}
907
908/// Apply each ScheduleDAGMutation step in order.
909void ScheduleDAGMI::postProcessDAG() {
910 for (auto &m : Mutations)
911 m->apply(DAG: this);
912}
913
914void ScheduleDAGMI::
915findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
916 SmallVectorImpl<SUnit*> &BotRoots) {
917 for (SUnit &SU : SUnits) {
918 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
919
920 // Order predecessors so DFSResult follows the critical path.
921 SU.biasCriticalPath();
922
923 // A SUnit is ready to top schedule if it has no predecessors.
924 if (!SU.NumPredsLeft)
925 TopRoots.push_back(Elt: &SU);
926 // A SUnit is ready to bottom schedule if it has no successors.
927 if (!SU.NumSuccsLeft)
928 BotRoots.push_back(Elt: &SU);
929 }
930 ExitSU.biasCriticalPath();
931}
932
/// Identify DAG roots and set up scheduler queues.
934void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
935 ArrayRef<SUnit*> BotRoots) {
936 NextClusterSucc = nullptr;
937 NextClusterPred = nullptr;
938
939 // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
940 //
941 // Nodes with unreleased weak edges can still be roots.
942 // Release top roots in forward order.
943 for (SUnit *SU : TopRoots)
944 SchedImpl->releaseTopNode(SU);
945
946 // Release bottom roots in reverse order so the higher priority nodes appear
947 // first. This is more natural and slightly more efficient.
948 for (SmallVectorImpl<SUnit*>::const_reverse_iterator
949 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
950 SchedImpl->releaseBottomNode(SU: *I);
951 }
952
953 releaseSuccessors(SU: &EntrySU);
954 releasePredecessors(SU: &ExitSU);
955
956 SchedImpl->registerRoots();
957
958 // Advance past initial DebugValues.
959 CurrentTop = nextIfDebug(I: RegionBegin, End: RegionEnd);
960 CurrentBottom = RegionEnd;
961}
962
963/// Update scheduler queues after scheduling an instruction.
964void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
965 // Release dependent instructions for scheduling.
966 if (IsTopNode)
967 releaseSuccessors(SU);
968 else
969 releasePredecessors(SU);
970
971 SU->isScheduled = true;
972}
973
974/// Reinsert any remaining debug_values, just like the PostRA scheduler.
975void ScheduleDAGMI::placeDebugValues() {
976 // If first instruction was a DBG_VALUE then put it back.
977 if (FirstDbgValue) {
978 BB->splice(Where: RegionBegin, Other: BB, From: FirstDbgValue);
979 RegionBegin = FirstDbgValue;
980 }
981
982 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
983 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
984 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(x: DI);
985 MachineInstr *DbgValue = P.first;
986 MachineBasicBlock::iterator OrigPrevMI = P.second;
987 if (&*RegionBegin == DbgValue)
988 ++RegionBegin;
989 BB->splice(Where: std::next(x: OrigPrevMI), Other: BB, From: DbgValue);
990 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
991 RegionEnd = DbgValue;
992 }
993}
994
995#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
996static const char *scheduleTableLegend = " i: issue\n x: resource booked";
997
998LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
  // Bail out when there is no schedule model to query.
  if (!SchedModel.hasInstrSchedModel())
    return;

  // Nothing to show if there are fewer than two instructions.
  if (BB->size() < 2)
    return;
1006
1007 dbgs() << " * Schedule table (TopDown):\n";
1008 dbgs() << scheduleTableLegend << "\n";
1009 const unsigned FirstCycle = getSUnit(MI: &*(std::begin(cont: *this)))->TopReadyCycle;
1010 unsigned LastCycle = getSUnit(MI: &*(std::prev(x: std::end(cont: *this))))->TopReadyCycle;
1011 for (MachineInstr &MI : *this) {
1012 SUnit *SU = getSUnit(MI: &MI);
1013 if (!SU)
1014 continue;
1015 const MCSchedClassDesc *SC = getSchedClass(SU);
1016 for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1017 PE = SchedModel.getWriteProcResEnd(SC);
1018 PI != PE; ++PI) {
1019 if (SU->TopReadyCycle + PI->ReleaseAtCycle - 1 > LastCycle)
1020 LastCycle = SU->TopReadyCycle + PI->ReleaseAtCycle - 1;
1021 }
1022 }
1023 // Print the header with the cycles
1024 dbgs() << llvm::left_justify(Str: "Cycle", Width: HeaderColWidth);
1025 for (unsigned C = FirstCycle; C <= LastCycle; ++C)
1026 dbgs() << llvm::left_justify(Str: "| " + std::to_string(val: C), Width: ColWidth);
1027 dbgs() << "|\n";
1028
1029 for (MachineInstr &MI : *this) {
1030 SUnit *SU = getSUnit(MI: &MI);
1031 if (!SU) {
1032 dbgs() << "Missing SUnit\n";
1033 continue;
1034 }
1035 std::string NodeName("SU(");
1036 NodeName += std::to_string(val: SU->NodeNum) + ")";
1037 dbgs() << llvm::left_justify(Str: NodeName, Width: HeaderColWidth);
1038 unsigned C = FirstCycle;
1039 for (; C <= LastCycle; ++C) {
1040 if (C == SU->TopReadyCycle)
1041 dbgs() << llvm::left_justify(Str: "| i", Width: ColWidth);
1042 else
1043 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1044 }
1045 dbgs() << "|\n";
1046 const MCSchedClassDesc *SC = getSchedClass(SU);
1047
1048 SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
1049 make_range(x: SchedModel.getWriteProcResBegin(SC),
1050 y: SchedModel.getWriteProcResEnd(SC)));
1051
1052 if (MISchedSortResourcesInTrace)
1053 llvm::stable_sort(Range&: ResourcesIt,
1054 C: [](const MCWriteProcResEntry &LHS,
1055 const MCWriteProcResEntry &RHS) -> bool {
1056 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1057 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1058 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1059 });
1060 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1061 C = FirstCycle;
1062 const std::string ResName =
1063 SchedModel.getResourceName(PIdx: PI.ProcResourceIdx);
1064 dbgs() << llvm::right_justify(Str: ResName + " ", Width: HeaderColWidth);
1065 for (; C < SU->TopReadyCycle + PI.AcquireAtCycle; ++C) {
1066 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1067 }
1068 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1069 ++I, ++C)
1070 dbgs() << llvm::left_justify(Str: "| x", Width: ColWidth);
1071 while (C++ <= LastCycle)
1072 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1073 // Place end char
1074 dbgs() << "| \n";
1075 }
1076 }
1077}
1078
1079LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
  // Bail out when there is no schedule model to query.
  if (!SchedModel.hasInstrSchedModel())
    return;

  // Nothing to show if there are fewer than two instructions.
  if (BB->size() < 2)
    return;
1087
1088 dbgs() << " * Schedule table (BottomUp):\n";
1089 dbgs() << scheduleTableLegend << "\n";
1090
1091 const int FirstCycle = getSUnit(MI: &*(std::begin(cont: *this)))->BotReadyCycle;
1092 int LastCycle = getSUnit(MI: &*(std::prev(x: std::end(cont: *this))))->BotReadyCycle;
1093 for (MachineInstr &MI : *this) {
1094 SUnit *SU = getSUnit(MI: &MI);
1095 if (!SU)
1096 continue;
1097 const MCSchedClassDesc *SC = getSchedClass(SU);
1098 for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
1099 PE = SchedModel.getWriteProcResEnd(SC);
1100 PI != PE; ++PI) {
1101 if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
1102 LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
1103 }
1104 }
1105 // Print the header with the cycles
1106 dbgs() << llvm::left_justify(Str: "Cycle", Width: HeaderColWidth);
1107 for (int C = FirstCycle; C >= LastCycle; --C)
1108 dbgs() << llvm::left_justify(Str: "| " + std::to_string(val: C), Width: ColWidth);
1109 dbgs() << "|\n";
1110
1111 for (MachineInstr &MI : *this) {
1112 SUnit *SU = getSUnit(MI: &MI);
1113 if (!SU) {
1114 dbgs() << "Missing SUnit\n";
1115 continue;
1116 }
1117 std::string NodeName("SU(");
1118 NodeName += std::to_string(val: SU->NodeNum) + ")";
1119 dbgs() << llvm::left_justify(Str: NodeName, Width: HeaderColWidth);
1120 int C = FirstCycle;
1121 for (; C >= LastCycle; --C) {
1122 if (C == (int)SU->BotReadyCycle)
1123 dbgs() << llvm::left_justify(Str: "| i", Width: ColWidth);
1124 else
1125 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1126 }
1127 dbgs() << "|\n";
1128 const MCSchedClassDesc *SC = getSchedClass(SU);
1129 SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
1130 make_range(x: SchedModel.getWriteProcResBegin(SC),
1131 y: SchedModel.getWriteProcResEnd(SC)));
1132
1133 if (MISchedSortResourcesInTrace)
1134 llvm::stable_sort(Range&: ResourcesIt,
1135 C: [](const MCWriteProcResEntry &LHS,
1136 const MCWriteProcResEntry &RHS) -> bool {
1137 return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
1138 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
1139 LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
1140 });
1141 for (const MCWriteProcResEntry &PI : ResourcesIt) {
1142 C = FirstCycle;
1143 const std::string ResName =
1144 SchedModel.getResourceName(PIdx: PI.ProcResourceIdx);
1145 dbgs() << llvm::right_justify(Str: ResName + " ", Width: HeaderColWidth);
1146 for (; C > ((int)SU->BotReadyCycle - (int)PI.AcquireAtCycle); --C) {
1147 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1148 }
1149 for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
1150 ++I, --C)
1151 dbgs() << llvm::left_justify(Str: "| x", Width: ColWidth);
1152 while (C-- >= LastCycle)
1153 dbgs() << llvm::left_justify(Str: "|", Width: ColWidth);
1154 // Place end char
1155 dbgs() << "| \n";
1156 }
1157 }
1158}
1159#endif
1160
1161#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1162LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
1163 if (MISchedDumpScheduleTrace) {
1164 if (DumpDir == DumpDirection::TopDown)
1165 dumpScheduleTraceTopDown();
1166 else if (DumpDir == DumpDirection::BottomUp)
1167 dumpScheduleTraceBottomUp();
1168 else if (DumpDir == DumpDirection::Bidirectional) {
1169 dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1170 } else {
1171 dbgs() << "* Schedule table: DumpDirection not set.\n";
1172 }
1173 }
1174
1175 for (MachineInstr &MI : *this) {
1176 if (SUnit *SU = getSUnit(MI: &MI))
1177 dumpNode(SU: *SU);
1178 else
1179 dbgs() << "Missing SUnit\n";
1180 }
1181}
1182#endif
1183
1184//===----------------------------------------------------------------------===//
1185// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1186// preservation.
1187//===----------------------------------------------------------------------===//
1188
1189ScheduleDAGMILive::~ScheduleDAGMILive() {
1190 delete DFSResult;
1191}
1192
1193void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
1194 const MachineInstr &MI = *SU.getInstr();
1195 for (const MachineOperand &MO : MI.operands()) {
1196 if (!MO.isReg())
1197 continue;
1198 if (!MO.readsReg())
1199 continue;
1200 if (TrackLaneMasks && !MO.isUse())
1201 continue;
1202
1203 Register Reg = MO.getReg();
1204 if (!Reg.isVirtual())
1205 continue;
1206
1207 // Ignore re-defs.
1208 if (TrackLaneMasks) {
1209 bool FoundDef = false;
1210 for (const MachineOperand &MO2 : MI.all_defs()) {
1211 if (MO2.getReg() == Reg && !MO2.isDead()) {
1212 FoundDef = true;
1213 break;
1214 }
1215 }
1216 if (FoundDef)
1217 continue;
1218 }
1219
1220 // Record this local VReg use.
1221 VReg2SUnitMultiMap::iterator UI = VRegUses.find(Key: Reg);
1222 for (; UI != VRegUses.end(); ++UI) {
1223 if (UI->SU == &SU)
1224 break;
1225 }
1226 if (UI == VRegUses.end())
1227 VRegUses.insert(Val: VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
1228 }
1229}
1230
1231/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1232/// crossing a scheduling boundary. [begin, end) includes all instructions in
1233/// the region, including the boundary itself and single-instruction regions
1234/// that don't get scheduled.
1235void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
1236 MachineBasicBlock::iterator begin,
1237 MachineBasicBlock::iterator end,
1238 unsigned regioninstrs)
1239{
1240 // ScheduleDAGMI initializes SchedImpl's per-region policy.
1241 ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1242
1243 // For convenience remember the end of the liveness region.
1244 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(x: RegionEnd);
1245
1246 SUPressureDiffs.clear();
1247
1248 ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1249 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1250
1251 assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1252 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1253}
1254
// Set up the register pressure trackers for the top scheduled and bottom
// scheduled regions.
1257void ScheduleDAGMILive::initRegPressure() {
1258 VRegUses.clear();
1259 VRegUses.setUniverse(MRI.getNumVirtRegs());
1260 for (SUnit &SU : SUnits)
1261 collectVRegUses(SU);
1262
1263 TopRPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: RegionBegin,
1264 TrackLaneMasks: ShouldTrackLaneMasks, TrackUntiedDefs: false);
1265 BotRPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: LiveRegionEnd,
1266 TrackLaneMasks: ShouldTrackLaneMasks, TrackUntiedDefs: false);
1267
1268 // Close the RPTracker to finalize live ins.
1269 RPTracker.closeRegion();
1270
1271 LLVM_DEBUG(RPTracker.dump());
1272
1273 // Initialize the live ins and live outs.
1274 TopRPTracker.addLiveRegs(Regs: RPTracker.getPressure().LiveInRegs);
1275 BotRPTracker.addLiveRegs(Regs: RPTracker.getPressure().LiveOutRegs);
1276
1277 // Close one end of the tracker so we can call
1278 // getMaxUpward/DownwardPressureDelta before advancing across any
1279 // instructions. This converts currently live regs into live ins/outs.
1280 TopRPTracker.closeTop();
1281 BotRPTracker.closeBottom();
1282
1283 BotRPTracker.initLiveThru(RPTracker);
1284 if (!BotRPTracker.getLiveThru().empty()) {
1285 TopRPTracker.initLiveThru(PressureSet: BotRPTracker.getLiveThru());
1286 LLVM_DEBUG(dbgs() << "Live Thru: ";
1287 dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
  }
1289
1290 // For each live out vreg reduce the pressure change associated with other
1291 // uses of the same vreg below the live-out reaching def.
1292 updatePressureDiffs(LiveUses: RPTracker.getPressure().LiveOutRegs);
1293
1294 // Account for liveness generated by the region boundary.
1295 if (LiveRegionEnd != RegionEnd) {
1296 SmallVector<RegisterMaskPair, 8> LiveUses;
1297 BotRPTracker.recede(LiveUses: &LiveUses);
1298 updatePressureDiffs(LiveUses);
1299 }
1300
1301 LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1302 dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1303 dbgs() << "Bottom Pressure:\n";
1304 dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1305
1306 assert((BotRPTracker.getPos() == RegionEnd ||
1307 (RegionEnd->isDebugInstr() &&
1308 BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1309 "Can't find the region bottom");
1310
1311 // Cache the list of excess pressure sets in this region. This will also track
1312 // the max pressure in the scheduled code for these sets.
1313 RegionCriticalPSets.clear();
1314 const std::vector<unsigned> &RegionPressure =
1315 RPTracker.getPressure().MaxSetPressure;
1316 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1317 unsigned Limit = RegClassInfo->getRegPressureSetLimit(Idx: i);
1318 if (RegionPressure[i] > Limit) {
1319 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1320 << " Actual " << RegionPressure[i] << "\n");
1321 RegionCriticalPSets.push_back(x: PressureChange(i));
1322 }
1323 }
1324 LLVM_DEBUG(dbgs() << "Excess PSets: ";
1325 for (const PressureChange &RCPS
1326 : RegionCriticalPSets) dbgs()
1327 << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1328 dbgs() << "\n");
1329}
1330
1331void ScheduleDAGMILive::
1332updateScheduledPressure(const SUnit *SU,
1333 const std::vector<unsigned> &NewMaxPressure) {
1334 const PressureDiff &PDiff = getPressureDiff(SU);
1335 unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1336 for (const PressureChange &PC : PDiff) {
1337 if (!PC.isValid())
1338 break;
1339 unsigned ID = PC.getPSet();
1340 while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1341 ++CritIdx;
1342 if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1343 if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1344 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1345 RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1346 }
1347 unsigned Limit = RegClassInfo->getRegPressureSetLimit(Idx: ID);
1348 if (NewMaxPressure[ID] >= Limit - 2) {
1349 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1350 << NewMaxPressure[ID]
1351 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1352 << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1353 << " livethru)\n");
1354 }
1355 }
1356}
1357
1358/// Update the PressureDiff array for liveness after scheduling this
1359/// instruction.
1360void ScheduleDAGMILive::updatePressureDiffs(
1361 ArrayRef<RegisterMaskPair> LiveUses) {
1362 for (const RegisterMaskPair &P : LiveUses) {
1363 Register Reg = P.RegUnit;
1364 /// FIXME: Currently assuming single-use physregs.
1365 if (!Reg.isVirtual())
1366 continue;
1367
1368 if (ShouldTrackLaneMasks) {
1369 // If the register has just become live then other uses won't change
1370 // this fact anymore => decrement pressure.
1371 // If the register has just become dead then other uses make it come
1372 // back to life => increment pressure.
1373 bool Decrement = P.LaneMask.any();
1374
1375 for (const VReg2SUnit &V2SU
1376 : make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) {
1377 SUnit &SU = *V2SU.SU;
1378 if (SU.isScheduled || &SU == &ExitSU)
1379 continue;
1380
1381 PressureDiff &PDiff = getPressureDiff(SU: &SU);
1382 PDiff.addPressureChange(RegUnit: Reg, IsDec: Decrement, MRI: &MRI);
1383 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1384 << printReg(Reg, TRI) << ':'
1385 << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1386 dbgs() << " to "; PDiff.dump(*TRI););
1387 }
1388 } else {
1389 assert(P.LaneMask.any());
1390 LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1391 // This may be called before CurrentBottom has been initialized. However,
1392 // BotRPTracker must have a valid position. We want the value live into the
1393 // instruction or live out of the block, so ask for the previous
1394 // instruction's live-out.
1395 const LiveInterval &LI = LIS->getInterval(Reg);
1396 VNInfo *VNI;
1397 MachineBasicBlock::const_iterator I =
1398 nextIfDebug(I: BotRPTracker.getPos(), End: BB->end());
1399 if (I == BB->end())
1400 VNI = LI.getVNInfoBefore(Idx: LIS->getMBBEndIdx(mbb: BB));
1401 else {
1402 LiveQueryResult LRQ = LI.Query(Idx: LIS->getInstructionIndex(Instr: *I));
1403 VNI = LRQ.valueIn();
1404 }
1405 // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1406 assert(VNI && "No live value at use.");
1407 for (const VReg2SUnit &V2SU
1408 : make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) {
1409 SUnit *SU = V2SU.SU;
1410 // If this use comes before the reaching def, it cannot be a last use,
1411 // so decrease its pressure change.
1412 if (!SU->isScheduled && SU != &ExitSU) {
1413 LiveQueryResult LRQ =
1414 LI.Query(Idx: LIS->getInstructionIndex(Instr: *SU->getInstr()));
1415 if (LRQ.valueIn() == VNI) {
1416 PressureDiff &PDiff = getPressureDiff(SU);
1417 PDiff.addPressureChange(RegUnit: Reg, IsDec: true, MRI: &MRI);
1418 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1419 << *SU->getInstr();
1420 dbgs() << " to "; PDiff.dump(*TRI););
1421 }
1422 }
1423 }
1424 }
1425 }
1426}
1427
1428void ScheduleDAGMILive::dump() const {
1429#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1430 if (EntrySU.getInstr() != nullptr)
1431 dumpNodeAll(SU: EntrySU);
1432 for (const SUnit &SU : SUnits) {
1433 dumpNodeAll(SU);
1434 if (ShouldTrackPressure) {
1435 dbgs() << " Pressure Diff : ";
1436 getPressureDiff(SU: &SU).dump(TRI: *TRI);
1437 }
1438 dbgs() << " Single Issue : ";
1439 if (SchedModel.mustBeginGroup(MI: SU.getInstr()) &&
1440 SchedModel.mustEndGroup(MI: SU.getInstr()))
1441 dbgs() << "true;";
1442 else
1443 dbgs() << "false;";
1444 dbgs() << '\n';
1445 }
1446 if (ExitSU.getInstr() != nullptr)
1447 dumpNodeAll(SU: ExitSU);
1448#endif
1449}
1450
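// A new scheduling algorithm normally only needs a MachineSchedStrategy. As a
// sketch (MyStrategy is illustrative), these are the key hooks exercised by
// the driver below; see MachineScheduler.h for the full interface:
//
//   struct MyStrategy : MachineSchedStrategy {
//     void initialize(ScheduleDAGMI *DAG) override;
//     void registerRoots() override;
//     SUnit *pickNode(bool &IsTopNode) override;
//     void schedNode(SUnit *SU, bool IsTopNode) override;
//     void releaseTopNode(SUnit *SU) override;
//     void releaseBottomNode(SUnit *SU) override;
//   };
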
1451/// schedule - Called back from MachineScheduler::runOnMachineFunction
1452/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1453/// only includes instructions that have DAG nodes, not scheduling boundaries.
1454///
1455/// This is a skeletal driver, with all the functionality pushed into helpers,
1456/// so that it can be easily extended by experimental schedulers. Generally,
1457/// implementing MachineSchedStrategy should be sufficient to implement a new
1458/// scheduling algorithm. However, if a scheduler further subclasses
1459/// ScheduleDAGMILive then it will want to override this virtual method in order
1460/// to update any specialized state.
1461void ScheduleDAGMILive::schedule() {
1462 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1463 LLVM_DEBUG(SchedImpl->dumpPolicy());
1464 buildDAGWithRegPressure();
1465
1466 postProcessDAG();
1467
1468 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1469 findRootsAndBiasEdges(TopRoots, BotRoots);
1470
1471 // Initialize the strategy before modifying the DAG.
1472 // This may initialize a DFSResult to be used for queue priority.
1473 SchedImpl->initialize(DAG: this);
1474
1475 LLVM_DEBUG(dump());
1476 if (PrintDAGs) dump();
1477 if (ViewMISchedDAGs) viewGraph();
1478
1479 // Initialize ready queues now that the DAG and priority data are finalized.
1480 initQueues(TopRoots, BotRoots);
1481
1482 bool IsTopNode = false;
1483 while (true) {
1484 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1485 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1486 if (!SU) break;
1487
1488 assert(!SU->isScheduled && "Node already scheduled");
1489 if (!checkSchedLimit())
1490 break;
1491
1492 scheduleMI(SU, IsTopNode);
1493
1494 if (DFSResult) {
1495 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1496 if (!ScheduledTrees.test(Idx: SubtreeID)) {
1497 ScheduledTrees.set(SubtreeID);
1498 DFSResult->scheduleTree(SubtreeID);
1499 SchedImpl->scheduleTree(SubtreeID);
1500 }
1501 }
1502
1503 // Notify the scheduling strategy after updating the DAG.
1504 SchedImpl->schedNode(SU, IsTopNode);
1505
1506 updateQueues(SU, IsTopNode);
1507 }
1508 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1509
1510 placeDebugValues();
1511
1512 LLVM_DEBUG({
1513 dbgs() << "*** Final schedule for "
1514 << printMBBReference(*begin()->getParent()) << " ***\n";
1515 dumpSchedule();
1516 dbgs() << '\n';
1517 });
1518}
1519
1520/// Build the DAG and set up three register pressure trackers.
1521void ScheduleDAGMILive::buildDAGWithRegPressure() {
1522 if (!ShouldTrackPressure) {
1523 RPTracker.reset();
1524 RegionCriticalPSets.clear();
1525 buildSchedGraph(AA);
1526 return;
1527 }
1528
1529 // Initialize the register pressure tracker used by buildSchedGraph.
1530 RPTracker.init(mf: &MF, rci: RegClassInfo, lis: LIS, mbb: BB, pos: LiveRegionEnd,
1531 TrackLaneMasks: ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1532
1533  // Account for liveness generated by the region boundary.
1534 if (LiveRegionEnd != RegionEnd)
1535 RPTracker.recede();
1536
1537 // Build the DAG, and compute current register pressure.
1538 buildSchedGraph(AA, RPTracker: &RPTracker, PDiffs: &SUPressureDiffs, LIS, TrackLaneMasks: ShouldTrackLaneMasks);
1539
1540 // Initialize top/bottom trackers after computing region pressure.
1541 initRegPressure();
1542}
1543
1544void ScheduleDAGMILive::computeDFSResult() {
1545 if (!DFSResult)
1546    DFSResult = new SchedDFSResult(/*BottomUp=*/true, MinSubtreeSize);
1547 DFSResult->clear();
1548 ScheduledTrees.clear();
1549 DFSResult->resize(NumSUnits: SUnits.size());
1550 DFSResult->compute(SUnits);
1551 ScheduledTrees.resize(N: DFSResult->getNumSubtrees());
1552}
1553
1554/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1555/// only provides the critical path for single block loops. To handle loops that
1556/// span blocks, we could use the vreg path latencies provided by
1557/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1558/// available for use in the scheduler.
1559///
1560/// The cyclic path estimation identifies a def-use pair that crosses the back
1561/// edge and considers the depth and height of the nodes. For example, consider
1562/// the following instruction sequence where each instruction has unit latency
1563/// and defines an eponymous virtual register:
1564///
1565/// a->b(a,c)->c(b)->d(c)->exit
1566///
1567/// The cyclic critical path is two cycles: b->c->b
1568/// The acyclic critical path is four cycles: a->b->c->d->exit
1569/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1570/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1571/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1572/// LiveInDepth = depth(b) = len(a->b) = 1
1573///
1574/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1575/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1576/// CyclicCriticalPath = min(2, 2) = 2
1577///
1578/// This could be relevant to PostRA scheduling, but is currently implemented
1579/// assuming LiveIntervals.
1580unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1581  // This only applies to single-block loops.
1582 if (!BB->isSuccessor(MBB: BB))
1583 return 0;
1584
1585 unsigned MaxCyclicLatency = 0;
1586 // Visit each live out vreg def to find def/use pairs that cross iterations.
1587 for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1588 Register Reg = P.RegUnit;
1589 if (!Reg.isVirtual())
1590 continue;
1591 const LiveInterval &LI = LIS->getInterval(Reg);
1592 const VNInfo *DefVNI = LI.getVNInfoBefore(Idx: LIS->getMBBEndIdx(mbb: BB));
1593 if (!DefVNI)
1594 continue;
1595
1596 MachineInstr *DefMI = LIS->getInstructionFromIndex(index: DefVNI->def);
1597 const SUnit *DefSU = getSUnit(MI: DefMI);
1598 if (!DefSU)
1599 continue;
1600
1601 unsigned LiveOutHeight = DefSU->getHeight();
1602 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1603 // Visit all local users of the vreg def.
1604 for (const VReg2SUnit &V2SU
1605 : make_range(x: VRegUses.find(Key: Reg), y: VRegUses.end())) {
1606 SUnit *SU = V2SU.SU;
1607 if (SU == &ExitSU)
1608 continue;
1609
1610 // Only consider uses of the phi.
1611 LiveQueryResult LRQ = LI.Query(Idx: LIS->getInstructionIndex(Instr: *SU->getInstr()));
1612 if (!LRQ.valueIn()->isPHIDef())
1613 continue;
1614
1615 // Assume that a path spanning two iterations is a cycle, which could
1616 // overestimate in strange cases. This allows cyclic latency to be
1617 // estimated as the minimum slack of the vreg's depth or height.
1618 unsigned CyclicLatency = 0;
1619 if (LiveOutDepth > SU->getDepth())
1620 CyclicLatency = LiveOutDepth - SU->getDepth();
1621
1622 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1623 if (LiveInHeight > LiveOutHeight) {
1624 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1625 CyclicLatency = LiveInHeight - LiveOutHeight;
1626 } else
1627 CyclicLatency = 0;
1628
1629 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1630 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1631 if (CyclicLatency > MaxCyclicLatency)
1632 MaxCyclicLatency = CyclicLatency;
1633 }
1634 }
1635 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1636 return MaxCyclicLatency;
1637}
1638
1639/// Release ExitSU predecessors and set up scheduler queues. Re-position
1640/// the Top RP tracker in case the region beginning has changed.
1641void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1642 ArrayRef<SUnit*> BotRoots) {
1643 ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1644 if (ShouldTrackPressure) {
1645 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1646 TopRPTracker.setPos(CurrentTop);
1647 }
1648}
1649
1650/// Move an instruction and update register pressure.
1651void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1652 // Move the instruction to its new location in the instruction stream.
1653 MachineInstr *MI = SU->getInstr();
1654
1655 if (IsTopNode) {
1656 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1657 if (&*CurrentTop == MI)
1658 CurrentTop = nextIfDebug(I: ++CurrentTop, End: CurrentBottom);
1659 else {
1660 moveInstruction(MI, InsertPos: CurrentTop);
1661 TopRPTracker.setPos(MI);
1662 }
1663
1664 if (ShouldTrackPressure) {
1665 // Update top scheduled pressure.
1666 RegisterOperands RegOpers;
1667 RegOpers.collect(MI: *MI, TRI: *TRI, MRI, TrackLaneMasks: ShouldTrackLaneMasks, IgnoreDead: false);
1668 if (ShouldTrackLaneMasks) {
1669 // Adjust liveness and add missing dead+read-undef flags.
1670 SlotIndex SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot();
1671 RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI);
1672 } else {
1673 // Adjust for missing dead-def flags.
1674 RegOpers.detectDeadDefs(MI: *MI, LIS: *LIS);
1675 }
1676
1677 TopRPTracker.advance(RegOpers);
1678 assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1679 LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1680 TopRPTracker.getRegSetPressureAtPos(), TRI););
1681
1682 updateScheduledPressure(SU, NewMaxPressure: TopRPTracker.getPressure().MaxSetPressure);
1683 }
1684 } else {
1685 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1686 MachineBasicBlock::iterator priorII =
1687 priorNonDebug(I: CurrentBottom, Beg: CurrentTop);
1688 if (&*priorII == MI)
1689 CurrentBottom = priorII;
1690 else {
1691 if (&*CurrentTop == MI) {
1692 CurrentTop = nextIfDebug(I: ++CurrentTop, End: priorII);
1693 TopRPTracker.setPos(CurrentTop);
1694 }
1695 moveInstruction(MI, InsertPos: CurrentBottom);
1696 CurrentBottom = MI;
1697 BotRPTracker.setPos(CurrentBottom);
1698 }
1699 if (ShouldTrackPressure) {
1700 RegisterOperands RegOpers;
1701 RegOpers.collect(MI: *MI, TRI: *TRI, MRI, TrackLaneMasks: ShouldTrackLaneMasks, IgnoreDead: false);
1702 if (ShouldTrackLaneMasks) {
1703 // Adjust liveness and add missing dead+read-undef flags.
1704 SlotIndex SlotIdx = LIS->getInstructionIndex(Instr: *MI).getRegSlot();
1705 RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx, AddFlagsMI: MI);
1706 } else {
1707 // Adjust for missing dead-def flags.
1708 RegOpers.detectDeadDefs(MI: *MI, LIS: *LIS);
1709 }
1710
1711 if (BotRPTracker.getPos() != CurrentBottom)
1712 BotRPTracker.recedeSkipDebugValues();
1713 SmallVector<RegisterMaskPair, 8> LiveUses;
1714 BotRPTracker.recede(RegOpers, LiveUses: &LiveUses);
1715 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1716 LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1717 BotRPTracker.getRegSetPressureAtPos(), TRI););
1718
1719 updateScheduledPressure(SU, NewMaxPressure: BotRPTracker.getPressure().MaxSetPressure);
1720 updatePressureDiffs(LiveUses);
1721 }
1722 }
1723}
1724
1725//===----------------------------------------------------------------------===//
1726// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1727//===----------------------------------------------------------------------===//
1728
1729namespace {
1730
1731/// Post-process the DAG to create cluster edges between neighboring
1732/// loads or between neighboring stores.
1733class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1734 struct MemOpInfo {
1735 SUnit *SU;
1736 SmallVector<const MachineOperand *, 4> BaseOps;
1737 int64_t Offset;
1738 LocationSize Width;
1739 bool OffsetIsScalable;
1740
1741 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1742 int64_t Offset, bool OffsetIsScalable, LocationSize Width)
1743 : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1744 Width(Width), OffsetIsScalable(OffsetIsScalable) {}
1745
1746 static bool Compare(const MachineOperand *const &A,
1747 const MachineOperand *const &B) {
1748 if (A->getType() != B->getType())
1749 return A->getType() < B->getType();
1750 if (A->isReg())
1751 return A->getReg() < B->getReg();
1752 if (A->isFI()) {
1753 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1754 const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1755 bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1756 TargetFrameLowering::StackGrowsDown;
1757 return StackGrowsDown ? A->getIndex() > B->getIndex()
1758 : A->getIndex() < B->getIndex();
1759 }
1760
1761 llvm_unreachable("MemOpClusterMutation only supports register or frame "
1762 "index bases.");
1763 }
1764
1765 bool operator<(const MemOpInfo &RHS) const {
1766 // FIXME: Don't compare everything twice. Maybe use C++20 three way
1767 // comparison instead when it's available.
1768 if (std::lexicographical_compare(first1: BaseOps.begin(), last1: BaseOps.end(),
1769 first2: RHS.BaseOps.begin(), last2: RHS.BaseOps.end(),
1770 comp: Compare))
1771 return true;
1772 if (std::lexicographical_compare(first1: RHS.BaseOps.begin(), last1: RHS.BaseOps.end(),
1773 first2: BaseOps.begin(), last2: BaseOps.end(), comp: Compare))
1774 return false;
1775 if (Offset != RHS.Offset)
1776 return Offset < RHS.Offset;
1777 return SU->NodeNum < RHS.SU->NodeNum;
1778 }
1779 };
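
  // A possible shape for the C++20 three-way comparison mentioned in the FIXME
  // in MemOpInfo::operator< above (untested sketch; it would require <compare>
  // and a comparator that maps Compare onto std::strong_ordering):
  //
  //   std::strong_ordering operator<=>(const MemOpInfo &RHS) const {
  //     auto Cmp3 = [](const MachineOperand *A, const MachineOperand *B) {
  //       if (Compare(A, B))
  //         return std::strong_ordering::less;
  //       if (Compare(B, A))
  //         return std::strong_ordering::greater;
  //       return std::strong_ordering::equal;
  //     };
  //     if (auto C = std::lexicographical_compare_three_way(
  //             BaseOps.begin(), BaseOps.end(), RHS.BaseOps.begin(),
  //             RHS.BaseOps.end(), Cmp3);
  //         C != 0)
  //       return C;
  //     if (auto C = Offset <=> RHS.Offset; C != 0)
  //       return C;
  //     return SU->NodeNum <=> RHS.SU->NodeNum;
  //   }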
1780
1781 const TargetInstrInfo *TII;
1782 const TargetRegisterInfo *TRI;
1783 bool IsLoad;
1784 bool ReorderWhileClustering;
1785
1786public:
1787 BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1788 const TargetRegisterInfo *tri, bool IsLoad,
1789 bool ReorderWhileClustering)
1790 : TII(tii), TRI(tri), IsLoad(IsLoad),
1791 ReorderWhileClustering(ReorderWhileClustering) {}
1792
1793 void apply(ScheduleDAGInstrs *DAGInstrs) override;
1794
1795protected:
1796 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1797 ScheduleDAGInstrs *DAG);
1798 void collectMemOpRecords(std::vector<SUnit> &SUnits,
1799 SmallVectorImpl<MemOpInfo> &MemOpRecords);
1800 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1801 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
1802};
1803
1804class StoreClusterMutation : public BaseMemOpClusterMutation {
1805public:
1806 StoreClusterMutation(const TargetInstrInfo *tii,
1807 const TargetRegisterInfo *tri,
1808 bool ReorderWhileClustering)
1809 : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
1810};
1811
1812class LoadClusterMutation : public BaseMemOpClusterMutation {
1813public:
1814 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
1815 bool ReorderWhileClustering)
1816 : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
1817};
1818
1819} // end anonymous namespace
1820
1821namespace llvm {
1822
1823std::unique_ptr<ScheduleDAGMutation>
1824createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1825 const TargetRegisterInfo *TRI,
1826 bool ReorderWhileClustering) {
1827 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
1828 args&: TII, args&: TRI, args&: ReorderWhileClustering)
1829 : nullptr;
1830}
1831
1832std::unique_ptr<ScheduleDAGMutation>
1833createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1834 const TargetRegisterInfo *TRI,
1835 bool ReorderWhileClustering) {
1836 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
1837 args&: TII, args&: TRI, args&: ReorderWhileClustering)
1838 : nullptr;
1839}
1840
1841} // end namespace llvm
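
// For reference, targets typically enable these mutations where they construct
// their scheduler instance, e.g. in a TargetPassConfig::createMachineScheduler
// override. A rough sketch ("MyTargetPassConfig" is a placeholder, not a real
// class):
//
//   ScheduleDAGInstrs *
//   MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
//     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }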
1842
1843// Sort all the loads/stores first. Then, for each load/store, check the
1844// following loads/stores one by one until the first non-dependent one is
1845// reached, and call the target hook to see if they can be clustered.
1846// If FastCluster is enabled, we assume all the loads/stores have already been
1847// preprocessed and have no dependencies on each other.
1848void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1849 ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1850 ScheduleDAGInstrs *DAG) {
1851 // Keep track of the current cluster length and bytes for each SUnit.
1852 DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
1853
1854  // At this point, the `MemOpRecords` array must hold at least two mem ops.
1855  // Try to cluster the mem ops collected within the `MemOpRecords` array.
1856 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1857    // The decision to cluster mem ops is based on target-dependent logic.
1858 auto MemOpa = MemOpRecords[Idx];
1859
1860 // Seek for the next load/store to do the cluster.
1861 unsigned NextIdx = Idx + 1;
1862 for (; NextIdx < End; ++NextIdx)
1863      // Skip if MemOpb has already been clustered or has a dependency on
1864      // MemOpa.
1865 if (!SUnit2ClusterInfo.count(Val: MemOpRecords[NextIdx].SU->NodeNum) &&
1866 (FastCluster ||
1867 (!DAG->IsReachable(SU: MemOpRecords[NextIdx].SU, TargetSU: MemOpa.SU) &&
1868 !DAG->IsReachable(SU: MemOpa.SU, TargetSU: MemOpRecords[NextIdx].SU))))
1869 break;
1870 if (NextIdx == End)
1871 continue;
1872
1873 auto MemOpb = MemOpRecords[NextIdx];
1874 unsigned ClusterLength = 2;
1875 unsigned CurrentClusterBytes = MemOpa.Width.getValue().getKnownMinValue() +
1876 MemOpb.Width.getValue().getKnownMinValue();
1877 if (SUnit2ClusterInfo.count(Val: MemOpa.SU->NodeNum)) {
1878 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1879 CurrentClusterBytes = SUnit2ClusterInfo[MemOpa.SU->NodeNum].second +
1880 MemOpb.Width.getValue().getKnownMinValue();
1881 }
1882
1883 if (!TII->shouldClusterMemOps(BaseOps1: MemOpa.BaseOps, Offset1: MemOpa.Offset,
1884 OffsetIsScalable1: MemOpa.OffsetIsScalable, BaseOps2: MemOpb.BaseOps,
1885 Offset2: MemOpb.Offset, OffsetIsScalable2: MemOpb.OffsetIsScalable,
1886 ClusterSize: ClusterLength, NumBytes: CurrentClusterBytes))
1887 continue;
1888
1889 SUnit *SUa = MemOpa.SU;
1890 SUnit *SUb = MemOpb.SU;
1891 if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
1892 std::swap(a&: SUa, b&: SUb);
1893
1894 // FIXME: Is this check really required?
1895 if (!DAG->addEdge(SuccSU: SUb, PredDep: SDep(SUa, SDep::Cluster)))
1896 continue;
1897
1898 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1899 << SUb->NodeNum << ")\n");
1900 ++NumClustered;
1901
1902 if (IsLoad) {
1903 // Copy successor edges from SUa to SUb. Interleaving computation
1904 // dependent on SUa can prevent load combining due to register reuse.
1905 // Predecessor edges do not need to be copied from SUb to SUa since
1906 // nearby loads should have effectively the same inputs.
1907 for (const SDep &Succ : SUa->Succs) {
1908 if (Succ.getSUnit() == SUb)
1909 continue;
1910 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1911 << ")\n");
1912 DAG->addEdge(SuccSU: Succ.getSUnit(), PredDep: SDep(SUb, SDep::Artificial));
1913 }
1914 } else {
1915      // Copy predecessor edges from SUb to SUa so that the SUnits that SUb
1916      // depends on cannot be scheduled in between SUb and SUa. Successor
1917      // edges do not need to be copied from SUa to SUb since nothing will
1918      // depend on stores.
1919      // Note that we don't need to worry about memory dependencies: we never
1920      // try to cluster mem ops that have a memory dependency.
1921 for (const SDep &Pred : SUb->Preds) {
1922 if (Pred.getSUnit() == SUa)
1923 continue;
1924 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1925 << ")\n");
1926 DAG->addEdge(SuccSU: SUa, PredDep: SDep(Pred.getSUnit(), SDep::Artificial));
1927 }
1928 }
1929
1930 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1931 CurrentClusterBytes};
1932
1933 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1934 << ", Curr cluster bytes: " << CurrentClusterBytes
1935 << "\n");
1936 }
1937}
1938
1939void BaseMemOpClusterMutation::collectMemOpRecords(
1940 std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1941 for (auto &SU : SUnits) {
1942 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1943 (!IsLoad && !SU.getInstr()->mayStore()))
1944 continue;
1945
1946 const MachineInstr &MI = *SU.getInstr();
1947 SmallVector<const MachineOperand *, 4> BaseOps;
1948 int64_t Offset;
1949 bool OffsetIsScalable;
1950 LocationSize Width = 0;
1951 if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1952 OffsetIsScalable, Width, TRI)) {
1953 MemOpRecords.push_back(
1954 Elt: MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
1955
1956 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1957 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1958 << ", Width: " << Width << "\n");
1959 }
1960#ifndef NDEBUG
1961 for (const auto *Op : BaseOps)
1962 assert(Op);
1963#endif
1964 }
1965}
1966
1967bool BaseMemOpClusterMutation::groupMemOps(
1968 ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1969 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1970 bool FastCluster =
1971 ForceFastCluster ||
1972 MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1973
1974 for (const auto &MemOp : MemOps) {
1975 unsigned ChainPredID = DAG->SUnits.size();
1976 if (FastCluster) {
1977 for (const SDep &Pred : MemOp.SU->Preds) {
1978        // We only want to cluster mem ops that share the same ctrl (non-data)
1979        // pred, so that they have no ctrl dependency on each other. For store
1980        // instrs, however, we can still cluster them if the pred is a load instr.
1981 if ((Pred.isCtrl() &&
1982 (IsLoad ||
1983 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1984 !Pred.isArtificial()) {
1985 ChainPredID = Pred.getSUnit()->NodeNum;
1986 break;
1987 }
1988 }
1989 } else
1990 ChainPredID = 0;
1991
1992 Groups[ChainPredID].push_back(Elt: MemOp);
1993 }
1994 return FastCluster;
1995}
1996
1997/// Callback from DAG postProcessing to create cluster edges for loads/stores.
1998void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1999 // Collect all the clusterable loads/stores
2000 SmallVector<MemOpInfo, 32> MemOpRecords;
2001 collectMemOpRecords(SUnits&: DAG->SUnits, MemOpRecords);
2002
2003 if (MemOpRecords.size() < 2)
2004 return;
2005
2006  // If the DAG is too complex, use a heuristic to put loads/stores that have
2007  // no dependency into the same group, to avoid a compile-time blowup.
2008  // Note that some fusion pairs may be lost this way.
2009 DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
2010 bool FastCluster = groupMemOps(MemOps: MemOpRecords, DAG, Groups);
2011
2012 for (auto &Group : Groups) {
2013    // Sort the loads/stores so that we can stop clustering as early as
2014    // possible.
2015 llvm::sort(C&: Group.second);
2016
2017    // Try to cluster all the neighboring loads/stores.
2018 clusterNeighboringMemOps(MemOpRecords: Group.second, FastCluster, DAG);
2019 }
2020}
2021
2022//===----------------------------------------------------------------------===//
2023// CopyConstrain - DAG post-processing to encourage copy elimination.
2024//===----------------------------------------------------------------------===//
2025
2026namespace {
2027
2028/// Post-process the DAG to create weak edges from all uses of a copy to
2029/// the one use that defines the copy's source vreg, most likely an induction
2030/// variable increment.
2031class CopyConstrain : public ScheduleDAGMutation {
2032 // Transient state.
2033 SlotIndex RegionBeginIdx;
2034
2035 // RegionEndIdx is the slot index of the last non-debug instruction in the
2036 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
2037 SlotIndex RegionEndIdx;
2038
2039public:
2040 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
2041
2042 void apply(ScheduleDAGInstrs *DAGInstrs) override;
2043
2044protected:
2045 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
2046};
2047
2048} // end anonymous namespace
2049
2050namespace llvm {
2051
2052std::unique_ptr<ScheduleDAGMutation>
2053createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
2054 const TargetRegisterInfo *TRI) {
2055 return std::make_unique<CopyConstrain>(args&: TII, args&: TRI);
2056}
2057
2058} // end namespace llvm
2059
2060/// constrainLocalCopy handles two possibilities:
2061/// 1) Local src:
2062/// I0: = dst
2063/// I1: src = ...
2064/// I2: = dst
2065/// I3: dst = src (copy)
2066/// (create pred->succ edges I0->I1, I2->I1)
2067///
2068/// 2) Local copy:
2069/// I0: dst = src (copy)
2070/// I1: = dst
2071/// I2: src = ...
2072/// I3: = dst
2073/// (create pred->succ edges I1->I2, I3->I2)
2074///
2075/// Although the MachineScheduler is currently constrained to single blocks,
2076/// this algorithm should handle extended blocks. An EBB is a set of
2077/// contiguously numbered blocks such that the previous block in the EBB is
2078/// always the single predecessor.
2079void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
2080 LiveIntervals *LIS = DAG->getLIS();
2081 MachineInstr *Copy = CopySU->getInstr();
2082
2083 // Check for pure vreg copies.
2084 const MachineOperand &SrcOp = Copy->getOperand(i: 1);
2085 Register SrcReg = SrcOp.getReg();
2086 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
2087 return;
2088
2089 const MachineOperand &DstOp = Copy->getOperand(i: 0);
2090 Register DstReg = DstOp.getReg();
2091 if (!DstReg.isVirtual() || DstOp.isDead())
2092 return;
2093
2094 // Check if either the dest or source is local. If it's live across a back
2095 // edge, it's not local. Note that if both vregs are live across the back
2096  // edge, we cannot successfully constrain the copy without cyclic scheduling.
2097 // If both the copy's source and dest are local live intervals, then we
2098 // should treat the dest as the global for the purpose of adding
2099 // constraints. This adds edges from source's other uses to the copy.
2100 unsigned LocalReg = SrcReg;
2101 unsigned GlobalReg = DstReg;
2102 LiveInterval *LocalLI = &LIS->getInterval(Reg: LocalReg);
2103 if (!LocalLI->isLocal(Start: RegionBeginIdx, End: RegionEndIdx)) {
2104 LocalReg = DstReg;
2105 GlobalReg = SrcReg;
2106 LocalLI = &LIS->getInterval(Reg: LocalReg);
2107 if (!LocalLI->isLocal(Start: RegionBeginIdx, End: RegionEndIdx))
2108 return;
2109 }
2110 LiveInterval *GlobalLI = &LIS->getInterval(Reg: GlobalReg);
2111
2112 // Find the global segment after the start of the local LI.
2113 LiveInterval::iterator GlobalSegment = GlobalLI->find(Pos: LocalLI->beginIndex());
2114 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2115 // local live range. We could create edges from other global uses to the local
2116 // start, but the coalescer should have already eliminated these cases, so
2117 // don't bother dealing with it.
2118 if (GlobalSegment == GlobalLI->end())
2119 return;
2120
2121 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2122 // returned the next global segment. But if GlobalSegment overlaps with
2123 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2124 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2125 if (GlobalSegment->contains(I: LocalLI->beginIndex()))
2126 ++GlobalSegment;
2127
2128 if (GlobalSegment == GlobalLI->end())
2129 return;
2130
2131 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2132 if (GlobalSegment != GlobalLI->begin()) {
2133 // Two address defs have no hole.
2134 if (SlotIndex::isSameInstr(A: std::prev(x: GlobalSegment)->end,
2135 B: GlobalSegment->start)) {
2136 return;
2137 }
2138    // If the prior global segment may be defined by the same two-address
2139    // instruction that also defines LocalLI, then we cannot make a hole here.
2140 if (SlotIndex::isSameInstr(A: std::prev(x: GlobalSegment)->start,
2141 B: LocalLI->beginIndex())) {
2142 return;
2143 }
2144 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2145 // it would be a disconnected component in the live range.
2146 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2147 "Disconnected LRG within the scheduling region.");
2148 }
2149 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(index: GlobalSegment->start);
2150 if (!GlobalDef)
2151 return;
2152
2153 SUnit *GlobalSU = DAG->getSUnit(MI: GlobalDef);
2154 if (!GlobalSU)
2155 return;
2156
2157 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2158 // constraining the uses of the last local def to precede GlobalDef.
2159 SmallVector<SUnit*,8> LocalUses;
2160 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(Idx: LocalLI->endIndex());
2161 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(index: LastLocalVN->def);
2162 SUnit *LastLocalSU = DAG->getSUnit(MI: LastLocalDef);
2163 for (const SDep &Succ : LastLocalSU->Succs) {
2164 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2165 continue;
2166 if (Succ.getSUnit() == GlobalSU)
2167 continue;
2168 if (!DAG->canAddEdge(SuccSU: GlobalSU, PredSU: Succ.getSUnit()))
2169 return;
2170 LocalUses.push_back(Elt: Succ.getSUnit());
2171 }
2172 // Open the top of the GlobalLI hole by constraining any earlier global uses
2173 // to precede the start of LocalLI.
2174 SmallVector<SUnit*,8> GlobalUses;
2175 MachineInstr *FirstLocalDef =
2176 LIS->getInstructionFromIndex(index: LocalLI->beginIndex());
2177 SUnit *FirstLocalSU = DAG->getSUnit(MI: FirstLocalDef);
2178 for (const SDep &Pred : GlobalSU->Preds) {
2179 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2180 continue;
2181 if (Pred.getSUnit() == FirstLocalSU)
2182 continue;
2183 if (!DAG->canAddEdge(SuccSU: FirstLocalSU, PredSU: Pred.getSUnit()))
2184 return;
2185 GlobalUses.push_back(Elt: Pred.getSUnit());
2186 }
2187 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2188 // Add the weak edges.
2189 for (SUnit *LU : LocalUses) {
2190 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2191 << GlobalSU->NodeNum << ")\n");
2192 DAG->addEdge(SuccSU: GlobalSU, PredDep: SDep(LU, SDep::Weak));
2193 }
2194 for (SUnit *GU : GlobalUses) {
2195 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2196 << FirstLocalSU->NodeNum << ")\n");
2197 DAG->addEdge(SuccSU: FirstLocalSU, PredDep: SDep(GU, SDep::Weak));
2198 }
2199}
2200
2201/// Callback from DAG postProcessing to create weak edges to encourage
2202/// copy elimination.
2203void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2204 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2205 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2206
2207 MachineBasicBlock::iterator FirstPos = nextIfDebug(I: DAG->begin(), End: DAG->end());
2208 if (FirstPos == DAG->end())
2209 return;
2210 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(Instr: *FirstPos);
2211 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2212 Instr: *priorNonDebug(I: DAG->end(), Beg: DAG->begin()));
2213
2214 for (SUnit &SU : DAG->SUnits) {
2215 if (!SU.getInstr()->isCopy())
2216 continue;
2217
2218 constrainLocalCopy(CopySU: &SU, DAG: static_cast<ScheduleDAGMILive*>(DAG));
2219 }
2220}
2221
2222//===----------------------------------------------------------------------===//
2223// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2224// and possibly other custom schedulers.
2225//===----------------------------------------------------------------------===//
2226
2227static const unsigned InvalidCycle = ~0U;
2228
2229SchedBoundary::~SchedBoundary() { delete HazardRec; }
2230
2231/// Given a Count of resource usage and a Latency value, return true if a
2232/// SchedBoundary becomes resource limited.
2233/// If we are checking after scheduling a node, we should return true when
2234/// we just reach the resource limit.
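///
/// For example, with LFactor == 2, Count == 10 and Latency == 4:
///   ResCntFactor = 10 - (4 * 2) = 2
/// After scheduling a node (AfterSchedNode == true) the zone already counts as
/// resource limited (2 >= 2); before scheduling a node it does not (2 > 2 is
/// false).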
2235static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2236 unsigned Latency, bool AfterSchedNode) {
2237 int ResCntFactor = (int)(Count - (Latency * LFactor));
2238 if (AfterSchedNode)
2239 return ResCntFactor >= (int)LFactor;
2240 else
2241 return ResCntFactor > (int)LFactor;
2242}
2243
2244void SchedBoundary::reset() {
2245 // A new HazardRec is created for each DAG and owned by SchedBoundary.
2246 // Destroying and reconstructing it is very expensive though. So keep
2247 // invalid, placeholder HazardRecs.
2248 if (HazardRec && HazardRec->isEnabled()) {
2249 delete HazardRec;
2250 HazardRec = nullptr;
2251 }
2252 Available.clear();
2253 Pending.clear();
2254 CheckPending = false;
2255 CurrCycle = 0;
2256 CurrMOps = 0;
2257 MinReadyCycle = std::numeric_limits<unsigned>::max();
2258 ExpectedLatency = 0;
2259 DependentLatency = 0;
2260 RetiredMOps = 0;
2261 MaxExecutedResCount = 0;
2262 ZoneCritResIdx = 0;
2263 IsResourceLimited = false;
2264 ReservedCycles.clear();
2265 ReservedResourceSegments.clear();
2266 ReservedCyclesIndex.clear();
2267 ResourceGroupSubUnitMasks.clear();
2268#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2269 // Track the maximum number of stall cycles that could arise either from the
2270 // latency of a DAG edge or the number of cycles that a processor resource is
2271 // reserved (SchedBoundary::ReservedCycles).
2272 MaxObservedStall = 0;
2273#endif
2274 // Reserve a zero-count for invalid CritResIdx.
2275 ExecutedResCounts.resize(N: 1);
2276 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2277}
2278
2279void SchedRemainder::
2280init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2281 reset();
2282 if (!SchedModel->hasInstrSchedModel())
2283 return;
2284 RemainingCounts.resize(N: SchedModel->getNumProcResourceKinds());
2285 for (SUnit &SU : DAG->SUnits) {
2286 const MCSchedClassDesc *SC = DAG->getSchedClass(SU: &SU);
2287 RemIssueCount += SchedModel->getNumMicroOps(MI: SU.getInstr(), SC)
2288 * SchedModel->getMicroOpFactor();
2289 for (TargetSchedModel::ProcResIter
2290 PI = SchedModel->getWriteProcResBegin(SC),
2291 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2292 unsigned PIdx = PI->ProcResourceIdx;
2293 unsigned Factor = SchedModel->getResourceFactor(ResIdx: PIdx);
2294 assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
2295 RemainingCounts[PIdx] +=
2296 (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));
2297 }
2298 }
2299}
2300
2301void SchedBoundary::
2302init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2303 reset();
2304 DAG = dag;
2305 SchedModel = smodel;
2306 Rem = rem;
2307 if (SchedModel->hasInstrSchedModel()) {
2308 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2309 ReservedCyclesIndex.resize(N: ResourceCount);
2310 ExecutedResCounts.resize(N: ResourceCount);
2311 ResourceGroupSubUnitMasks.resize(N: ResourceCount, NV: APInt(ResourceCount, 0));
2312 unsigned NumUnits = 0;
2313
2314 for (unsigned i = 0; i < ResourceCount; ++i) {
2315 ReservedCyclesIndex[i] = NumUnits;
2316 NumUnits += SchedModel->getProcResource(PIdx: i)->NumUnits;
2317 if (isUnbufferedGroup(PIdx: i)) {
2318 auto SubUnits = SchedModel->getProcResource(PIdx: i)->SubUnitsIdxBegin;
2319 for (unsigned U = 0, UE = SchedModel->getProcResource(PIdx: i)->NumUnits;
2320 U != UE; ++U)
2321 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2322 }
2323 }
2324
2325 ReservedCycles.resize(new_size: NumUnits, x: InvalidCycle);
2326 }
2327}
2328
2329/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2330/// these "soft stalls" differently than the hard stall cycles based on CPU
2331/// resources and computed by checkHazard(). A fully in-order model
2332/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2333/// available for scheduling until they are ready. However, a weaker in-order
2334/// model may use this for heuristics. For example, if a processor has in-order
2335/// behavior when reading certain resources, this may come into play.
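///
/// For example, an unbuffered SU whose ready cycle is 7 while CurrCycle is 5
/// reports a 2-cycle soft stall.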
2336unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2337 if (!SU->isUnbuffered)
2338 return 0;
2339
2340 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2341 if (ReadyCycle > CurrCycle)
2342 return ReadyCycle - CurrCycle;
2343 return 0;
2344}
2345
2346/// Compute the next cycle at which the given processor resource unit
2347/// can be scheduled.
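///
/// For example, in the non-interval (ReservedCycles) mode with bottom-up
/// scheduling, an instance last recorded as unreserved at cycle 3, with
/// ReleaseAtCycle == 2 and CurrCycle == 4, is next usable at
/// max(4, 3 + 2) == 5.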
2348unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2349 unsigned ReleaseAtCycle,
2350 unsigned AcquireAtCycle) {
2351 if (SchedModel && SchedModel->enableIntervals()) {
2352 if (isTop())
2353 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
2354 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2355
2356 return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
2357 CurrCycle, AcquireAtCycle, ReleaseAtCycle);
2358 }
2359
2360 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2361 // If this resource has never been used, always return cycle zero.
2362 if (NextUnreserved == InvalidCycle)
2363 return CurrCycle;
2364 // For bottom-up scheduling add the cycles needed for the current operation.
2365 if (!isTop())
2366 NextUnreserved = std::max(a: CurrCycle, b: NextUnreserved + ReleaseAtCycle);
2367 return NextUnreserved;
2368}
2369
2370/// Compute the next cycle at which the given processor resource can be
2371/// scheduled. Returns the next cycle and the index of the processor resource
2372/// instance in the reserved cycles vector.
2373std::pair<unsigned, unsigned>
2374SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2375 unsigned ReleaseAtCycle,
2376 unsigned AcquireAtCycle) {
2377 if (MischedDetailResourceBooking) {
2378 LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
2379 LLVM_DEBUG(dumpReservedCycles());
2380 LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
2381 }
2382 unsigned MinNextUnreserved = InvalidCycle;
2383 unsigned InstanceIdx = 0;
2384 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2385 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2386 assert(NumberOfInstances > 0 &&
2387 "Cannot have zero instances of a ProcResource");
2388
2389 if (isUnbufferedGroup(PIdx)) {
2390 // If any subunits are used by the instruction, report that the
2391 // subunits of the resource group are available at the first cycle
2392 // in which the unit is available, effectively removing the group
2393 // record from hazarding and basing the hazarding decisions on the
2394 // subunit records. Otherwise, choose the first available instance
2395 // from among the subunits. Specifications which assign cycles to
2396 // both the subunits and the group or which use an unbuffered
2397 // group with buffered subunits will appear to schedule
2398 // strangely. In the first case, the additional cycles for the
2399 // group will be ignored. In the second, the group will be
2400 // ignored entirely.
2401 for (const MCWriteProcResEntry &PE :
2402 make_range(x: SchedModel->getWriteProcResBegin(SC),
2403 y: SchedModel->getWriteProcResEnd(SC)))
2404 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2405 return std::make_pair(x: getNextResourceCycleByInstance(
2406 InstanceIdx: StartIndex, ReleaseAtCycle, AcquireAtCycle),
2407 y&: StartIndex);
2408
2409 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2410 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2411 unsigned NextUnreserved, NextInstanceIdx;
2412 std::tie(args&: NextUnreserved, args&: NextInstanceIdx) =
2413 getNextResourceCycle(SC, PIdx: SubUnits[I], ReleaseAtCycle, AcquireAtCycle);
2414 if (MinNextUnreserved > NextUnreserved) {
2415 InstanceIdx = NextInstanceIdx;
2416 MinNextUnreserved = NextUnreserved;
2417 }
2418 }
2419 return std::make_pair(x&: MinNextUnreserved, y&: InstanceIdx);
2420 }
2421
2422 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2423 ++I) {
2424 unsigned NextUnreserved =
2425 getNextResourceCycleByInstance(InstanceIdx: I, ReleaseAtCycle, AcquireAtCycle);
2426 if (MischedDetailResourceBooking)
2427 LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
2428 << NextUnreserved << "c\n");
2429 if (MinNextUnreserved > NextUnreserved) {
2430 InstanceIdx = I;
2431 MinNextUnreserved = NextUnreserved;
2432 }
2433 }
2434 if (MischedDetailResourceBooking)
2435 LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
2436 << "[" << InstanceIdx - StartIndex << "]"
2437 << " available @" << MinNextUnreserved << "c"
2438 << "\n");
2439 return std::make_pair(x&: MinNextUnreserved, y&: InstanceIdx);
2440}
2441
2442/// Does this SU have a hazard within the current instruction group.
2443///
2444/// The scheduler supports two modes of hazard recognition. The first is the
2445/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2446/// supports highly complicated in-order reservation tables
2447/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2448///
2449/// The second is a streamlined mechanism that checks for hazards based on
2450/// simple counters that the scheduler itself maintains. It explicitly checks
2451/// for instruction dispatch limitations, including the number of micro-ops that
2452/// can dispatch per cycle.
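///
/// For example, with an issue width of 4, CurrMOps == 3 and an SU that expands
/// to 2 micro-ops, 3 + 2 > 4, so the SU is a hazard for the current group and
/// must wait for the next cycle.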
2453///
2454/// TODO: Also check whether the SU must start a new group.
2455bool SchedBoundary::checkHazard(SUnit *SU) {
2456 if (HazardRec->isEnabled()
2457 && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2458 return true;
2459 }
2460
2461 unsigned uops = SchedModel->getNumMicroOps(MI: SU->getInstr());
2462 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2463 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2464 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2465 return true;
2466 }
2467
2468 if (CurrMOps > 0 &&
2469 ((isTop() && SchedModel->mustBeginGroup(MI: SU->getInstr())) ||
2470 (!isTop() && SchedModel->mustEndGroup(MI: SU->getInstr())))) {
2471 LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2472 << (isTop() ? "begin" : "end") << " group\n");
2473 return true;
2474 }
2475
2476 if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2477 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2478 for (const MCWriteProcResEntry &PE :
2479 make_range(x: SchedModel->getWriteProcResBegin(SC),
2480 y: SchedModel->getWriteProcResEnd(SC))) {
2481 unsigned ResIdx = PE.ProcResourceIdx;
2482 unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
2483 unsigned AcquireAtCycle = PE.AcquireAtCycle;
2484 unsigned NRCycle, InstanceIdx;
2485 std::tie(args&: NRCycle, args&: InstanceIdx) =
2486 getNextResourceCycle(SC, PIdx: ResIdx, ReleaseAtCycle, AcquireAtCycle);
2487 if (NRCycle > CurrCycle) {
2488#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2489 MaxObservedStall = std::max(a: ReleaseAtCycle, b: MaxObservedStall);
2490#endif
2491 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2492 << SchedModel->getResourceName(ResIdx)
2493 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2494 << "=" << NRCycle << "c\n");
2495 return true;
2496 }
2497 }
2498 }
2499 return false;
2500}
2501
2502// Find the unscheduled node in ReadySUs with the highest latency.
2503unsigned SchedBoundary::
2504findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2505 SUnit *LateSU = nullptr;
2506 unsigned RemLatency = 0;
2507 for (SUnit *SU : ReadySUs) {
2508 unsigned L = getUnscheduledLatency(SU);
2509 if (L > RemLatency) {
2510 RemLatency = L;
2511 LateSU = SU;
2512 }
2513 }
2514 if (LateSU) {
2515 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2516 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2517 }
2518 return RemLatency;
2519}
2520
2521// Count resources in this zone and the remaining unscheduled
2522// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2523// resource index, or zero if the zone is issue limited.
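//
// For example, with a micro-op factor of 2, RemIssueCount == 6 and
// RetiredMOps == 3, the baseline count is 6 + 3 * 2 == 12; a resource whose
// executed-plus-remaining count exceeds 12 becomes the reported OtherCritIdx.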
2524unsigned SchedBoundary::
2525getOtherResourceCount(unsigned &OtherCritIdx) {
2526 OtherCritIdx = 0;
2527 if (!SchedModel->hasInstrSchedModel())
2528 return 0;
2529
2530 unsigned OtherCritCount = Rem->RemIssueCount
2531 + (RetiredMOps * SchedModel->getMicroOpFactor());
2532 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2533 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2534 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2535 PIdx != PEnd; ++PIdx) {
2536 unsigned OtherCount = getResourceCount(ResIdx: PIdx) + Rem->RemainingCounts[PIdx];
2537 if (OtherCount > OtherCritCount) {
2538 OtherCritCount = OtherCount;
2539 OtherCritIdx = PIdx;
2540 }
2541 }
2542 if (OtherCritIdx) {
2543 LLVM_DEBUG(
2544 dbgs() << " " << Available.getName() << " + Remain CritRes: "
2545 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2546 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2547 }
2548 return OtherCritCount;
2549}
2550
2551void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2552 unsigned Idx) {
2553 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2554
2555#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2556  // ReadyCycle was bumped up to CurrCycle when this node was
2557 // scheduled, but CurrCycle may have been eagerly advanced immediately after
2558 // scheduling, so may now be greater than ReadyCycle.
2559 if (ReadyCycle > CurrCycle)
2560 MaxObservedStall = std::max(a: ReadyCycle - CurrCycle, b: MaxObservedStall);
2561#endif
2562
2563 if (ReadyCycle < MinReadyCycle)
2564 MinReadyCycle = ReadyCycle;
2565
2566 // Check for interlocks first. For the purpose of other heuristics, an
2567 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2568 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2569 bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2570 checkHazard(SU) || (Available.size() >= ReadyListLimit);
2571
2572 if (!HazardDetected) {
2573 Available.push(SU);
2574
2575 if (InPQueue)
2576 Pending.remove(I: Pending.begin() + Idx);
2577 return;
2578 }
2579
2580 if (!InPQueue)
2581 Pending.push(SU);
2582}
2583
2584/// Move the boundary of scheduled code by one cycle.
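///
/// For example, with an issue width of 4, advancing from cycle 10 to cycle 12
/// retires up to 4 * (12 - 10) == 8 pending micro-ops from CurrMOps.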
2585void SchedBoundary::bumpCycle(unsigned NextCycle) {
2586 if (SchedModel->getMicroOpBufferSize() == 0) {
2587 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2588 "MinReadyCycle uninitialized");
2589 if (MinReadyCycle > NextCycle)
2590 NextCycle = MinReadyCycle;
2591 }
2592 // Update the current micro-ops, which will issue in the next cycle.
2593 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2594 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2595
2596 // Decrement DependentLatency based on the next cycle.
2597 if ((NextCycle - CurrCycle) > DependentLatency)
2598 DependentLatency = 0;
2599 else
2600 DependentLatency -= (NextCycle - CurrCycle);
2601
2602 if (!HazardRec->isEnabled()) {
2603 // Bypass HazardRec virtual calls.
2604 CurrCycle = NextCycle;
2605 } else {
2606 // Bypass getHazardType calls in case of long latency.
2607 for (; CurrCycle != NextCycle; ++CurrCycle) {
2608 if (isTop())
2609 HazardRec->AdvanceCycle();
2610 else
2611 HazardRec->RecedeCycle();
2612 }
2613 }
2614 CheckPending = true;
2615 IsResourceLimited =
2616 checkResourceLimit(LFactor: SchedModel->getLatencyFactor(), Count: getCriticalCount(),
2617 Latency: getScheduledLatency(), AfterSchedNode: true);
2618
2619 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2620 << '\n');
2621}
2622
2623void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2624 ExecutedResCounts[PIdx] += Count;
2625 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2626 MaxExecutedResCount = ExecutedResCounts[PIdx];
2627}
2628
2629/// Add the given processor resource to this scheduled zone.
2630///
2631/// \param ReleaseAtCycle indicates the number of consecutive (non-pipelined)
2632/// cycles during which this resource is released.
2633///
2634/// \param AcquireAtCycle indicates the number of consecutive (non-pipelined)
2635/// cycles at which the resource is acquired after issue (assuming no stalls).
2636///
2637/// \return the next cycle at which the instruction may execute without
2638/// oversubscribing resources.
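///
/// For example, a resource with factor 3 that is held from AcquireAtCycle == 1
/// to ReleaseAtCycle == 4 adds 3 * (4 - 1) == 9 to its executed count.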
2639unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2640 unsigned ReleaseAtCycle,
2641 unsigned NextCycle,
2642 unsigned AcquireAtCycle) {
2643 unsigned Factor = SchedModel->getResourceFactor(ResIdx: PIdx);
2644  unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
2645 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2646 << ReleaseAtCycle << "x" << Factor << "u\n");
2647
2648 // Update Executed resources counts.
2649 incExecutedResources(PIdx, Count);
2650 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2651 Rem->RemainingCounts[PIdx] -= Count;
2652
2653 // Check if this resource exceeds the current critical resource. If so, it
2654 // becomes the critical resource.
2655 if (ZoneCritResIdx != PIdx && (getResourceCount(ResIdx: PIdx) > getCriticalCount())) {
2656 ZoneCritResIdx = PIdx;
2657 LLVM_DEBUG(dbgs() << " *** Critical resource "
2658 << SchedModel->getResourceName(PIdx) << ": "
2659 << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2660 << "c\n");
2661 }
2662 // For reserved resources, record the highest cycle using the resource.
2663 unsigned NextAvailable, InstanceIdx;
2664 std::tie(args&: NextAvailable, args&: InstanceIdx) =
2665 getNextResourceCycle(SC, PIdx, ReleaseAtCycle, AcquireAtCycle);
2666 if (NextAvailable > CurrCycle) {
2667 LLVM_DEBUG(dbgs() << " Resource conflict: "
2668 << SchedModel->getResourceName(PIdx)
2669 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2670 << " reserved until @" << NextAvailable << "\n");
2671 }
2672 return NextAvailable;
2673}
2674
2675/// Move the boundary of scheduled code by one SUnit.
2676void SchedBoundary::bumpNode(SUnit *SU) {
2677 // Update the reservation table.
2678 if (HazardRec->isEnabled()) {
2679 if (!isTop() && SU->isCall) {
2680 // Calls are scheduled with their preceding instructions. For bottom-up
2681 // scheduling, clear the pipeline state before emitting.
2682 HazardRec->Reset();
2683 }
2684 HazardRec->EmitInstruction(SU);
2685 // Scheduling an instruction may have made pending instructions available.
2686 CheckPending = true;
2687 }
2688 // checkHazard should prevent scheduling multiple instructions per cycle that
2689 // exceed the issue width.
2690 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2691 unsigned IncMOps = SchedModel->getNumMicroOps(MI: SU->getInstr());
2692 assert(
2693 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2694 "Cannot schedule this instruction's MicroOps in the current cycle.");
2695
2696 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2697 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2698
2699 unsigned NextCycle = CurrCycle;
2700 switch (SchedModel->getMicroOpBufferSize()) {
2701 case 0:
2702 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2703 break;
2704 case 1:
2705 if (ReadyCycle > NextCycle) {
2706 NextCycle = ReadyCycle;
2707 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2708 }
2709 break;
2710 default:
2711 // We don't currently model the OOO reorder buffer, so consider all
2712 // scheduled MOps to be "retired". We do loosely model in-order resource
2713 // latency. If this instruction uses an in-order resource, account for any
2714 // likely stall cycles.
2715 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2716 NextCycle = ReadyCycle;
2717 break;
2718 }
2719 RetiredMOps += IncMOps;
2720
2721 // Update resource counts and critical resource.
2722 if (SchedModel->hasInstrSchedModel()) {
2723 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2724 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2725 Rem->RemIssueCount -= DecRemIssue;
2726 if (ZoneCritResIdx) {
2727 // Scale scheduled micro-ops for comparing with the critical resource.
2728 unsigned ScaledMOps =
2729 RetiredMOps * SchedModel->getMicroOpFactor();
2730
2731 // If scaled micro-ops are now more than the previous critical resource by
2732 // a full cycle, then micro-ops issue becomes critical.
2733 if ((int)(ScaledMOps - getResourceCount(ResIdx: ZoneCritResIdx))
2734 >= (int)SchedModel->getLatencyFactor()) {
2735 ZoneCritResIdx = 0;
2736 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2737 << ScaledMOps / SchedModel->getLatencyFactor()
2738 << "c\n");
2739 }
2740 }
2741 for (TargetSchedModel::ProcResIter
2742 PI = SchedModel->getWriteProcResBegin(SC),
2743 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2744 unsigned RCycle =
2745 countResource(SC, PIdx: PI->ProcResourceIdx, ReleaseAtCycle: PI->ReleaseAtCycle, NextCycle,
2746 AcquireAtCycle: PI->AcquireAtCycle);
2747 if (RCycle > NextCycle)
2748 NextCycle = RCycle;
2749 }
2750 if (SU->hasReservedResource) {
2751 // For reserved resources, record the highest cycle using the resource.
2752 // For top-down scheduling, this is the cycle in which we schedule this
2753      // instruction plus the number of cycles the operation reserves the
2754      // resource. For bottom-up, it is simply the instruction's cycle.
2755 for (TargetSchedModel::ProcResIter
2756 PI = SchedModel->getWriteProcResBegin(SC),
2757 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2758 unsigned PIdx = PI->ProcResourceIdx;
2759 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2760
2761 if (SchedModel && SchedModel->enableIntervals()) {
2762 unsigned ReservedUntil, InstanceIdx;
2763 std::tie(args&: ReservedUntil, args&: InstanceIdx) = getNextResourceCycle(
2764 SC, PIdx, ReleaseAtCycle: PI->ReleaseAtCycle, AcquireAtCycle: PI->AcquireAtCycle);
2765 if (isTop()) {
2766 ReservedResourceSegments[InstanceIdx].add(
2767 A: ResourceSegments::getResourceIntervalTop(
2768 C: NextCycle, AcquireAtCycle: PI->AcquireAtCycle, ReleaseAtCycle: PI->ReleaseAtCycle),
2769 CutOff: MIResourceCutOff);
2770 } else {
2771 ReservedResourceSegments[InstanceIdx].add(
2772 A: ResourceSegments::getResourceIntervalBottom(
2773 C: NextCycle, AcquireAtCycle: PI->AcquireAtCycle, ReleaseAtCycle: PI->ReleaseAtCycle),
2774 CutOff: MIResourceCutOff);
2775 }
2776 } else {
2777
2778 unsigned ReservedUntil, InstanceIdx;
2779 std::tie(args&: ReservedUntil, args&: InstanceIdx) = getNextResourceCycle(
2780 SC, PIdx, ReleaseAtCycle: PI->ReleaseAtCycle, AcquireAtCycle: PI->AcquireAtCycle);
2781 if (isTop()) {
2782 ReservedCycles[InstanceIdx] =
2783 std::max(a: ReservedUntil, b: NextCycle + PI->ReleaseAtCycle);
2784 } else
2785 ReservedCycles[InstanceIdx] = NextCycle;
2786 }
2787 }
2788 }
2789 }
2790 }
2791 // Update ExpectedLatency and DependentLatency.
2792 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2793 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2794 if (SU->getDepth() > TopLatency) {
2795 TopLatency = SU->getDepth();
2796 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2797 << SU->NodeNum << ") " << TopLatency << "c\n");
2798 }
2799 if (SU->getHeight() > BotLatency) {
2800 BotLatency = SU->getHeight();
2801 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2802 << SU->NodeNum << ") " << BotLatency << "c\n");
2803 }
2804 // If we stall for any reason, bump the cycle.
2805 if (NextCycle > CurrCycle)
2806 bumpCycle(NextCycle);
2807 else
2808 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2809 // resource limited. If a stall occurred, bumpCycle does this.
2810 IsResourceLimited =
2811 checkResourceLimit(LFactor: SchedModel->getLatencyFactor(), Count: getCriticalCount(),
2812 Latency: getScheduledLatency(), AfterSchedNode: true);
2813
2814 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2815  // resets CurrMOps. Loop to handle instructions with more MOps than can issue in
2816 // one cycle. Since we commonly reach the max MOps here, opportunistically
2817 // bump the cycle to avoid uselessly checking everything in the readyQ.
2818 CurrMOps += IncMOps;
2819
2820 // Bump the cycle count for issue group constraints.
2821  // This must be done after NextCycle has been adjusted for all other stalls.
2822  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2823  // CurrCycle to X.
2824 if ((isTop() && SchedModel->mustEndGroup(MI: SU->getInstr())) ||
2825 (!isTop() && SchedModel->mustBeginGroup(MI: SU->getInstr()))) {
2826 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2827 << " group\n");
2828 bumpCycle(NextCycle: ++NextCycle);
2829 }
2830
2831 while (CurrMOps >= SchedModel->getIssueWidth()) {
2832 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2833 << CurrCycle << '\n');
2834 bumpCycle(NextCycle: ++NextCycle);
2835 }
2836 LLVM_DEBUG(dumpScheduledState());
2837}
2838
2839/// Release pending ready nodes into the available queue. This makes them
2840/// visible to heuristics.
2841void SchedBoundary::releasePending() {
2842 // If the available queue is empty, it is safe to reset MinReadyCycle.
2843 if (Available.empty())
2844 MinReadyCycle = std::numeric_limits<unsigned>::max();
2845
2846 // Check to see if any of the pending instructions are ready to issue. If
2847 // so, add them to the available queue.
2848 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2849 SUnit *SU = *(Pending.begin() + I);
2850 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2851
2852 if (ReadyCycle < MinReadyCycle)
2853 MinReadyCycle = ReadyCycle;
2854
2855 if (Available.size() >= ReadyListLimit)
2856 break;
2857
2858 releaseNode(SU, ReadyCycle, InPQueue: true, Idx: I);
2859 if (E != Pending.size()) {
2860 --I;
2861 --E;
2862 }
2863 }
2864 CheckPending = false;
2865}
2866
2867/// Remove SU from the ready set for this boundary.
2868void SchedBoundary::removeReady(SUnit *SU) {
2869 if (Available.isInQueue(SU))
2870 Available.remove(I: Available.find(SU));
2871 else {
2872 assert(Pending.isInQueue(SU) && "bad ready count");
2873 Pending.remove(I: Pending.find(SU));
2874 }
2875}
2876
2877/// If this queue only has one ready candidate, return it. As a side effect,
2878/// defer any nodes that now hit a hazard, and advance the cycle until at least
2879/// one node is ready. If multiple instructions are ready, return NULL.
2880SUnit *SchedBoundary::pickOnlyChoice() {
2881 if (CheckPending)
2882 releasePending();
2883
2884 // Defer any ready instrs that now have a hazard.
2885 for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2886 if (checkHazard(SU: *I)) {
2887 Pending.push(SU: *I);
2888 I = Available.remove(I);
2889 continue;
2890 }
2891 ++I;
2892 }
2893 for (unsigned i = 0; Available.empty(); ++i) {
2894// FIXME: Re-enable assert once PR20057 is resolved.
2895// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2896// "permanent hazard");
2897 (void)i;
2898 bumpCycle(NextCycle: CurrCycle + 1);
2899 releasePending();
2900 }
2901
2902 LLVM_DEBUG(Pending.dump());
2903 LLVM_DEBUG(Available.dump());
2904
2905 if (Available.size() == 1)
2906 return *Available.begin();
2907 return nullptr;
2908}
2909
2910#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2911
2912/// Dump the content of the \ref ReservedCycles vector for the
2913/// resources that are used in the basic block.
2914///
2915LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
2916 if (!SchedModel->hasInstrSchedModel())
2917 return;
2918
2919 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2920 unsigned StartIdx = 0;
2921
2922 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
2923 const unsigned NumUnits = SchedModel->getProcResource(PIdx: ResIdx)->NumUnits;
2924 std::string ResName = SchedModel->getResourceName(PIdx: ResIdx);
2925 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
2926 dbgs() << ResName << "(" << UnitIdx << ") = ";
2927 if (SchedModel && SchedModel->enableIntervals()) {
2928 if (ReservedResourceSegments.count(x: StartIdx + UnitIdx))
2929 dbgs() << ReservedResourceSegments.at(k: StartIdx + UnitIdx);
2930 else
2931 dbgs() << "{ }\n";
2932 } else
2933 dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
2934 }
2935 StartIdx += NumUnits;
2936 }
2937}
2938
2939// This is useful information to dump after bumpNode.
2940// Note that the Queue contents are more useful before pickNodeFromQueue.
2941LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2942 unsigned ResFactor;
2943 unsigned ResCount;
2944 if (ZoneCritResIdx) {
2945 ResFactor = SchedModel->getResourceFactor(ResIdx: ZoneCritResIdx);
2946 ResCount = getResourceCount(ResIdx: ZoneCritResIdx);
2947 } else {
2948 ResFactor = SchedModel->getMicroOpFactor();
2949 ResCount = RetiredMOps * ResFactor;
2950 }
2951 unsigned LFactor = SchedModel->getLatencyFactor();
2952 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2953 << " Retired: " << RetiredMOps;
2954 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2955 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2956 << ResCount / ResFactor << " "
2957 << SchedModel->getResourceName(PIdx: ZoneCritResIdx)
2958 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2959 << (IsResourceLimited ? " - Resource" : " - Latency")
2960 << " limited.\n";
2961 if (MISchedDumpReservedCycles)
2962 dumpReservedCycles();
2963}
2964#endif
2965
2966//===----------------------------------------------------------------------===//
2967// GenericScheduler - Generic implementation of MachineSchedStrategy.
2968//===----------------------------------------------------------------------===//
2969
2970void GenericSchedulerBase::SchedCandidate::
2971initResourceDelta(const ScheduleDAGMI *DAG,
2972 const TargetSchedModel *SchedModel) {
2973 if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2974 return;
2975
2976 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2977 for (TargetSchedModel::ProcResIter
2978 PI = SchedModel->getWriteProcResBegin(SC),
2979 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2980 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2981 ResDelta.CritResources += PI->ReleaseAtCycle;
2982 if (PI->ProcResourceIdx == Policy.DemandResIdx)
2983 ResDelta.DemandedResources += PI->ReleaseAtCycle;
2984 }
2985}
2986
2987/// Compute remaining latency. We need this both to determine whether the
2988/// overall schedule has become latency-limited and whether the instructions
2989/// outside this zone are resource or latency limited.
2990///
2991/// The "dependent" latency is updated incrementally during scheduling as the
2992/// max height/depth of scheduled nodes minus the cycles since it was
2993/// scheduled:
2994/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2995///
2996/// The "independent" latency is the max ready queue depth:
2997/// ILat = max N.depth for N in Available|Pending
2998///
2999/// RemainingLatency is the greater of independent and dependent latency.
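///
/// For example (hypothetical values): if the deepest scheduled node in the
/// zone still has DLat = 12 cycles of dependent latency outstanding and the
/// deepest node in the ready queues gives ILat = 15, then RemainingLatency
/// is 15.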
3000///
3001/// These computations are expensive, especially in DAGs with many edges, so
3002/// only do them if necessary.
3003static unsigned computeRemLatency(SchedBoundary &CurrZone) {
3004 unsigned RemLatency = CurrZone.getDependentLatency();
3005 RemLatency = std::max(a: RemLatency,
3006 b: CurrZone.findMaxLatency(ReadySUs: CurrZone.Available.elements()));
3007 RemLatency = std::max(a: RemLatency,
3008 b: CurrZone.findMaxLatency(ReadySUs: CurrZone.Pending.elements()));
3009 return RemLatency;
3010}
3011
3012/// Returns true if the current cycle plus remaining latency is greater than
3013/// the critical path in the scheduling region.
3014bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
3015 SchedBoundary &CurrZone,
3016 bool ComputeRemLatency,
3017 unsigned &RemLatency) const {
3018 // The current cycle is already greater than the critical path, so we are
3019 // already latency limited and don't need to compute the remaining latency.
3020 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
3021 return true;
3022
3023 // If we haven't scheduled anything yet, then we aren't latency limited.
3024 if (CurrZone.getCurrCycle() == 0)
3025 return false;
3026
3027 if (ComputeRemLatency)
3028 RemLatency = computeRemLatency(CurrZone);
3029
3030 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
3031}
3032
3033/// Set the CandPolicy for a scheduling zone, given the current resources and
3034/// latencies inside and outside the zone.
3035void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
3036 SchedBoundary &CurrZone,
3037 SchedBoundary *OtherZone) {
3038 // Apply preemptive heuristics based on the total latency and resources
3039 // inside and outside this zone. Potential stalls should be considered before
3040 // following this policy.
3041
3042 // Compute the critical resource outside the zone.
3043 unsigned OtherCritIdx = 0;
3044 unsigned OtherCount =
3045 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
3046
3047 bool OtherResLimited = false;
3048 unsigned RemLatency = 0;
3049 bool RemLatencyComputed = false;
3050 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
3051 RemLatency = computeRemLatency(CurrZone);
3052 RemLatencyComputed = true;
3053 OtherResLimited = checkResourceLimit(LFactor: SchedModel->getLatencyFactor(),
3054 Count: OtherCount, Latency: RemLatency, AfterSchedNode: false);
3055 }
3056
3057 // Schedule aggressively for latency in PostRA mode. We don't check for
3058 // acyclic latency during PostRA, and highly out-of-order processors will
3059 // skip PostRA scheduling.
3060 if (!OtherResLimited &&
3061 (IsPostRA || shouldReduceLatency(Policy, CurrZone, ComputeRemLatency: !RemLatencyComputed,
3062 RemLatency))) {
3063 Policy.ReduceLatency |= true;
3064 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
3065 << " RemainingLatency " << RemLatency << " + "
3066 << CurrZone.getCurrCycle() << "c > CritPath "
3067 << Rem.CriticalPath << "\n");
3068 }
3069 // If the same resource is limiting inside and outside the zone, do nothing.
3070 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
3071 return;
3072
3073 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
3074 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
3075 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
3076 } if (OtherResLimited) dbgs()
3077 << " RemainingLimit: "
3078 << SchedModel->getResourceName(OtherCritIdx) << "\n";
3079 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
3080 << " Latency limited both directions.\n");
3081
3082 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
3083 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
3084
3085 if (OtherResLimited)
3086 Policy.DemandResIdx = OtherCritIdx;
3087}
3088
3089#ifndef NDEBUG
3090const char *GenericSchedulerBase::getReasonStr(
3091 GenericSchedulerBase::CandReason Reason) {
3092 switch (Reason) {
3093 case NoCand: return "NOCAND ";
3094 case Only1: return "ONLY1 ";
3095 case PhysReg: return "PHYS-REG ";
3096 case RegExcess: return "REG-EXCESS";
3097 case RegCritical: return "REG-CRIT ";
3098 case Stall: return "STALL ";
3099 case Cluster: return "CLUSTER ";
3100 case Weak: return "WEAK ";
3101 case RegMax: return "REG-MAX ";
3102 case ResourceReduce: return "RES-REDUCE";
3103 case ResourceDemand: return "RES-DEMAND";
3104 case TopDepthReduce: return "TOP-DEPTH ";
3105 case TopPathReduce: return "TOP-PATH ";
3106 case BotHeightReduce:return "BOT-HEIGHT";
3107 case BotPathReduce: return "BOT-PATH ";
3108 case NextDefUse: return "DEF-USE ";
3109 case NodeOrder: return "ORDER ";
3110 };
3111 llvm_unreachable("Unknown reason!");
3112}
3113
3114void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
3115 PressureChange P;
3116 unsigned ResIdx = 0;
3117 unsigned Latency = 0;
3118 switch (Cand.Reason) {
3119 default:
3120 break;
3121 case RegExcess:
3122 P = Cand.RPDelta.Excess;
3123 break;
3124 case RegCritical:
3125 P = Cand.RPDelta.CriticalMax;
3126 break;
3127 case RegMax:
3128 P = Cand.RPDelta.CurrentMax;
3129 break;
3130 case ResourceReduce:
3131 ResIdx = Cand.Policy.ReduceResIdx;
3132 break;
3133 case ResourceDemand:
3134 ResIdx = Cand.Policy.DemandResIdx;
3135 break;
3136 case TopDepthReduce:
3137 Latency = Cand.SU->getDepth();
3138 break;
3139 case TopPathReduce:
3140 Latency = Cand.SU->getHeight();
3141 break;
3142 case BotHeightReduce:
3143 Latency = Cand.SU->getHeight();
3144 break;
3145 case BotPathReduce:
3146 Latency = Cand.SU->getDepth();
3147 break;
3148 }
3149 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Reason: Cand.Reason);
3150 if (P.isValid())
3151 dbgs() << " " << TRI->getRegPressureSetName(Idx: P.getPSet())
3152 << ":" << P.getUnitInc() << " ";
3153 else
3154 dbgs() << " ";
3155 if (ResIdx)
3156 dbgs() << " " << SchedModel->getProcResource(PIdx: ResIdx)->Name << " ";
3157 else
3158 dbgs() << " ";
3159 if (Latency)
3160 dbgs() << " " << Latency << " cycles ";
3161 else
3162 dbgs() << " ";
3163 dbgs() << '\n';
3164}
3165#endif
3166
3167namespace llvm {
3168/// Return true if this heuristic determines order.
3169/// TODO: Consider refactoring the return type of these functions to an integer
3170/// or enum, as we may need to differentiate whether TryCand is better than Cand.
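///
/// For example (hypothetical values), tryLess(2, 5, TryCand, Cand, Stall)
/// declares TryCand the winner and records Stall as its reason, while
/// tryLess(5, 2, ...) keeps Cand and only updates Cand.Reason to Stall if
/// Stall is a stronger (numerically smaller) reason; equal values return
/// false so later heuristics can break the tie.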
3171bool tryLess(int TryVal, int CandVal,
3172 GenericSchedulerBase::SchedCandidate &TryCand,
3173 GenericSchedulerBase::SchedCandidate &Cand,
3174 GenericSchedulerBase::CandReason Reason) {
3175 if (TryVal < CandVal) {
3176 TryCand.Reason = Reason;
3177 return true;
3178 }
3179 if (TryVal > CandVal) {
3180 if (Cand.Reason > Reason)
3181 Cand.Reason = Reason;
3182 return true;
3183 }
3184 return false;
3185}
3186
3187bool tryGreater(int TryVal, int CandVal,
3188 GenericSchedulerBase::SchedCandidate &TryCand,
3189 GenericSchedulerBase::SchedCandidate &Cand,
3190 GenericSchedulerBase::CandReason Reason) {
3191 if (TryVal > CandVal) {
3192 TryCand.Reason = Reason;
3193 return true;
3194 }
3195 if (TryVal < CandVal) {
3196 if (Cand.Reason > Reason)
3197 Cand.Reason = Reason;
3198 return true;
3199 }
3200 return false;
3201}
3202
3203bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
3204 GenericSchedulerBase::SchedCandidate &Cand,
3205 SchedBoundary &Zone) {
3206 if (Zone.isTop()) {
3207 // Prefer the candidate with the lesser depth, but only if one of them has
3208 // depth greater than the total latency scheduled so far, otherwise either
3209 // of them could be scheduled now with no stall.
3210 if (std::max(a: TryCand.SU->getDepth(), b: Cand.SU->getDepth()) >
3211 Zone.getScheduledLatency()) {
3212 if (tryLess(TryVal: TryCand.SU->getDepth(), CandVal: Cand.SU->getDepth(),
3213 TryCand, Cand, Reason: GenericSchedulerBase::TopDepthReduce))
3214 return true;
3215 }
3216 if (tryGreater(TryVal: TryCand.SU->getHeight(), CandVal: Cand.SU->getHeight(),
3217 TryCand, Cand, Reason: GenericSchedulerBase::TopPathReduce))
3218 return true;
3219 } else {
3220 // Prefer the candidate with the lesser height, but only if one of them has
3221 // height greater than the total latency scheduled so far, otherwise either
3222 // of them could be scheduled now with no stall.
3223 if (std::max(a: TryCand.SU->getHeight(), b: Cand.SU->getHeight()) >
3224 Zone.getScheduledLatency()) {
3225 if (tryLess(TryVal: TryCand.SU->getHeight(), CandVal: Cand.SU->getHeight(),
3226 TryCand, Cand, Reason: GenericSchedulerBase::BotHeightReduce))
3227 return true;
3228 }
3229 if (tryGreater(TryVal: TryCand.SU->getDepth(), CandVal: Cand.SU->getDepth(),
3230 TryCand, Cand, Reason: GenericSchedulerBase::BotPathReduce))
3231 return true;
3232 }
3233 return false;
3234}
3235} // end namespace llvm
3236
3237static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3238 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3239 << GenericSchedulerBase::getReasonStr(Reason) << '\n');
3240}
3241
3242static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
3243 tracePick(Reason: Cand.Reason, IsTop: Cand.AtTop);
3244}
3245
3246void GenericScheduler::initialize(ScheduleDAGMI *dag) {
3247 assert(dag->hasVRegLiveness() &&
3248 "(PreRA)GenericScheduler needs vreg liveness");
3249 DAG = static_cast<ScheduleDAGMILive*>(dag);
3250 SchedModel = DAG->getSchedModel();
3251 TRI = DAG->TRI;
3252
3253 if (RegionPolicy.ComputeDFSResult)
3254 DAG->computeDFSResult();
3255
3256 Rem.init(DAG, SchedModel);
3257 Top.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3258 Bot.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3259
3260 // Initialize resource counts.
3261
3262 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3263 // are disabled, then these HazardRecs will be disabled.
3264 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3265 if (!Top.HazardRec) {
3266 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3267 }
3268 if (!Bot.HazardRec) {
3269 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3270 }
3271 TopCand.SU = nullptr;
3272 BotCand.SU = nullptr;
3273}
3274
3275/// Initialize the per-region scheduling policy.
3276void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3277 MachineBasicBlock::iterator End,
3278 unsigned NumRegionInstrs) {
3279 const MachineFunction &MF = *Begin->getMF();
3280 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3281
3282 // Avoid setting up the register pressure tracker for small regions to save
3283 // compile time. As a rough heuristic, only track pressure when the number of
3284 // schedulable instructions exceeds half the number of allocatable integer
3285 // registers for the largest legal integer register type.
3286 RegionPolicy.ShouldTrackPressure = true;
3287 for (unsigned VT = MVT::i64; VT > (unsigned)MVT::i1; --VT) {
3288 MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
3289 if (TLI->isTypeLegal(VT: LegalIntVT)) {
3290 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3291 RC: TLI->getRegClassFor(VT: LegalIntVT));
3292 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3293 break;
3294 }
3295 }
3296
3297 // For generic targets, we default to bottom-up, because it's simpler and more
3298 // compile-time optimizations have been implemented in that direction.
3299 RegionPolicy.OnlyBottomUp = true;
3300
3301 // Allow the subtarget to override default policy.
3302 MF.getSubtarget().overrideSchedPolicy(Policy&: RegionPolicy, NumRegionInstrs);
3303
3304 // After subtarget overrides, apply command line options.
3305 if (!EnableRegPressure) {
3306 RegionPolicy.ShouldTrackPressure = false;
3307 RegionPolicy.ShouldTrackLaneMasks = false;
3308 }
3309
3310 // Check whether -misched-topdown/bottomup can force or unforce the scheduling
3311 // direction. e.g. -misched-bottomup=false allows scheduling in both directions.
3312 assert((!ForceTopDown || !ForceBottomUp) &&
3313 "-misched-topdown incompatible with -misched-bottomup");
3314 if (ForceBottomUp.getNumOccurrences() > 0) {
3315 RegionPolicy.OnlyBottomUp = ForceBottomUp;
3316 if (RegionPolicy.OnlyBottomUp)
3317 RegionPolicy.OnlyTopDown = false;
3318 }
3319 if (ForceTopDown.getNumOccurrences() > 0) {
3320 RegionPolicy.OnlyTopDown = ForceTopDown;
3321 if (RegionPolicy.OnlyTopDown)
3322 RegionPolicy.OnlyBottomUp = false;
3323 }
3324}
3325
3326void GenericScheduler::dumpPolicy() const {
3327 // Cannot completely remove virtual function even in release mode.
3328#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3329 dbgs() << "GenericScheduler RegionPolicy: "
3330 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3331 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3332 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3333 << "\n";
3334#endif
3335}
3336
3337/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3338/// critical path by more cycles than it takes to drain the instruction buffer.
3339/// We estimate an upper bound on in-flight instructions as:
3340///
3341/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3342/// InFlightIterations = AcyclicPath / CyclesPerIteration
3343/// InFlightResources = InFlightIterations * LoopResources
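///
/// Worked example (hypothetical numbers): with CyclicPath = 4 cycles, an
/// AcyclicPath of 24 cycles, and a loop body that issues 8 micro-ops,
/// InFlightIterations = 24 / 4 = 6 and InFlightResources = 6 * 8 = 48
/// micro-ops, which is then compared against the micro-op buffer size.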
3344///
3345/// TODO: Check execution resources in addition to IssueCount.
3346void GenericScheduler::checkAcyclicLatency() {
3347 if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3348 return;
3349
3350 // Scaled number of cycles per loop iteration.
3351 unsigned IterCount =
3352 std::max(a: Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3353 b: Rem.RemIssueCount);
3354 // Scaled acyclic critical path.
3355 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3356 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3357 unsigned InFlightCount =
3358 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3359 unsigned BufferLimit =
3360 SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3361
3362 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3363
3364 LLVM_DEBUG(
3365 dbgs() << "IssueCycles="
3366 << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3367 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3368 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3369 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3370 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3371 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3372}
3373
3374void GenericScheduler::registerRoots() {
3375 Rem.CriticalPath = DAG->ExitSU.getDepth();
3376
3377 // Some roots may not feed into ExitSU. Check all of them just in case.
3378 for (const SUnit *SU : Bot.Available) {
3379 if (SU->getDepth() > Rem.CriticalPath)
3380 Rem.CriticalPath = SU->getDepth();
3381 }
3382 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3383 if (DumpCriticalPathLength) {
3384 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3385 }
3386
3387 if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3388 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3389 checkAcyclicLatency();
3390 }
3391}
3392
3393namespace llvm {
3394bool tryPressure(const PressureChange &TryP,
3395 const PressureChange &CandP,
3396 GenericSchedulerBase::SchedCandidate &TryCand,
3397 GenericSchedulerBase::SchedCandidate &Cand,
3398 GenericSchedulerBase::CandReason Reason,
3399 const TargetRegisterInfo *TRI,
3400 const MachineFunction &MF) {
3401 // If one candidate decreases and the other increases, go with it.
3402 // Invalid candidates have UnitInc==0.
3403 if (tryGreater(TryVal: TryP.getUnitInc() < 0, CandVal: CandP.getUnitInc() < 0, TryCand, Cand,
3404 Reason)) {
3405 return true;
3406 }
3407 // Do not compare the magnitude of pressure changes between the top and bottom
3408 // boundaries.
3409 if (Cand.AtTop != TryCand.AtTop)
3410 return false;
3411
3412 // If both candidates affect the same set in the same boundary, go with the
3413 // smallest increase.
3414 unsigned TryPSet = TryP.getPSetOrMax();
3415 unsigned CandPSet = CandP.getPSetOrMax();
3416 if (TryPSet == CandPSet) {
3417 return tryLess(TryVal: TryP.getUnitInc(), CandVal: CandP.getUnitInc(), TryCand, Cand,
3418 Reason);
3419 }
3420
3421 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, PSetID: TryPSet) :
3422 std::numeric_limits<int>::max();
3423
3424 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, PSetID: CandPSet) :
3425 std::numeric_limits<int>::max();
3426
3427 // If the candidates are decreasing pressure, reverse priority.
3428 if (TryP.getUnitInc() < 0)
3429 std::swap(a&: TryRank, b&: CandRank);
3430 return tryGreater(TryVal: TryRank, CandVal: CandRank, TryCand, Cand, Reason);
3431}
3432
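/// Return the number of unscheduled weak predecessors (when scheduling
/// top-down) or weak successors (when scheduling bottom-up) of \p SU. Weak
/// edges are used for clustering and other soft ordering constraints.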
3433unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3434 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3435}
3436
3437/// Minimize physical register live ranges. Regalloc wants them adjacent to
3438/// their physreg def/use.
3439///
3440/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3441/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3442/// with the operation that produces or consumes the physreg. We'll do this when
3443/// regalloc has support for parallel copies.
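///
/// Illustrative example (hypothetical MIR): for a copy such as
///   %0:gr32 = COPY $edi
/// top-down scheduling returns 1, so the copy issues right after the
/// already-scheduled producer of $edi; bottom-up scheduling returns -1 when
/// the $edi definition lies outside the region (the copy sits at the
/// boundary), deferring the copy towards the top of the region.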
3444int biasPhysReg(const SUnit *SU, bool isTop) {
3445 const MachineInstr *MI = SU->getInstr();
3446
3447 if (MI->isCopy()) {
3448 unsigned ScheduledOper = isTop ? 1 : 0;
3449 unsigned UnscheduledOper = isTop ? 0 : 1;
3450 // If we have already scheduled the physreg producer/consumer, immediately
3451 // schedule the copy.
3452 if (MI->getOperand(i: ScheduledOper).getReg().isPhysical())
3453 return 1;
3454 // If the physreg is at the boundary, defer it. Otherwise schedule it
3455 // immediately to free the dependent. We can hoist the copy later.
3456 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3457 if (MI->getOperand(i: UnscheduledOper).getReg().isPhysical())
3458 return AtBoundary ? -1 : 1;
3459 }
3460
3461 if (MI->isMoveImmediate()) {
3462 // If we have a move immediate and all successors have been assigned, bias
3463 // towards scheduling this later. Make sure all register defs are to
3464 // physical registers.
3465 bool DoBias = true;
3466 for (const MachineOperand &Op : MI->defs()) {
3467 if (Op.isReg() && !Op.getReg().isPhysical()) {
3468 DoBias = false;
3469 break;
3470 }
3471 }
3472
3473 if (DoBias)
3474 return isTop ? -1 : 1;
3475 }
3476
3477 return 0;
3478}
3479} // end namespace llvm
3480
3481void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3482 bool AtTop,
3483 const RegPressureTracker &RPTracker,
3484 RegPressureTracker &TempTracker) {
3485 Cand.SU = SU;
3486 Cand.AtTop = AtTop;
3487 if (DAG->isTrackingPressure()) {
3488 if (AtTop) {
3489 TempTracker.getMaxDownwardPressureDelta(
3490 MI: Cand.SU->getInstr(),
3491 Delta&: Cand.RPDelta,
3492 CriticalPSets: DAG->getRegionCriticalPSets(),
3493 MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);
3494 } else {
3495 if (VerifyScheduling) {
3496 TempTracker.getMaxUpwardPressureDelta(
3497 MI: Cand.SU->getInstr(),
3498 PDiff: &DAG->getPressureDiff(SU: Cand.SU),
3499 Delta&: Cand.RPDelta,
3500 CriticalPSets: DAG->getRegionCriticalPSets(),
3501 MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);
3502 } else {
3503 RPTracker.getUpwardPressureDelta(
3504 MI: Cand.SU->getInstr(),
3505 PDiff&: DAG->getPressureDiff(SU: Cand.SU),
3506 Delta&: Cand.RPDelta,
3507 CriticalPSets: DAG->getRegionCriticalPSets(),
3508 MaxPressureLimit: DAG->getRegPressure().MaxSetPressure);
3509 }
3510 }
3511 }
3512 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3513 << " Try SU(" << Cand.SU->NodeNum << ") "
3514 << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3515 << Cand.RPDelta.Excess.getUnitInc() << "\n");
3516}
3517
3518/// Apply a set of heuristics to a new candidate. Heuristics are currently
3519/// hierarchical. This may be more efficient than a graduated cost model because
3520/// we don't need to evaluate all aspects of the model for each node in the
3521/// queue. But it's really done to make the heuristics easier to debug and
3522/// statistically analyze.
3523///
3524/// \param Cand provides the policy and current best candidate.
3525/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3526/// \param Zone describes the scheduled zone that we are extending, or nullptr
3527/// if Cand is from a different zone than TryCand.
3528/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
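///
/// The checks below are applied in order: physreg bias, excess pressure,
/// critical pressure, acyclic-latency and stall heuristics (same-boundary
/// only), clustering, weak edges (same-boundary only), max pressure, and then
/// resource balance, latency and original instruction order (same-boundary
/// only).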
3529bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3530 SchedCandidate &TryCand,
3531 SchedBoundary *Zone) const {
3532 // Initialize the candidate if needed.
3533 if (!Cand.isValid()) {
3534 TryCand.Reason = NodeOrder;
3535 return true;
3536 }
3537
3538 // Bias physreg defs and copies towards their uses and definitions, respectively.
3539 if (tryGreater(TryVal: biasPhysReg(SU: TryCand.SU, isTop: TryCand.AtTop),
3540 CandVal: biasPhysReg(SU: Cand.SU, isTop: Cand.AtTop), TryCand, Cand, Reason: PhysReg))
3541 return TryCand.Reason != NoCand;
3542
3543 // Avoid exceeding the target's limit.
3544 if (DAG->isTrackingPressure() && tryPressure(TryP: TryCand.RPDelta.Excess,
3545 CandP: Cand.RPDelta.Excess,
3546 TryCand, Cand, Reason: RegExcess, TRI,
3547 MF: DAG->MF))
3548 return TryCand.Reason != NoCand;
3549
3550 // Avoid increasing the max critical pressure in the scheduled region.
3551 if (DAG->isTrackingPressure() && tryPressure(TryP: TryCand.RPDelta.CriticalMax,
3552 CandP: Cand.RPDelta.CriticalMax,
3553 TryCand, Cand, Reason: RegCritical, TRI,
3554 MF: DAG->MF))
3555 return TryCand.Reason != NoCand;
3556
3557 // We only compare a subset of features when comparing nodes between the
3558 // Top and Bottom boundaries. Some properties are simply incomparable; in many
3559 // other instances we should only override the other boundary if something
3560 // is a clearly good pick on one boundary. Skip heuristics that are more
3561 // "tie-breaking" in nature.
3562 bool SameBoundary = Zone != nullptr;
3563 if (SameBoundary) {
3564 // For loops that are acyclic path limited, aggressively schedule for
3565 // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3566 // heuristics to take precedence.
3567 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3568 tryLatency(TryCand, Cand, Zone&: *Zone))
3569 return TryCand.Reason != NoCand;
3570
3571 // Prioritize instructions that read unbuffered resources by stall cycles.
3572 if (tryLess(TryVal: Zone->getLatencyStallCycles(SU: TryCand.SU),
3573 CandVal: Zone->getLatencyStallCycles(SU: Cand.SU), TryCand, Cand, Reason: Stall))
3574 return TryCand.Reason != NoCand;
3575 }
3576
3577 // Keep clustered nodes together to encourage downstream peephole
3578 // optimizations which may reduce resource requirements.
3579 //
3580 // This is a best effort to set things up for a post-RA pass. Optimizations
3581 // like generating loads of multiple registers should ideally be done within
3582 // the scheduler pass by combining the loads during DAG postprocessing.
3583 const SUnit *CandNextClusterSU =
3584 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3585 const SUnit *TryCandNextClusterSU =
3586 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3587 if (tryGreater(TryVal: TryCand.SU == TryCandNextClusterSU,
3588 CandVal: Cand.SU == CandNextClusterSU,
3589 TryCand, Cand, Reason: Cluster))
3590 return TryCand.Reason != NoCand;
3591
3592 if (SameBoundary) {
3593 // Weak edges are for clustering and other constraints.
3594 if (tryLess(TryVal: getWeakLeft(SU: TryCand.SU, isTop: TryCand.AtTop),
3595 CandVal: getWeakLeft(SU: Cand.SU, isTop: Cand.AtTop),
3596 TryCand, Cand, Reason: Weak))
3597 return TryCand.Reason != NoCand;
3598 }
3599
3600 // Avoid increasing the max pressure of the entire region.
3601 if (DAG->isTrackingPressure() && tryPressure(TryP: TryCand.RPDelta.CurrentMax,
3602 CandP: Cand.RPDelta.CurrentMax,
3603 TryCand, Cand, Reason: RegMax, TRI,
3604 MF: DAG->MF))
3605 return TryCand.Reason != NoCand;
3606
3607 if (SameBoundary) {
3608 // Avoid critical resource consumption and balance the schedule.
3609 TryCand.initResourceDelta(DAG, SchedModel);
3610 if (tryLess(TryVal: TryCand.ResDelta.CritResources, CandVal: Cand.ResDelta.CritResources,
3611 TryCand, Cand, Reason: ResourceReduce))
3612 return TryCand.Reason != NoCand;
3613 if (tryGreater(TryVal: TryCand.ResDelta.DemandedResources,
3614 CandVal: Cand.ResDelta.DemandedResources,
3615 TryCand, Cand, Reason: ResourceDemand))
3616 return TryCand.Reason != NoCand;
3617
3618 // Avoid serializing long latency dependence chains.
3619 // For acyclic path limited loops, latency was already checked above.
3620 if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3621 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone&: *Zone))
3622 return TryCand.Reason != NoCand;
3623
3624 // Fall through to original instruction order.
3625 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3626 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3627 TryCand.Reason = NodeOrder;
3628 return true;
3629 }
3630 }
3631
3632 return false;
3633}
3634
3635/// Pick the best candidate from the queue.
3636///
3637/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3638/// DAG building. To adjust for the current scheduling location we need to
3639/// maintain the number of vreg uses remaining to be top-scheduled.
3640void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3641 const CandPolicy &ZonePolicy,
3642 const RegPressureTracker &RPTracker,
3643 SchedCandidate &Cand) {
3644 // getMaxPressureDelta temporarily modifies the tracker.
3645 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3646
3647 ReadyQueue &Q = Zone.Available;
3648 for (SUnit *SU : Q) {
3649
3650 SchedCandidate TryCand(ZonePolicy);
3651 initCandidate(Cand&: TryCand, SU, AtTop: Zone.isTop(), RPTracker, TempTracker);
3652 // Pass SchedBoundary only when comparing nodes from the same boundary.
3653 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3654 if (tryCandidate(Cand, TryCand, Zone: ZoneArg)) {
3655 // Initialize resource delta if needed in case future heuristics query it.
3656 if (TryCand.ResDelta == SchedResourceDelta())
3657 TryCand.initResourceDelta(DAG, SchedModel);
3658 Cand.setBest(TryCand);
3659 LLVM_DEBUG(traceCandidate(Cand));
3660 }
3661 }
3662}
3663
3664/// Pick the best candidate node from either the top or bottom queue.
3665SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3666 // Schedule as far as possible in the direction of no choice. This is most
3667 // efficient, but also provides the best heuristics for CriticalPSets.
3668 if (SUnit *SU = Bot.pickOnlyChoice()) {
3669 IsTopNode = false;
3670 tracePick(Reason: Only1, IsTop: false);
3671 return SU;
3672 }
3673 if (SUnit *SU = Top.pickOnlyChoice()) {
3674 IsTopNode = true;
3675 tracePick(Reason: Only1, IsTop: true);
3676 return SU;
3677 }
3678 // Set the bottom-up policy based on the state of the current bottom zone and
3679 // the instructions outside the zone, including the top zone.
3680 CandPolicy BotPolicy;
3681 setPolicy(Policy&: BotPolicy, /*IsPostRA=*/false, CurrZone&: Bot, OtherZone: &Top);
3682 // Set the top-down policy based on the state of the current top zone and
3683 // the instructions outside the zone, including the bottom zone.
3684 CandPolicy TopPolicy;
3685 setPolicy(Policy&: TopPolicy, /*IsPostRA=*/false, CurrZone&: Top, OtherZone: &Bot);
3686
3687 // See if BotCand is still valid (because we previously scheduled from Top).
3688 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3689 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3690 BotCand.Policy != BotPolicy) {
3691 BotCand.reset(NewPolicy: CandPolicy());
3692 pickNodeFromQueue(Zone&: Bot, ZonePolicy: BotPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: BotCand);
3693 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3694 } else {
3695 LLVM_DEBUG(traceCandidate(BotCand));
3696#ifndef NDEBUG
3697 if (VerifyScheduling) {
3698 SchedCandidate TCand;
3699 TCand.reset(NewPolicy: CandPolicy());
3700 pickNodeFromQueue(Zone&: Bot, ZonePolicy: BotPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: TCand);
3701 assert(TCand.SU == BotCand.SU &&
3702 "Last pick result should correspond to re-picking right now");
3703 }
3704#endif
3705 }
3706
3707 // Check if the top Q has a better candidate.
3708 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3709 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3710 TopCand.Policy != TopPolicy) {
3711 TopCand.reset(NewPolicy: CandPolicy());
3712 pickNodeFromQueue(Zone&: Top, ZonePolicy: TopPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TopCand);
3713 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3714 } else {
3715 LLVM_DEBUG(traceCandidate(TopCand));
3716#ifndef NDEBUG
3717 if (VerifyScheduling) {
3718 SchedCandidate TCand;
3719 TCand.reset(NewPolicy: CandPolicy());
3720 pickNodeFromQueue(Zone&: Top, ZonePolicy: TopPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TCand);
3721 assert(TCand.SU == TopCand.SU &&
3722 "Last pick result should correspond to re-picking right now");
3723 }
3724#endif
3725 }
3726
3727 // Pick best from BotCand and TopCand.
3728 assert(BotCand.isValid());
3729 assert(TopCand.isValid());
3730 SchedCandidate Cand = BotCand;
3731 TopCand.Reason = NoCand;
3732 if (tryCandidate(Cand, TryCand&: TopCand, Zone: nullptr)) {
3733 Cand.setBest(TopCand);
3734 LLVM_DEBUG(traceCandidate(Cand));
3735 }
3736
3737 IsTopNode = Cand.AtTop;
3738 tracePick(Cand);
3739 return Cand.SU;
3740}
3741
3742/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3743SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3744 if (DAG->top() == DAG->bottom()) {
3745 assert(Top.Available.empty() && Top.Pending.empty() &&
3746 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3747 return nullptr;
3748 }
3749 SUnit *SU;
3750 do {
3751 if (RegionPolicy.OnlyTopDown) {
3752 SU = Top.pickOnlyChoice();
3753 if (!SU) {
3754 CandPolicy NoPolicy;
3755 TopCand.reset(NewPolicy: NoPolicy);
3756 pickNodeFromQueue(Zone&: Top, ZonePolicy: NoPolicy, RPTracker: DAG->getTopRPTracker(), Cand&: TopCand);
3757 assert(TopCand.Reason != NoCand && "failed to find a candidate");
3758 tracePick(Cand: TopCand);
3759 SU = TopCand.SU;
3760 }
3761 IsTopNode = true;
3762 } else if (RegionPolicy.OnlyBottomUp) {
3763 SU = Bot.pickOnlyChoice();
3764 if (!SU) {
3765 CandPolicy NoPolicy;
3766 BotCand.reset(NewPolicy: NoPolicy);
3767 pickNodeFromQueue(Zone&: Bot, ZonePolicy: NoPolicy, RPTracker: DAG->getBotRPTracker(), Cand&: BotCand);
3768 assert(BotCand.Reason != NoCand && "failed to find a candidate");
3769 tracePick(Cand: BotCand);
3770 SU = BotCand.SU;
3771 }
3772 IsTopNode = false;
3773 } else {
3774 SU = pickNodeBidirectional(IsTopNode);
3775 }
3776 } while (SU->isScheduled);
3777
3778 if (SU->isTopReady())
3779 Top.removeReady(SU);
3780 if (SU->isBottomReady())
3781 Bot.removeReady(SU);
3782
3783 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3784 << *SU->getInstr());
3785 return SU;
3786}
3787
3788void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3789 MachineBasicBlock::iterator InsertPos = SU->getInstr();
3790 if (!isTop)
3791 ++InsertPos;
3792 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3793
3794 // Find already scheduled copies with a single physreg dependence and move
3795 // them immediately adjacent to the scheduled instruction.
3796 for (SDep &Dep : Deps) {
3797 if (Dep.getKind() != SDep::Data ||
3798 !Register::isPhysicalRegister(Reg: Dep.getReg()))
3799 continue;
3800 SUnit *DepSU = Dep.getSUnit();
3801 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3802 continue;
3803 MachineInstr *Copy = DepSU->getInstr();
3804 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3805 continue;
3806 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3807 DAG->dumpNode(*Dep.getSUnit()));
3808 DAG->moveInstruction(MI: Copy, InsertPos);
3809 }
3810}
3811
3812/// Update the scheduler's state after scheduling a node. This is the same node
3813/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3814/// update its state based on the current cycle before MachineSchedStrategy
3815/// does.
3816///
3817/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3818/// them here. See comments in biasPhysReg.
3819void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3820 if (IsTopNode) {
3821 SU->TopReadyCycle = std::max(a: SU->TopReadyCycle, b: Top.getCurrCycle());
3822 Top.bumpNode(SU);
3823 if (SU->hasPhysRegUses)
3824 reschedulePhysReg(SU, isTop: true);
3825 } else {
3826 SU->BotReadyCycle = std::max(a: SU->BotReadyCycle, b: Bot.getCurrCycle());
3827 Bot.bumpNode(SU);
3828 if (SU->hasPhysRegDefs)
3829 reschedulePhysReg(SU, isTop: false);
3830 }
3831}
3832
3833/// Create the standard converging machine scheduler. This will be used as the
3834/// default scheduler if the target does not set a default.
3835ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3836 ScheduleDAGMILive *DAG =
3837 new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(args&: C));
3838 // Register DAG post-processors.
3839 //
3840 // FIXME: extend the mutation API to allow earlier mutations to instantiate
3841 // data and pass it to later mutations. Have a single mutation that gathers
3842 // the interesting nodes in one pass.
3843 DAG->addMutation(Mutation: createCopyConstrainDAGMutation(TII: DAG->TII, TRI: DAG->TRI));
3844
3845 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
3846 // Add MacroFusion mutation if fusions are not empty.
3847 const auto &MacroFusions = STI.getMacroFusions();
3848 if (!MacroFusions.empty())
3849 DAG->addMutation(Mutation: createMacroFusionDAGMutation(Predicates: MacroFusions));
3850 return DAG;
3851}
3852
3853static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3854 return createGenericSchedLive(C);
3855}
3856
3857static MachineSchedRegistry
3858GenericSchedRegistry("converge", "Standard converging scheduler.",
3859 createConvergingSched);
3860
3861//===----------------------------------------------------------------------===//
3862// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3863//===----------------------------------------------------------------------===//
3864
3865void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3866 DAG = Dag;
3867 SchedModel = DAG->getSchedModel();
3868 TRI = DAG->TRI;
3869
3870 Rem.init(DAG, SchedModel);
3871 Top.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3872 Bot.init(dag: DAG, smodel: SchedModel, rem: &Rem);
3873
3874 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3875 // or are disabled, then these HazardRecs will be disabled.
3876 const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3877 if (!Top.HazardRec) {
3878 Top.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3879 }
3880 if (!Bot.HazardRec) {
3881 Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG);
3882 }
3883}
3884
3885void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
3886 MachineBasicBlock::iterator End,
3887 unsigned NumRegionInstrs) {
3888 if (PostRADirection == MISchedPostRASched::TopDown) {
3889 RegionPolicy.OnlyTopDown = true;
3890 RegionPolicy.OnlyBottomUp = false;
3891 } else if (PostRADirection == MISchedPostRASched::BottomUp) {
3892 RegionPolicy.OnlyTopDown = false;
3893 RegionPolicy.OnlyBottomUp = true;
3894 } else if (PostRADirection == MISchedPostRASched::Bidirectional) {
3895 RegionPolicy.OnlyBottomUp = false;
3896 RegionPolicy.OnlyTopDown = false;
3897 }
3898}
3899
3900void PostGenericScheduler::registerRoots() {
3901 Rem.CriticalPath = DAG->ExitSU.getDepth();
3902
3903 // Some roots may not feed into ExitSU. Check all of them just in case.
3904 for (const SUnit *SU : Bot.Available) {
3905 if (SU->getDepth() > Rem.CriticalPath)
3906 Rem.CriticalPath = SU->getDepth();
3907 }
3908 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3909 if (DumpCriticalPathLength) {
3910 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3911 }
3912}
3913
3914/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3915///
3916/// \param Cand provides the policy and current best candidate.
3917/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3918/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
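///
/// The checks below mirror a subset of GenericScheduler::tryCandidate:
/// stalls, clustering, resource balance, latency, and finally the original
/// instruction order; register pressure is not considered after regalloc.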
3919bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3920 SchedCandidate &TryCand) {
3921 // Initialize the candidate if needed.
3922 if (!Cand.isValid()) {
3923 TryCand.Reason = NodeOrder;
3924 return true;
3925 }
3926
3927 // Prioritize instructions that read unbuffered resources by stall cycles.
3928 if (tryLess(TryVal: Top.getLatencyStallCycles(SU: TryCand.SU),
3929 CandVal: Top.getLatencyStallCycles(SU: Cand.SU), TryCand, Cand, Reason: Stall))
3930 return TryCand.Reason != NoCand;
3931
3932 // Keep clustered nodes together.
3933 if (tryGreater(TryVal: TryCand.SU == DAG->getNextClusterSucc(),
3934 CandVal: Cand.SU == DAG->getNextClusterSucc(),
3935 TryCand, Cand, Reason: Cluster))
3936 return TryCand.Reason != NoCand;
3937
3938 // Avoid critical resource consumption and balance the schedule.
3939 if (tryLess(TryVal: TryCand.ResDelta.CritResources, CandVal: Cand.ResDelta.CritResources,
3940 TryCand, Cand, Reason: ResourceReduce))
3941 return TryCand.Reason != NoCand;
3942 if (tryGreater(TryVal: TryCand.ResDelta.DemandedResources,
3943 CandVal: Cand.ResDelta.DemandedResources,
3944 TryCand, Cand, Reason: ResourceDemand))
3945 return TryCand.Reason != NoCand;
3946
3947 // Avoid serializing long latency dependence chains.
3948 if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Zone&: Top)) {
3949 return TryCand.Reason != NoCand;
3950 }
3951
3952 // Fall through to original instruction order.
3953 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3954 TryCand.Reason = NodeOrder;
3955 return true;
3956 }
3957
3958 return false;
3959}
3960
3961void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3962 SchedCandidate &Cand) {
3963 ReadyQueue &Q = Zone.Available;
3964 for (SUnit *SU : Q) {
3965 SchedCandidate TryCand(Cand.Policy);
3966 TryCand.SU = SU;
3967 TryCand.AtTop = Zone.isTop();
3968 TryCand.initResourceDelta(DAG, SchedModel);
3969 if (tryCandidate(Cand, TryCand)) {
3970 Cand.setBest(TryCand);
3971 LLVM_DEBUG(traceCandidate(Cand));
3972 }
3973 }
3974}
3975
3976/// Pick the best candidate node from either the top or bottom queue.
3977SUnit *PostGenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3978 // FIXME: This is similar to GenericScheduler::pickNodeBidirectional. Factor
3979 // out common parts.
3980
3981 // Schedule as far as possible in the direction of no choice. This is most
3982 // efficient, but also provides the best heuristics for CriticalPSets.
3983 if (SUnit *SU = Bot.pickOnlyChoice()) {
3984 IsTopNode = false;
3985 tracePick(Reason: Only1, IsTop: false);
3986 return SU;
3987 }
3988 if (SUnit *SU = Top.pickOnlyChoice()) {
3989 IsTopNode = true;
3990 tracePick(Reason: Only1, IsTop: true);
3991 return SU;
3992 }
3993 // Set the bottom-up policy based on the state of the current bottom zone and
3994 // the instructions outside the zone, including the top zone.
3995 CandPolicy BotPolicy;
3996 setPolicy(Policy&: BotPolicy, /*IsPostRA=*/true, CurrZone&: Bot, OtherZone: &Top);
3997 // Set the top-down policy based on the state of the current top zone and
3998 // the instructions outside the zone, including the bottom zone.
3999 CandPolicy TopPolicy;
4000 setPolicy(Policy&: TopPolicy, /*IsPostRA=*/true, CurrZone&: Top, OtherZone: &Bot);
4001
4002 // See if BotCand is still valid (because we previously scheduled from Top).
4003 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
4004 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
4005 BotCand.Policy != BotPolicy) {
4006 BotCand.reset(NewPolicy: CandPolicy());
4007 pickNodeFromQueue(Zone&: Bot, Cand&: BotCand);
4008 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
4009 } else {
4010 LLVM_DEBUG(traceCandidate(BotCand));
4011#ifndef NDEBUG
4012 if (VerifyScheduling) {
4013 SchedCandidate TCand;
4014 TCand.reset(NewPolicy: CandPolicy());
4015 pickNodeFromQueue(Zone&: Bot, Cand&: TCand);
4016 assert(TCand.SU == BotCand.SU &&
4017 "Last pick result should correspond to re-picking right now");
4018 }
4019#endif
4020 }
4021
4022 // Check if the top Q has a better candidate.
4023 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
4024 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
4025 TopCand.Policy != TopPolicy) {
4026 TopCand.reset(NewPolicy: CandPolicy());
4027 pickNodeFromQueue(Zone&: Top, Cand&: TopCand);
4028 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
4029 } else {
4030 LLVM_DEBUG(traceCandidate(TopCand));
4031#ifndef NDEBUG
4032 if (VerifyScheduling) {
4033 SchedCandidate TCand;
4034 TCand.reset(NewPolicy: CandPolicy());
4035 pickNodeFromQueue(Zone&: Top, Cand&: TCand);
4036 assert(TCand.SU == TopCand.SU &&
4037 "Last pick result should correspond to re-picking right now");
4038 }
4039#endif
4040 }
4041
4042 // Pick best from BotCand and TopCand.
4043 assert(BotCand.isValid());
4044 assert(TopCand.isValid());
4045 SchedCandidate Cand = BotCand;
4046 TopCand.Reason = NoCand;
4047 if (tryCandidate(Cand, TryCand&: TopCand)) {
4048 Cand.setBest(TopCand);
4049 LLVM_DEBUG(traceCandidate(Cand));
4050 }
4051
4052 IsTopNode = Cand.AtTop;
4053 tracePick(Cand);
4054 return Cand.SU;
4055}
4056
4057/// Pick the next node to schedule.
4058SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
4059 if (DAG->top() == DAG->bottom()) {
4060 assert(Top.Available.empty() && Top.Pending.empty() &&
4061 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
4062 return nullptr;
4063 }
4064 SUnit *SU;
4065 do {
4066 if (RegionPolicy.OnlyBottomUp) {
4067 SU = Bot.pickOnlyChoice();
4068 if (SU) {
4069 tracePick(Reason: Only1, IsTop: false);
4070 } else {
4071 CandPolicy NoPolicy;
4072 BotCand.reset(NewPolicy: NoPolicy);
4073 // Set the bottom-up policy based on the state of the current bottom
4074 // zone and the instructions outside the zone, including the top zone.
4075 setPolicy(Policy&: BotCand.Policy, /*IsPostRA=*/true, CurrZone&: Bot, OtherZone: nullptr);
4076 pickNodeFromQueue(Zone&: Bot, Cand&: BotCand);
4077 assert(BotCand.Reason != NoCand && "failed to find a candidate");
4078 tracePick(Cand: BotCand);
4079 SU = BotCand.SU;
4080 }
4081 IsTopNode = false;
4082 } else if (RegionPolicy.OnlyTopDown) {
4083 SU = Top.pickOnlyChoice();
4084 if (SU) {
4085 tracePick(Reason: Only1, IsTop: true);
4086 } else {
4087 CandPolicy NoPolicy;
4088 TopCand.reset(NewPolicy: NoPolicy);
4089 // Set the top-down policy based on the state of the current top zone
4090 // and the instructions outside the zone, including the bottom zone.
4091 setPolicy(Policy&: TopCand.Policy, /*IsPostRA=*/true, CurrZone&: Top, OtherZone: nullptr);
4092 pickNodeFromQueue(Zone&: Top, Cand&: TopCand);
4093 assert(TopCand.Reason != NoCand && "failed to find a candidate");
4094 tracePick(Cand: TopCand);
4095 SU = TopCand.SU;
4096 }
4097 IsTopNode = true;
4098 } else {
4099 SU = pickNodeBidirectional(IsTopNode);
4100 }
4101 } while (SU->isScheduled);
4102
4103 if (SU->isTopReady())
4104 Top.removeReady(SU);
4105 if (SU->isBottomReady())
4106 Bot.removeReady(SU);
4107
4108 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
4109 << *SU->getInstr());
4110 return SU;
4111}
4112
4113/// Called after ScheduleDAGMI has scheduled an instruction and updated
4114/// scheduled/remaining flags in the DAG nodes.
4115void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
4116 if (IsTopNode) {
4117 SU->TopReadyCycle = std::max(a: SU->TopReadyCycle, b: Top.getCurrCycle());
4118 Top.bumpNode(SU);
4119 } else {
4120 SU->BotReadyCycle = std::max(a: SU->BotReadyCycle, b: Bot.getCurrCycle());
4121 Bot.bumpNode(SU);
4122 }
4123}
4124
4125ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
4126 ScheduleDAGMI *DAG =
4127 new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(args&: C),
4128 /*RemoveKillFlags=*/true);
4129 const TargetSubtargetInfo &STI = C->MF->getSubtarget();
4130 // Add MacroFusion mutation if fusions are not empty.
4131 const auto &MacroFusions = STI.getMacroFusions();
4132 if (!MacroFusions.empty())
4133 DAG->addMutation(Mutation: createMacroFusionDAGMutation(Predicates: MacroFusions));
4134 return DAG;
4135}
4136
4137//===----------------------------------------------------------------------===//
4138// ILP Scheduler. Currently for experimental analysis of heuristics.
4139//===----------------------------------------------------------------------===//
4140
4141namespace {
4142
4143/// Order nodes by the ILP metric.
4144struct ILPOrder {
4145 const SchedDFSResult *DFSResult = nullptr;
4146 const BitVector *ScheduledTrees = nullptr;
4147 bool MaximizeILP;
4148
4149 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
4150
4151 /// Apply a less-than relation on node priority.
4152 ///
4153 /// (Return true if A comes after B in the Q.)
4154 bool operator()(const SUnit *A, const SUnit *B) const {
4155 unsigned SchedTreeA = DFSResult->getSubtreeID(SU: A);
4156 unsigned SchedTreeB = DFSResult->getSubtreeID(SU: B);
4157 if (SchedTreeA != SchedTreeB) {
4158 // Unscheduled trees have lower priority.
4159 if (ScheduledTrees->test(Idx: SchedTreeA) != ScheduledTrees->test(Idx: SchedTreeB))
4160 return ScheduledTrees->test(Idx: SchedTreeB);
4161
4162 // Trees with shallower connections have lower priority.
4163 if (DFSResult->getSubtreeLevel(SubtreeID: SchedTreeA)
4164 != DFSResult->getSubtreeLevel(SubtreeID: SchedTreeB)) {
4165 return DFSResult->getSubtreeLevel(SubtreeID: SchedTreeA)
4166 < DFSResult->getSubtreeLevel(SubtreeID: SchedTreeB);
4167 }
4168 }
4169 if (MaximizeILP)
4170 return DFSResult->getILP(SU: A) < DFSResult->getILP(SU: B);
4171 else
4172 return DFSResult->getILP(SU: A) > DFSResult->getILP(SU: B);
4173 }
4174};
4175
4176/// Schedule based on the ILP metric.
4177class ILPScheduler : public MachineSchedStrategy {
4178 ScheduleDAGMILive *DAG = nullptr;
4179 ILPOrder Cmp;
4180
4181 std::vector<SUnit*> ReadyQ;
4182
4183public:
4184 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
4185
4186 void initialize(ScheduleDAGMI *dag) override {
4187 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
4188 DAG = static_cast<ScheduleDAGMILive*>(dag);
4189 DAG->computeDFSResult();
4190 Cmp.DFSResult = DAG->getDFSResult();
4191 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
4192 ReadyQ.clear();
4193 }
4194
4195 void registerRoots() override {
4196 // Restore the heap in ReadyQ with the updated DFS results.
4197 std::make_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4198 }
4199
4200 /// Implement MachineSchedStrategy interface.
4201 /// -----------------------------------------
4202
4203 /// Callback to select the highest priority node from the ready Q.
4204 SUnit *pickNode(bool &IsTopNode) override {
4205 if (ReadyQ.empty()) return nullptr;
4206 std::pop_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4207 SUnit *SU = ReadyQ.back();
4208 ReadyQ.pop_back();
4209 IsTopNode = false;
4210 LLVM_DEBUG(dbgs() << "Pick node "
4211 << "SU(" << SU->NodeNum << ") "
4212 << " ILP: " << DAG->getDFSResult()->getILP(SU)
4213 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
4214 << " @"
4215 << DAG->getDFSResult()->getSubtreeLevel(
4216 DAG->getDFSResult()->getSubtreeID(SU))
4217 << '\n'
4218 << "Scheduling " << *SU->getInstr());
4219 return SU;
4220 }
4221
4222 /// Scheduler callback to notify that a new subtree is scheduled.
4223 void scheduleTree(unsigned SubtreeID) override {
4224 std::make_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4225 }
4226
4227 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
4228 /// DFSResults, and resort the priority Q.
4229 void schedNode(SUnit *SU, bool IsTopNode) override {
4230 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
4231 }
4232
4233 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
4234
4235 void releaseBottomNode(SUnit *SU) override {
4236 ReadyQ.push_back(x: SU);
4237 std::push_heap(first: ReadyQ.begin(), last: ReadyQ.end(), comp: Cmp);
4238 }
4239};
4240
4241} // end anonymous namespace
4242
4243static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
4244 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(args: true));
4245}
4246static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
4247 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(args: false));
4248}
4249
4250static MachineSchedRegistry ILPMaxRegistry(
4251 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
4252static MachineSchedRegistry ILPMinRegistry(
4253 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
4254
4255//===----------------------------------------------------------------------===//
4256// Machine Instruction Shuffler for Correctness Testing
4257//===----------------------------------------------------------------------===//
4258
4259#ifndef NDEBUG
4260namespace {
4261
4262/// Apply a less-than relation on the node order, which corresponds to the
4263/// instruction order prior to scheduling. IsReverse implements greater-than.
4264template<bool IsReverse>
4265struct SUnitOrder {
4266 bool operator()(SUnit *A, SUnit *B) const {
4267 if (IsReverse)
4268 return A->NodeNum > B->NodeNum;
4269 else
4270 return A->NodeNum < B->NodeNum;
4271 }
4272};
4273
4274/// Reorder instructions as much as possible.
4275class InstructionShuffler : public MachineSchedStrategy {
4276 bool IsAlternating;
4277 bool IsTopDown;
4278
4279 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
4280 // gives nodes with a higher number higher priority causing the latest
4281 // instructions to be scheduled first.
4282 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
4283 TopQ;
4284
4285 // When scheduling bottom-up, use greater-than as the queue priority.
4286 PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
4287 BottomQ;
4288
4289public:
4290 InstructionShuffler(bool alternate, bool topdown)
4291 : IsAlternating(alternate), IsTopDown(topdown) {}
4292
4293 void initialize(ScheduleDAGMI*) override {
4294 TopQ.clear();
4295 BottomQ.clear();
4296 }
4297
4298 /// Implement MachineSchedStrategy interface.
4299 /// -----------------------------------------
4300
4301 SUnit *pickNode(bool &IsTopNode) override {
4302 SUnit *SU;
4303 if (IsTopDown) {
4304 do {
4305 if (TopQ.empty()) return nullptr;
4306 SU = TopQ.top();
4307 TopQ.pop();
4308 } while (SU->isScheduled);
4309 IsTopNode = true;
4310 } else {
4311 do {
4312 if (BottomQ.empty()) return nullptr;
4313 SU = BottomQ.top();
4314 BottomQ.pop();
4315 } while (SU->isScheduled);
4316 IsTopNode = false;
4317 }
4318 if (IsAlternating)
4319 IsTopDown = !IsTopDown;
4320 return SU;
4321 }
4322
4323 void schedNode(SUnit *SU, bool IsTopNode) override {}
4324
4325 void releaseTopNode(SUnit *SU) override {
4326 TopQ.push(x: SU);
4327 }
4328 void releaseBottomNode(SUnit *SU) override {
4329 BottomQ.push(x: SU);
4330 }
4331};
4332
4333} // end anonymous namespace
4334
4335static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
4336 bool Alternate = !ForceTopDown && !ForceBottomUp;
4337 bool TopDown = !ForceBottomUp;
4338 assert((TopDown || !ForceTopDown) &&
4339 "-misched-topdown incompatible with -misched-bottomup");
4340 return new ScheduleDAGMILive(
4341 C, std::make_unique<InstructionShuffler>(args&: Alternate, args&: TopDown));
4342}
4343
4344static MachineSchedRegistry ShufflerRegistry(
4345 "shuffle", "Shuffle machine instructions alternating directions",
4346 createInstructionShuffler);
4347#endif // !NDEBUG

//===----------------------------------------------------------------------===//
// GraphWriter support for ScheduleDAGMILive.
//===----------------------------------------------------------------------===//

#ifndef NDEBUG
namespace llvm {

template<> struct GraphTraits<
  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};

template<>
struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}

  static std::string getGraphName(const ScheduleDAG *G) {
    return std::string(G->MF.getName());
  }

  static bool renderGraphFromBottomUp() {
    return true;
  }

  static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
    if (ViewMISchedCutoff == 0)
      return false;
    return (Node->Preds.size() > ViewMISchedCutoff
         || Node->Succs.size() > ViewMISchedCutoff);
  }

  /// If you want to override the dot attributes printed for a particular
  /// edge, override this method.
  static std::string getEdgeAttributes(const SUnit *Node,
                                       SUnitIterator EI,
                                       const ScheduleDAG *Graph) {
    if (EI.isArtificialDep())
      return "color=cyan,style=dashed";
    if (EI.isCtrlDep())
      return "color=blue,style=dashed";
    return "";
  }

  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
    std::string Str;
    raw_string_ostream SS(Str);
    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    SS << "SU:" << SU->NodeNum;
    if (DFS)
      SS << " I:" << DFS->getNumInstrs(SU);
    return SS.str();
  }

  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
    return G->getGraphNodeLabel(SU);
  }

  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
    std::string Str("shape=Mrecord");
    const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
    const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
      static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
    if (DFS) {
      Str += ",style=filled,fillcolor=\"#";
      Str += DOT::getColorString(DFS->getSubtreeID(N));
      Str += '"';
    }
    return Str;
  }
};

} // end namespace llvm
#endif // NDEBUG

/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
#ifndef NDEBUG
  ViewGraph(this, Name, false, Title);
#else
  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
         << "systems with Graphviz or gv!\n";
#endif // NDEBUG
}

/// Out-of-line implementation with no arguments is handy for gdb.
void ScheduleDAGMI::viewGraph() {
  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
}
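
// For illustration only: besides being callable from a debugger, viewGraph()
// is typically reached through the hidden -view-misched-dags visualization
// option, which pops up the rendered DAG after a region is scheduled, while
// -view-misched-cutoff (the ViewMISchedCutoff value checked in
// DOTGraphTraits::isNodeHidden above) hides nodes whose predecessor or
// successor lists exceed the given size. The exact option spellings are noted
// here as an aid and should be verified against the cl::opt definitions
// earlier in this file.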

/// Sort predicate for the intervals stored in an instance of
/// ResourceSegments. Intervals are always disjoint (no intersection
/// for any pairs of intervals), therefore we can sort the totality of
/// the intervals by looking only at the left boundary.
static bool sortIntervals(const ResourceSegments::IntervalTy &A,
                          const ResourceSegments::IntervalTy &B) {
  return A.first < B.first;
}

unsigned ResourceSegments::getFirstAvailableAt(
    unsigned CurrCycle, unsigned AcquireAtCycle, unsigned ReleaseAtCycle,
    std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
        IntervalBuilder) const {
  assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
                        sortIntervals) &&
         "Cannot execute on an un-sorted set of intervals.");

  // Zero resource usage is allowed by TargetSchedule.td but we do not construct
  // a ResourceSegment interval for that situation.
  if (AcquireAtCycle == ReleaseAtCycle)
    return CurrCycle;

  unsigned RetCycle = CurrCycle;
  ResourceSegments::IntervalTy NewInterval =
      IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
  for (auto &Interval : _Intervals) {
    if (!intersects(NewInterval, Interval))
      continue;

    // Move the interval right next to the top of the one it
    // intersects.
    assert(Interval.second > NewInterval.first &&
           "Invalid intervals configuration.");
    RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
    NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, ReleaseAtCycle);
  }
  return RetCycle;
}
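
// Worked example (for illustration only): with booked intervals
// { [2,5), [7,9) }, CurrCycle == 1, AcquireAtCycle == 0, ReleaseAtCycle == 2,
// and an IntervalBuilder that maps cycle C to [C, C+2), the initial candidate
// [1,3) overlaps [2,5), so RetCycle advances by 5 - 1 = 4 to 5; the rebuilt
// candidate [5,7) misses both booked intervals, and 5 is returned as the first
// cycle at which the resource can be booked.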

void ResourceSegments::add(ResourceSegments::IntervalTy A,
                           const unsigned CutOff) {
  assert(A.first <= A.second && "Cannot add negative resource usage");
  assert(CutOff > 0 && "0-size interval history has no use.");
  // Zero resource usage is allowed by TargetSchedule.td, in the case that the
  // instruction needed the resource to be available but does not use it.
  // However, ResourceSegment represents an interval that is closed on the left
  // and open on the right. It is impossible to represent an empty interval when
  // the left is closed. Do not add it to Intervals.
  if (A.first == A.second)
    return;

  assert(all_of(_Intervals,
                [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
                  return !intersects(A, Interval);
                }) &&
         "A resource is being overwritten");
  _Intervals.push_back(A);

  sortAndMerge();

  // Do not keep the full history of the intervals, just the
  // latest #CutOff.
  while (_Intervals.size() > CutOff)
    _Intervals.pop_front();
}
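
// For illustration only: with CutOff == 2 and existing segments
// { [1,3), [5,7) }, adding [8,10) merges nothing, but the history now holds
// three segments, so the oldest one, [1,3), is popped from the front and only
// the latest two segments { [5,7), [8,10) } are kept as booking history.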

bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
                                  ResourceSegments::IntervalTy B) {
  assert(A.first <= A.second && "Invalid interval");
  assert(B.first <= B.second && "Invalid interval");

  // Share one boundary.
  if ((A.first == B.first) || (A.second == B.second))
    return true;

  // full intersect:  [    ***    )  B
  //                      [***)      A
  if ((A.first > B.first) && (A.second < B.second))
    return true;

  // right intersect: [    ***)      B
  //                      [***    )  A
  if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
    return true;

  // left intersect:      [***    )  B
  //                  [    ***)      A
  if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
    return true;

  return false;
}
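
// For illustration only: with the closed-left/open-right convention used here,
// intersects([2,5), [4,8)) and intersects([1,4), [1,2)) return true, while
// intersects([2,4), [5,7)) and the merely touching pair
// intersects([1,3), [3,6)) return false.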

void ResourceSegments::sortAndMerge() {
  if (_Intervals.size() <= 1)
    return;

  // First sort the collection.
  _Intervals.sort(sortIntervals);

  // We can use `next` because the list holds at least two elements.
  auto next = std::next(std::begin(_Intervals));
  auto E = std::end(_Intervals);
  for (; next != E; ++next) {
    if (std::prev(next)->second >= next->first) {
      next->first = std::prev(next)->first;
      _Intervals.erase(std::prev(next));
      continue;
    }
  }
}
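
// For illustration only: starting from { [2,5), [1,3), [7,9) }, sorting yields
// { [1,3), [2,5), [7,9) }; since 3 >= 2, the first two segments are merged
// into [1,5), giving { [1,5), [7,9) }. Touching segments such as [1,3) and
// [3,5) are merged as well because the comparison uses >=.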