1//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This implements the ScheduleDAGInstrs class, which implements
10/// re-scheduling of MachineInstrs.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/CodeGen/ScheduleDAGInstrs.h"
15
16#include "llvm/ADT/IntEqClasses.h"
17#include "llvm/ADT/MapVector.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/SparseSet.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/Analysis/AliasAnalysis.h"
22#include "llvm/Analysis/ValueTracking.h"
23#include "llvm/CodeGen/LiveIntervals.h"
24#include "llvm/CodeGen/LivePhysRegs.h"
25#include "llvm/CodeGen/MachineBasicBlock.h"
26#include "llvm/CodeGen/MachineFrameInfo.h"
27#include "llvm/CodeGen/MachineFunction.h"
28#include "llvm/CodeGen/MachineInstr.h"
29#include "llvm/CodeGen/MachineInstrBundle.h"
30#include "llvm/CodeGen/MachineMemOperand.h"
31#include "llvm/CodeGen/MachineOperand.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/CodeGen/PseudoSourceValue.h"
34#include "llvm/CodeGen/RegisterPressure.h"
35#include "llvm/CodeGen/ScheduleDAG.h"
36#include "llvm/CodeGen/ScheduleDFS.h"
37#include "llvm/CodeGen/SlotIndexes.h"
38#include "llvm/CodeGen/TargetRegisterInfo.h"
39#include "llvm/CodeGen/TargetSubtargetInfo.h"
40#include "llvm/Config/llvm-config.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/Function.h"
43#include "llvm/IR/Type.h"
44#include "llvm/IR/Value.h"
45#include "llvm/MC/LaneBitmask.h"
46#include "llvm/MC/MCRegisterInfo.h"
47#include "llvm/Support/Casting.h"
48#include "llvm/Support/CommandLine.h"
49#include "llvm/Support/Compiler.h"
50#include "llvm/Support/Debug.h"
51#include "llvm/Support/ErrorHandling.h"
52#include "llvm/Support/Format.h"
53#include "llvm/Support/raw_ostream.h"
54#include <algorithm>
55#include <cassert>
56#include <iterator>
57#include <utility>
58#include <vector>
59
60using namespace llvm;
61
62#define DEBUG_TYPE "machine-scheduler"
63
64static cl::opt<bool>
65 EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
66 cl::desc("Enable use of AA during MI DAG construction"));
67
68static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
69 cl::init(Val: true), cl::desc("Enable use of TBAA during MI DAG construction"));
70
71// Note: the two options below might be used in tuning compile time vs
72// output quality. Setting HugeRegion so large that it will never be
73// reached means best-effort, but may be slow.
74
75// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
76// together hold this many SUs, a reduction of maps will be done.
77static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
78 cl::init(Val: 1000), cl::desc("The limit to use while constructing the DAG "
79 "prior to scheduling, at which point a trade-off "
80 "is made to avoid excessive compile time."));
81
82static cl::opt<unsigned> ReductionSize(
83 "dag-maps-reduction-size", cl::Hidden,
84 cl::desc("A huge scheduling region will have maps reduced by this many "
85 "nodes at a time. Defaults to HugeRegion / 2."));
86
87#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
88static cl::opt<bool> SchedPrintCycles(
89 "sched-print-cycles", cl::Hidden, cl::init(Val: false),
90 cl::desc("Report top/bottom cycles when dumping SUnit instances"));
91#endif
92
93static unsigned getReductionSize() {
94 // Always reduce a huge region with half of the elements, except
95 // when user sets this number explicitly.
96 if (ReductionSize.getNumOccurrences() == 0)
97 return HugeRegion / 2;
98 return ReductionSize;
99}
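// Example for getReductionSize(): with the default -dag-maps-huge-region=1000
// and -dag-maps-reduction-size left unset, each reduction removes 500 nodes
// from the maps; passing -dag-maps-reduction-size explicitly overrides that.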
100
101static void dumpSUList(const ScheduleDAGInstrs::SUList &L) {
102#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
103 dbgs() << "{ ";
104 for (const SUnit *SU : L) {
105 dbgs() << "SU(" << SU->NodeNum << ")";
106 if (SU != L.back())
107 dbgs() << ", ";
108 }
109 dbgs() << "}\n";
110#endif
111}
112
113ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
114 const MachineLoopInfo *mli,
115 bool RemoveKillFlags)
116 : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
117 RemoveKillFlags(RemoveKillFlags),
118 UnknownValue(UndefValue::get(
119 T: Type::getVoidTy(C&: mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
120 DbgValues.clear();
121
122 const TargetSubtargetInfo &ST = mf.getSubtarget();
123 SchedModel.init(TSInfo: &ST);
124}
125
/// If this machine instr has memory reference information and it can be
/// tracked to a normal reference to a known object, return the Value
/// for that object. This function returns false if the memory location is
/// unknown or may alias anything.
130static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
131 const MachineFrameInfo &MFI,
132 UnderlyingObjectsVector &Objects,
133 const DataLayout &DL) {
134 auto AllMMOsOkay = [&]() {
135 for (const MachineMemOperand *MMO : MI->memoperands()) {
136 // TODO: Figure out whether isAtomic is really necessary (see D57601).
137 if (MMO->isVolatile() || MMO->isAtomic())
138 return false;
139
140 if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
        // Functions that contain tail calls don't have unique
        // PseudoSourceValue objects. Two PseudoSourceValues might refer to the
        // same or overlapping locations. The client code calling this function
        // assumes this is not the case. So return a conservative answer of no
        // known object.
146 if (MFI.hasTailCall())
147 return false;
148
149 // For now, ignore PseudoSourceValues which may alias LLVM IR values
150 // because the code that uses this function has no way to cope with
151 // such aliases.
152 if (PSV->isAliased(&MFI))
153 return false;
154
155 bool MayAlias = PSV->mayAlias(&MFI);
156 Objects.emplace_back(Args&: PSV, Args&: MayAlias);
157 } else if (const Value *V = MMO->getValue()) {
158 SmallVector<Value *, 4> Objs;
159 if (!getUnderlyingObjectsForCodeGen(V, Objects&: Objs))
160 return false;
161
162 for (Value *V : Objs) {
163 assert(isIdentifiedObject(V));
164 Objects.emplace_back(Args&: V, Args: true);
165 }
166 } else
167 return false;
168 }
169 return true;
170 };
171
172 if (!AllMMOsOkay()) {
173 Objects.clear();
174 return false;
175 }
176
177 return true;
178}
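// Illustrative example (an assumption about typical inputs, not exhaustive):
// a reload from a spill slot carries a fixed-stack PseudoSourceValue, so the
// PSV path above is taken and, assuming PSV->mayAlias(&MFI) is false for that
// slot, the object is recorded with MayAlias == false. A plain load from an
// alloca instead resolves through getUnderlyingObjectsForCodeGen() to the
// alloca's Value.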
179
180void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
181 BB = bb;
182}
183
184void ScheduleDAGInstrs::finishBlock() {
185 // Subclasses should no longer refer to the old block.
186 BB = nullptr;
187}
188
189void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
190 MachineBasicBlock::iterator begin,
191 MachineBasicBlock::iterator end,
192 unsigned regioninstrs) {
193 assert(bb == BB && "startBlock should set BB");
194 RegionBegin = begin;
195 RegionEnd = end;
196 NumRegionInstrs = regioninstrs;
197}
198
199void ScheduleDAGInstrs::exitRegion() {
200 // Nothing to do.
201}
202
203void ScheduleDAGInstrs::addSchedBarrierDeps() {
204 MachineInstr *ExitMI =
205 RegionEnd != BB->end()
206 ? &*skipDebugInstructionsBackward(It: RegionEnd, Begin: RegionBegin)
207 : nullptr;
208 ExitSU.setInstr(ExitMI);
209 // Add dependencies on the defs and uses of the instruction.
210 if (ExitMI) {
211 for (const MachineOperand &MO : ExitMI->all_uses()) {
212 Register Reg = MO.getReg();
213 if (Reg.isPhysical()) {
214 for (MCRegUnit Unit : TRI->regunits(Reg))
215 Uses.insert(Val: PhysRegSUOper(&ExitSU, -1, Unit));
216 } else if (Reg.isVirtual() && MO.readsReg()) {
217 addVRegUseDeps(SU: &ExitSU, OperIdx: MO.getOperandNo());
218 }
219 }
220 }
221 if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
222 // For others, e.g. fallthrough, conditional branch, assume the exit
223 // uses all the registers that are livein to the successor blocks.
224 for (const MachineBasicBlock *Succ : BB->successors()) {
225 for (const auto &LI : Succ->liveins()) {
226 for (MCRegUnitMaskIterator U(LI.PhysReg, TRI); U.isValid(); ++U) {
227 auto [Unit, Mask] = *U;
228 if ((Mask & LI.LaneMask).any() && !Uses.contains(Key: Unit))
229 Uses.insert(Val: PhysRegSUOper(&ExitSU, -1, Unit));
230 }
231 }
232 }
233 }
234}
235
236/// MO is an operand of SU's instruction that defines a physical register. Adds
237/// data dependencies from SU to any uses of the physical register.
238void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
239 const MachineOperand &MO = SU->getInstr()->getOperand(i: OperIdx);
240 assert(MO.isDef() && "expect physreg def");
241 Register Reg = MO.getReg();
242
243 // Ask the target if address-backscheduling is desirable, and if so how much.
244 const TargetSubtargetInfo &ST = MF.getSubtarget();
245
246 // Only use any non-zero latency for real defs/uses, in contrast to
247 // "fake" operands added by regalloc.
248 const MCInstrDesc &DefMIDesc = SU->getInstr()->getDesc();
249 bool ImplicitPseudoDef = (OperIdx >= DefMIDesc.getNumOperands() &&
250 !DefMIDesc.hasImplicitDefOfPhysReg(Reg));
251 for (MCRegUnit Unit : TRI->regunits(Reg)) {
252 for (RegUnit2SUnitsMap::iterator I = Uses.find(Key: Unit); I != Uses.end();
253 ++I) {
254 SUnit *UseSU = I->SU;
255 if (UseSU == SU)
256 continue;
257
258 // Adjust the dependence latency using operand def/use information,
259 // then allow the target to perform its own adjustments.
260 MachineInstr *UseInstr = nullptr;
261 int UseOpIdx = I->OpIdx;
262 bool ImplicitPseudoUse = false;
263 SDep Dep;
264 if (UseOpIdx < 0) {
265 Dep = SDep(SU, SDep::Artificial);
266 } else {
267 // Set the hasPhysRegDefs only for physreg defs that have a use within
268 // the scheduling region.
269 SU->hasPhysRegDefs = true;
270
271 UseInstr = UseSU->getInstr();
272 Register UseReg = UseInstr->getOperand(i: UseOpIdx).getReg();
273 const MCInstrDesc &UseMIDesc = UseInstr->getDesc();
274 ImplicitPseudoUse = UseOpIdx >= ((int)UseMIDesc.getNumOperands()) &&
275 !UseMIDesc.hasImplicitUseOfPhysReg(Reg: UseReg);
276
277 Dep = SDep(SU, SDep::Data, UseReg);
278 }
279 if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
280 Dep.setLatency(SchedModel.computeOperandLatency(DefMI: SU->getInstr(), DefOperIdx: OperIdx,
281 UseMI: UseInstr, UseOperIdx: UseOpIdx));
282 } else {
283 Dep.setLatency(0);
284 }
285 ST.adjustSchedDependency(Def: SU, DefOpIdx: OperIdx, Use: UseSU, UseOpIdx, Dep, SchedModel: &SchedModel);
286 UseSU->addPred(D: Dep);
287 }
288 }
289}
290
/// Adds register dependencies (data, anti, and output) from this SUnit
/// to following instructions in the same scheduling region that depend on the
/// physical register referenced at OperIdx.
294void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
295 MachineInstr *MI = SU->getInstr();
296 MachineOperand &MO = MI->getOperand(i: OperIdx);
297 Register Reg = MO.getReg();
298 // We do not need to track any dependencies for constant registers.
299 if (MRI.isConstantPhysReg(PhysReg: Reg))
300 return;
301
302 const TargetSubtargetInfo &ST = MF.getSubtarget();
303
304 // Optionally add output and anti dependencies. For anti
305 // dependencies we use a latency of 0 because for a multi-issue
306 // target we want to allow the defining instruction to issue
307 // in the same cycle as the using instruction.
308 // TODO: Using a latency of 1 here for output dependencies assumes
309 // there's no cost for reusing registers.
310 SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
311 for (MCRegUnit Unit : TRI->regunits(Reg)) {
312 for (RegUnit2SUnitsMap::iterator I = Defs.find(Key: Unit); I != Defs.end();
313 ++I) {
314 SUnit *DefSU = I->SU;
315 if (DefSU == &ExitSU)
316 continue;
317 MachineInstr *DefInstr = DefSU->getInstr();
318 MachineOperand &DefMO = DefInstr->getOperand(i: I->OpIdx);
319 if (DefSU != SU &&
320 (Kind != SDep::Output || !MO.isDead() || !DefMO.isDead())) {
321 SDep Dep(SU, Kind, DefMO.getReg());
322 if (Kind != SDep::Anti) {
323 Dep.setLatency(
324 SchedModel.computeOutputLatency(DefMI: MI, DefOperIdx: OperIdx, DepMI: DefInstr));
325 }
326 ST.adjustSchedDependency(Def: SU, DefOpIdx: OperIdx, Use: DefSU, UseOpIdx: I->OpIdx, Dep,
327 SchedModel: &SchedModel);
328 DefSU->addPred(D: Dep);
329 }
330 }
331 }
332
333 if (MO.isUse()) {
334 SU->hasPhysRegUses = true;
335 // Either insert a new Reg2SUnits entry with an empty SUnits list, or
336 // retrieve the existing SUnits list for this register's uses.
337 // Push this SUnit on the use list.
338 for (MCRegUnit Unit : TRI->regunits(Reg))
339 Uses.insert(Val: PhysRegSUOper(SU, OperIdx, Unit));
340 if (RemoveKillFlags)
341 MO.setIsKill(false);
342 } else {
343 addPhysRegDataDeps(SU, OperIdx);
344
345 // Clear previous uses and defs of this register and its subregisters.
346 for (MCRegUnit Unit : TRI->regunits(Reg)) {
347 Uses.eraseAll(K: Unit);
348 if (!MO.isDead())
349 Defs.eraseAll(K: Unit);
350 }
351
352 if (MO.isDead() && SU->isCall) {
353 // Calls will not be reordered because of chain dependencies (see
354 // below). Since call operands are dead, calls may continue to be added
355 // to the DefList making dependence checking quadratic in the size of
356 // the block. Instead, we leave only one call at the back of the
357 // DefList.
358 for (MCRegUnit Unit : TRI->regunits(Reg)) {
359 RegUnit2SUnitsMap::RangePair P = Defs.equal_range(K: Unit);
360 RegUnit2SUnitsMap::iterator B = P.first;
361 RegUnit2SUnitsMap::iterator I = P.second;
362 for (bool isBegin = I == B; !isBegin; /* empty */) {
363 isBegin = (--I) == B;
364 if (!I->SU->isCall)
365 break;
366 I = Defs.erase(I);
367 }
368 }
369 }
370
371 // Defs are pushed in the order they are visited and never reordered.
372 for (MCRegUnit Unit : TRI->regunits(Reg))
373 Defs.insert(Val: PhysRegSUOper(SU, OperIdx, Unit));
374 }
375}
376
377LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
378{
379 Register Reg = MO.getReg();
380 // No point in tracking lanemasks if we don't have interesting subregisters.
381 const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
382 if (!RC.HasDisjunctSubRegs)
383 return LaneBitmask::getAll();
384
385 unsigned SubReg = MO.getSubReg();
386 if (SubReg == 0)
387 return RC.getLaneMask();
388 return TRI->getSubRegIndexLaneMask(SubIdx: SubReg);
389}
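// Illustrative example (lane mask values are target-dependent and purely
// hypothetical here): for a register class split into two disjoint 32-bit
// sub-registers, a def of the low half might map to lane mask 0x3 and the
// high half to 0xC, while an operand without a sub-register index gets the
// register class's full lane mask.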
390
391bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
392 auto RegUse = CurrentVRegUses.find(Key: MO.getReg());
393 if (RegUse == CurrentVRegUses.end())
394 return true;
395 return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
396}
397
398/// Adds register output and data dependencies from this SUnit to instructions
399/// that occur later in the same scheduling region if they read from or write to
400/// the virtual register defined at OperIdx.
401///
402/// TODO: Hoist loop induction variable increments. This has to be
403/// reevaluated. Generally, IV scheduling should be done before coalescing.
404void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
405 MachineInstr *MI = SU->getInstr();
406 MachineOperand &MO = MI->getOperand(i: OperIdx);
407 Register Reg = MO.getReg();
408
409 LaneBitmask DefLaneMask;
410 LaneBitmask KillLaneMask;
411 if (TrackLaneMasks) {
412 bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
413 DefLaneMask = getLaneMaskForMO(MO);
414 // If we have a <read-undef> flag, none of the lane values comes from an
415 // earlier instruction.
416 KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
417
418 if (MO.getSubReg() != 0 && MO.isUndef()) {
      // There may be other subregister defs of the same register on the same
      // instruction in later operands. The lanes of other defs will now be
      // live after this instruction, so these should not be treated as killed
      // by the instruction even though they appear to be killed in this one
      // operand.
423 for (const MachineOperand &OtherMO :
424 llvm::drop_begin(RangeOrContainer: MI->operands(), N: OperIdx + 1))
425 if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
426 KillLaneMask &= ~getLaneMaskForMO(MO: OtherMO);
427 }
428
429 // Clear undef flag, we'll re-add it later once we know which subregister
430 // Def is first.
431 MO.setIsUndef(false);
432 } else {
433 DefLaneMask = LaneBitmask::getAll();
434 KillLaneMask = LaneBitmask::getAll();
435 }
436
437 if (MO.isDead()) {
438 assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
439 } else {
440 // Add data dependence to all uses we found so far.
441 const TargetSubtargetInfo &ST = MF.getSubtarget();
442 for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Key: Reg),
443 E = CurrentVRegUses.end(); I != E; /*empty*/) {
444 LaneBitmask LaneMask = I->LaneMask;
445 // Ignore uses of other lanes.
446 if ((LaneMask & KillLaneMask).none()) {
447 ++I;
448 continue;
449 }
450
451 if ((LaneMask & DefLaneMask).any()) {
452 SUnit *UseSU = I->SU;
453 MachineInstr *Use = UseSU->getInstr();
454 SDep Dep(SU, SDep::Data, Reg);
455 Dep.setLatency(SchedModel.computeOperandLatency(DefMI: MI, DefOperIdx: OperIdx, UseMI: Use,
456 UseOperIdx: I->OperandIndex));
457 ST.adjustSchedDependency(Def: SU, DefOpIdx: OperIdx, Use: UseSU, UseOpIdx: I->OperandIndex, Dep,
458 SchedModel: &SchedModel);
459 UseSU->addPred(D: Dep);
460 }
461
462 LaneMask &= ~KillLaneMask;
463 // If we found a Def for all lanes of this use, remove it from the list.
464 if (LaneMask.any()) {
465 I->LaneMask = LaneMask;
466 ++I;
467 } else
468 I = CurrentVRegUses.erase(I);
469 }
470 }
471
472 // Shortcut: Singly defined vregs do not have output/anti dependencies.
473 if (MRI.hasOneDef(RegNo: Reg))
474 return;
475
476 // Add output dependence to the next nearest defs of this vreg.
477 //
478 // Unless this definition is dead, the output dependence should be
479 // transitively redundant with antidependencies from this definition's
480 // uses. We're conservative for now until we have a way to guarantee the uses
481 // are not eliminated sometime during scheduling. The output dependence edge
482 // is also useful if output latency exceeds def-use latency.
483 LaneBitmask LaneMask = DefLaneMask;
484 for (VReg2SUnit &V2SU : make_range(x: CurrentVRegDefs.find(Key: Reg),
485 y: CurrentVRegDefs.end())) {
486 // Ignore defs for other lanes.
487 if ((V2SU.LaneMask & LaneMask).none())
488 continue;
489 // Add an output dependence.
490 SUnit *DefSU = V2SU.SU;
491 // Ignore additional defs of the same lanes in one instruction. This can
492 // happen because lanemasks are shared for targets with too many
    // subregisters. We also use some representation tricks/hacks where we
494 // add super-register defs/uses, to imply that although we only access parts
495 // of the reg we care about the full one.
496 if (DefSU == SU)
497 continue;
498 SDep Dep(SU, SDep::Output, Reg);
499 Dep.setLatency(
500 SchedModel.computeOutputLatency(DefMI: MI, DefOperIdx: OperIdx, DepMI: DefSU->getInstr()));
501 DefSU->addPred(D: Dep);
502
503 // Update current definition. This can get tricky if the def was about a
504 // bigger lanemask before. We then have to shrink it and create a new
505 // VReg2SUnit for the non-overlapping part.
506 LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
507 LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
508 V2SU.SU = SU;
509 V2SU.LaneMask = OverlapMask;
510 if (NonOverlapMask.any())
511 CurrentVRegDefs.insert(Val: VReg2SUnit(Reg, NonOverlapMask, DefSU));
512 }
513 // If there was no CurrentVRegDefs entry for some lanes yet, create one.
514 if (LaneMask.any())
515 CurrentVRegDefs.insert(Val: VReg2SUnit(Reg, LaneMask, SU));
516}
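// Worked example for the lane splitting above (lane mask values illustrative
// only): if a previous def DefSU covers lanes 0x3 and the current def SU
// writes only lane 0x1, the existing CurrentVRegDefs entry is shrunk to the
// overlap (lane 0x1, now owned by SU) and a new entry (lane 0x2, still owned
// by DefSU) is inserted for the non-overlapping part.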
517
518/// Adds a register data dependency if the instruction that defines the
519/// virtual register used at OperIdx is mapped to an SUnit. Add a register
520/// antidependency from this SUnit to instructions that occur later in the same
521/// scheduling region if they write the virtual register.
522///
523/// TODO: Handle ExitSU "uses" properly.
524void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
525 const MachineInstr *MI = SU->getInstr();
526 assert(!MI->isDebugOrPseudoInstr());
527
528 const MachineOperand &MO = MI->getOperand(i: OperIdx);
529 Register Reg = MO.getReg();
530
531 // Remember the use. Data dependencies will be added when we find the def.
532 LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
533 : LaneBitmask::getAll();
534 CurrentVRegUses.insert(Val: VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
535
536 // Add antidependences to the following defs of the vreg.
537 for (VReg2SUnit &V2SU : make_range(x: CurrentVRegDefs.find(Key: Reg),
538 y: CurrentVRegDefs.end())) {
539 // Ignore defs for unrelated lanes.
540 LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
541 if ((PrevDefLaneMask & LaneMask).none())
542 continue;
543 if (V2SU.SU == SU)
544 continue;
545
546 V2SU.SU->addPred(D: SDep(SU, SDep::Anti, Reg));
547 }
548}
549
550/// Returns true if MI is an instruction we are unable to reason about
551/// (like a call or something with unmodeled side effects).
552static inline bool isGlobalMemoryObject(MachineInstr *MI) {
553 return MI->isCall() || MI->hasUnmodeledSideEffects() ||
554 (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
555}
556
557void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
558 unsigned Latency) {
559 if (SUa->getInstr()->mayAlias(AA: AAForDep, Other: *SUb->getInstr(), UseTBAA)) {
560 SDep Dep(SUa, SDep::MayAliasMem);
561 Dep.setLatency(Latency);
562 SUb->addPred(D: Dep);
563 }
564}
565
566/// Creates an SUnit for each real instruction, numbered in top-down
/// topological order. The instruction order A < B implies that no edge exists
568/// from B to A.
569///
570/// Map each real instruction to its SUnit.
571///
572/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
573/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
574/// instead of pointers.
575///
576/// MachineScheduler relies on initSUnits numbering the nodes by their order in
577/// the original instruction list.
578void ScheduleDAGInstrs::initSUnits() {
579 // We'll be allocating one SUnit for each real instruction in the region,
580 // which is contained within a basic block.
581 SUnits.reserve(n: NumRegionInstrs);
582
583 for (MachineInstr &MI : make_range(x: RegionBegin, y: RegionEnd)) {
584 if (MI.isDebugOrPseudoInstr())
585 continue;
586
587 SUnit *SU = newSUnit(MI: &MI);
588 MISUnitMap[&MI] = SU;
589
590 SU->isCall = MI.isCall();
591 SU->isCommutable = MI.isCommutable();
592
593 // Assign the Latency field of SU using target-provided information.
594 SU->Latency = SchedModel.computeInstrLatency(MI: SU->getInstr());
595
596 // If this SUnit uses a reserved or unbuffered resource, mark it as such.
597 //
598 // Reserved resources block an instruction from issuing and stall the
599 // entire pipeline. These are identified by BufferSize=0.
600 //
601 // Unbuffered resources prevent execution of subsequent instructions that
602 // require the same resources. This is used for in-order execution pipelines
603 // within an out-of-order core. These are identified by BufferSize=1.
604 if (SchedModel.hasInstrSchedModel()) {
605 const MCSchedClassDesc *SC = getSchedClass(SU);
606 for (const MCWriteProcResEntry &PRE :
607 make_range(x: SchedModel.getWriteProcResBegin(SC),
608 y: SchedModel.getWriteProcResEnd(SC))) {
609 switch (SchedModel.getProcResource(PIdx: PRE.ProcResourceIdx)->BufferSize) {
610 case 0:
611 SU->hasReservedResource = true;
612 break;
613 case 1:
614 SU->isUnbuffered = true;
615 break;
616 default:
617 break;
618 }
619 }
620 }
621 }
622}
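// Illustrative note: a scheduling model would describe a fully reserved,
// non-pipelined unit (e.g. a hypothetical divider) with BufferSize = 0, which
// sets hasReservedResource above, and an in-order unit inside an out-of-order
// core with BufferSize = 1, which sets isUnbuffered.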
623
624class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
625 /// Current total number of SUs in map.
626 unsigned NumNodes = 0;
627
628 /// 1 for loads, 0 for stores. (see comment in SUList)
629 unsigned TrueMemOrderLatency;
630
631public:
632 Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
633
634 /// To keep NumNodes up to date, insert() is used instead of
635 /// this operator w/ push_back().
636 ValueType &operator[](const SUList &Key) {
637 llvm_unreachable("Don't use. Use insert() instead."); };
638
639 /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
640 /// reduce().
641 void inline insert(SUnit *SU, ValueType V) {
642 MapVector::operator[](Key: V).push_back(x: SU);
643 NumNodes++;
644 }
645
646 /// Clears the list of SUs mapped to V.
647 void inline clearList(ValueType V) {
648 iterator Itr = find(Key: V);
649 if (Itr != end()) {
650 assert(NumNodes >= Itr->second.size());
651 NumNodes -= Itr->second.size();
652
653 Itr->second.clear();
654 }
655 }
656
657 /// Clears map from all contents.
658 void clear() {
659 MapVector<ValueType, SUList>::clear();
660 NumNodes = 0;
661 }
662
663 unsigned inline size() const { return NumNodes; }
664
665 /// Counts the number of SUs in this map after a reduction.
666 void reComputeSize() {
667 NumNodes = 0;
668 for (auto &I : *this)
669 NumNodes += I.second.size();
670 }
671
672 unsigned inline getTrueMemOrderLatency() const {
673 return TrueMemOrderLatency;
674 }
675
676 void dump();
677};
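// Typical use (see buildSchedGraph() below): Stores.insert(SU, V) appends SU
// to the list of SUs recorded for the underlying object V; the caller is
// responsible for comparing size() against HugeRegion and invoking
// reduceHugeMemNodeMaps() when the maps grow too large.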
678
679void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
680 Value2SUsMap &Val2SUsMap) {
681 for (auto &I : Val2SUsMap)
682 addChainDependencies(SU, SUs&: I.second,
683 Latency: Val2SUsMap.getTrueMemOrderLatency());
684}
685
686void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
687 Value2SUsMap &Val2SUsMap,
688 ValueType V) {
689 Value2SUsMap::iterator Itr = Val2SUsMap.find(Key: V);
690 if (Itr != Val2SUsMap.end())
691 addChainDependencies(SU, SUs&: Itr->second,
692 Latency: Val2SUsMap.getTrueMemOrderLatency());
693}
694
695void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
696 assert(BarrierChain != nullptr);
697
698 for (auto &[V, SUs] : map) {
699 (void)V;
700 for (auto *SU : SUs)
701 SU->addPredBarrier(SU: BarrierChain);
702 }
703 map.clear();
704}
705
706void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
707 assert(BarrierChain != nullptr);
708
709 // Go through all lists of SUs.
710 for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
711 Value2SUsMap::iterator CurrItr = I++;
712 SUList &sus = CurrItr->second;
713 SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
714 for (; SUItr != SUEE; ++SUItr) {
715 // Stop on BarrierChain or any instruction above it.
716 if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
717 break;
718
719 (*SUItr)->addPredBarrier(SU: BarrierChain);
720 }
721
    // Also remove the BarrierChain from the list if present.
723 if (SUItr != SUEE && *SUItr == BarrierChain)
724 SUItr++;
725
726 // Remove all SUs that are now successors of BarrierChain.
727 if (SUItr != sus.begin())
728 sus.erase(first: sus.begin(), last: SUItr);
729 }
730
731 // Remove all entries with empty su lists.
732 map.remove_if(Pred: [&](std::pair<ValueType, SUList> &mapEntry) {
733 return (mapEntry.second.empty()); });
734
735 // Recompute the size of the map (NumNodes).
736 map.reComputeSize();
737}
738
739void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
740 RegPressureTracker *RPTracker,
741 PressureDiffs *PDiffs,
742 LiveIntervals *LIS,
743 bool TrackLaneMasks) {
744 const TargetSubtargetInfo &ST = MF.getSubtarget();
745 bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
746 : ST.useAA();
747 AAForDep = UseAA ? AA : nullptr;
748
749 BarrierChain = nullptr;
750
751 this->TrackLaneMasks = TrackLaneMasks;
752 MISUnitMap.clear();
753 ScheduleDAG::clearDAG();
754
755 // Create an SUnit for each real instruction.
756 initSUnits();
757
758 if (PDiffs)
759 PDiffs->init(N: SUnits.size());
760
761 // We build scheduling units by walking a block's instruction list
762 // from bottom to top.
763
  // Each MI's memory operand(s) are analyzed into a list of underlying
  // objects. The SU is then inserted in the SUList(s) mapped from the
  // Value(s). Each Value thus gets mapped to lists of SUs depending
  // on it, with stores and loads kept separately. Two SUs are trivially
  // non-aliasing if they both depend on only identified Values and do
  // not share any common Value.
770 Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
771
  // Certain memory accesses are known to not alias any SU in Stores
  // or Loads, and therefore have their own 'NonAlias' domain. E.g.
  // spill / reload instructions never alias LLVM IR values. It would
  // be nice to assume that this type of memory access always has
  // proper memory operand modelling, and is therefore never
  // unanalyzable, but this is conservatively not done.
779 Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
780
781 // Track all instructions that may raise floating-point exceptions.
  // These do not depend on one another (or normal loads or stores), but
783 // must not be rescheduled across global barriers. Note that we don't
784 // really need a "map" here since we don't track those MIs by value;
785 // using the same Value2SUsMap data type here is simply a matter of
786 // convenience.
787 Value2SUsMap FPExceptions;
788
789 // Remove any stale debug info; sometimes BuildSchedGraph is called again
790 // without emitting the info from the previous call.
791 DbgValues.clear();
792 FirstDbgValue = nullptr;
793
794 assert(Defs.empty() && Uses.empty() &&
795 "Only BuildGraph should update Defs/Uses");
796 Defs.setUniverse(TRI->getNumRegs());
797 Uses.setUniverse(TRI->getNumRegs());
798
799 assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
800 assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
801 unsigned NumVirtRegs = MRI.getNumVirtRegs();
802 CurrentVRegDefs.setUniverse(NumVirtRegs);
803 CurrentVRegUses.setUniverse(NumVirtRegs);
804
805 // Model data dependencies between instructions being scheduled and the
806 // ExitSU.
807 addSchedBarrierDeps();
808
809 // Walk the list of instructions, from bottom moving up.
810 MachineInstr *DbgMI = nullptr;
811 for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
812 MII != MIE; --MII) {
813 MachineInstr &MI = *std::prev(x: MII);
814 if (DbgMI) {
815 DbgValues.emplace_back(args&: DbgMI, args: &MI);
816 DbgMI = nullptr;
817 }
818
819 if (MI.isDebugValue() || MI.isDebugPHI()) {
820 DbgMI = &MI;
821 continue;
822 }
823
824 if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe())
825 continue;
826
827 SUnit *SU = MISUnitMap[&MI];
828 assert(SU && "No SUnit mapped to this MI");
829
830 if (RPTracker) {
831 RegisterOperands RegOpers;
832 RegOpers.collect(MI, TRI: *TRI, MRI, TrackLaneMasks, IgnoreDead: false);
833 if (TrackLaneMasks) {
834 SlotIndex SlotIdx = LIS->getInstructionIndex(Instr: MI);
835 RegOpers.adjustLaneLiveness(LIS: *LIS, MRI, Pos: SlotIdx);
836 }
837 if (PDiffs != nullptr)
838 PDiffs->addInstruction(Idx: SU->NodeNum, RegOpers, MRI);
839
840 if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
841 RPTracker->recedeSkipDebugValues();
842 assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
843 RPTracker->recede(RegOpers);
844 }
845
846 assert(
847 (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
848 "Cannot schedule terminators or labels!");
849
850 // Add register-based dependencies (data, anti, and output).
851 // For some instructions (calls, returns, inline-asm, etc.) there can
852 // be explicit uses and implicit defs, in which case the use will appear
853 // on the operand list before the def. Do two passes over the operand
854 // list to make sure that defs are processed before any uses.
855 bool HasVRegDef = false;
856 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
857 const MachineOperand &MO = MI.getOperand(i: j);
858 if (!MO.isReg() || !MO.isDef())
859 continue;
860 Register Reg = MO.getReg();
861 if (Reg.isPhysical()) {
862 addPhysRegDeps(SU, OperIdx: j);
863 } else if (Reg.isVirtual()) {
864 HasVRegDef = true;
865 addVRegDefDeps(SU, OperIdx: j);
866 }
867 }
868 // Now process all uses.
869 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
870 const MachineOperand &MO = MI.getOperand(i: j);
871 // Only look at use operands.
872 // We do not need to check for MO.readsReg() here because subsequent
873 // subregister defs will get output dependence edges and need no
874 // additional use dependencies.
875 if (!MO.isReg() || !MO.isUse())
876 continue;
877 Register Reg = MO.getReg();
878 if (Reg.isPhysical()) {
879 addPhysRegDeps(SU, OperIdx: j);
880 } else if (Reg.isVirtual() && MO.readsReg()) {
881 addVRegUseDeps(SU, OperIdx: j);
882 }
883 }
884
885 // If we haven't seen any uses in this scheduling region, create a
886 // dependence edge to ExitSU to model the live-out latency. This is required
887 // for vreg defs with no in-region use, and prefetches with no vreg def.
888 //
889 // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
890 // check currently relies on being called before adding chain deps.
891 if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
892 SDep Dep(SU, SDep::Artificial);
893 Dep.setLatency(SU->Latency - 1);
894 ExitSU.addPred(D: Dep);
895 }
896
897 // Add memory dependencies (Note: isStoreToStackSlot and
    // isLoadFromStackSlot are not usable after stack slots are lowered to
899 // actual addresses).
900
901 // This is a barrier event that acts as a pivotal node in the DAG.
902 if (isGlobalMemoryObject(MI: &MI)) {
903
904 // Become the barrier chain.
905 if (BarrierChain)
906 BarrierChain->addPredBarrier(SU);
907 BarrierChain = SU;
908
909 LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
910 << BarrierChain->NodeNum << ").\n";);
911
912 // Add dependencies against everything below it and clear maps.
913 addBarrierChain(map&: Stores);
914 addBarrierChain(map&: Loads);
915 addBarrierChain(map&: NonAliasStores);
916 addBarrierChain(map&: NonAliasLoads);
917 addBarrierChain(map&: FPExceptions);
918
919 continue;
920 }
921
922 // Instructions that may raise FP exceptions may not be moved
923 // across any global barriers.
924 if (MI.mayRaiseFPException()) {
925 if (BarrierChain)
926 BarrierChain->addPredBarrier(SU);
927
928 FPExceptions.insert(SU, V: UnknownValue);
929
930 if (FPExceptions.size() >= HugeRegion) {
931 LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";);
932 Value2SUsMap empty;
933 reduceHugeMemNodeMaps(stores&: FPExceptions, loads&: empty, N: getReductionSize());
934 }
935 }
936
937 // If it's not a store or a variant load, we're done.
938 if (!MI.mayStore() &&
939 !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad()))
940 continue;
941
    // Always add a dependency edge to BarrierChain if present.
943 if (BarrierChain)
944 BarrierChain->addPredBarrier(SU);
945
946 // Find the underlying objects for MI. The Objs vector is either
947 // empty, or filled with the Values of memory locations which this
948 // SU depends on.
949 UnderlyingObjectsVector Objs;
950 bool ObjsFound = getUnderlyingObjectsForInstr(MI: &MI, MFI, Objects&: Objs,
951 DL: MF.getDataLayout());
952
953 if (MI.mayStore()) {
954 if (!ObjsFound) {
955 // An unknown store depends on all stores and loads.
956 addChainDependencies(SU, Val2SUsMap&: Stores);
957 addChainDependencies(SU, Val2SUsMap&: NonAliasStores);
958 addChainDependencies(SU, Val2SUsMap&: Loads);
959 addChainDependencies(SU, Val2SUsMap&: NonAliasLoads);
960
961 // Map this store to 'UnknownValue'.
962 Stores.insert(SU, V: UnknownValue);
963 } else {
964 // Add precise dependencies against all previously seen memory
965 // accesses mapped to the same Value(s).
966 for (const UnderlyingObject &UnderlObj : Objs) {
967 ValueType V = UnderlObj.getValue();
968 bool ThisMayAlias = UnderlObj.mayAlias();
969
970 // Add dependencies to previous stores and loads mapped to V.
971 addChainDependencies(SU, Val2SUsMap&: (ThisMayAlias ? Stores : NonAliasStores), V);
972 addChainDependencies(SU, Val2SUsMap&: (ThisMayAlias ? Loads : NonAliasLoads), V);
973 }
974 // Update the store map after all chains have been added to avoid adding
        // a self-loop edge if multiple underlying objects are present.
976 for (const UnderlyingObject &UnderlObj : Objs) {
977 ValueType V = UnderlObj.getValue();
978 bool ThisMayAlias = UnderlObj.mayAlias();
979
980 // Map this store to V.
981 (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
982 }
983 // The store may have dependencies to unanalyzable loads and
984 // stores.
985 addChainDependencies(SU, Val2SUsMap&: Loads, V: UnknownValue);
986 addChainDependencies(SU, Val2SUsMap&: Stores, V: UnknownValue);
987 }
988 } else { // SU is a load.
989 if (!ObjsFound) {
990 // An unknown load depends on all stores.
991 addChainDependencies(SU, Val2SUsMap&: Stores);
992 addChainDependencies(SU, Val2SUsMap&: NonAliasStores);
993
994 Loads.insert(SU, V: UnknownValue);
995 } else {
996 for (const UnderlyingObject &UnderlObj : Objs) {
997 ValueType V = UnderlObj.getValue();
998 bool ThisMayAlias = UnderlObj.mayAlias();
999
1000 // Add precise dependencies against all previously seen stores
1001 // mapping to the same Value(s).
1002 addChainDependencies(SU, Val2SUsMap&: (ThisMayAlias ? Stores : NonAliasStores), V);
1003
1004 // Map this load to V.
1005 (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
1006 }
1007 // The load may have dependencies to unanalyzable stores.
1008 addChainDependencies(SU, Val2SUsMap&: Stores, V: UnknownValue);
1009 }
1010 }
1011
1012 // Reduce maps if they grow huge.
1013 if (Stores.size() + Loads.size() >= HugeRegion) {
1014 LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
1015 reduceHugeMemNodeMaps(stores&: Stores, loads&: Loads, N: getReductionSize());
1016 }
1017 if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
1018 LLVM_DEBUG(
1019 dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
1020 reduceHugeMemNodeMaps(stores&: NonAliasStores, loads&: NonAliasLoads, N: getReductionSize());
1021 }
1022 }
1023
1024 if (DbgMI)
1025 FirstDbgValue = DbgMI;
1026
1027 Defs.clear();
1028 Uses.clear();
1029 CurrentVRegDefs.clear();
1030 CurrentVRegUses.clear();
1031
1032 Topo.MarkDirty();
1033}
1034
1035raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
1036 PSV->printCustom(O&: OS);
1037 return OS;
1038}
1039
1040void ScheduleDAGInstrs::Value2SUsMap::dump() {
1041 for (const auto &[ValType, SUs] : *this) {
1042 if (isa<const Value *>(Val: ValType)) {
1043 const Value *V = cast<const Value *>(Val: ValType);
1044 if (isa<UndefValue>(Val: V))
1045 dbgs() << "Unknown";
1046 else
1047 V->printAsOperand(O&: dbgs());
1048 } else if (isa<const PseudoSourceValue *>(Val: ValType))
1049 dbgs() << cast<const PseudoSourceValue *>(Val: ValType);
1050 else
1051 llvm_unreachable("Unknown Value type.");
1052
1053 dbgs() << " : ";
1054 dumpSUList(L: SUs);
1055 }
1056}
1057
1058void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
1059 Value2SUsMap &loads, unsigned N) {
1060 LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
1061 dbgs() << "Loading SUnits:\n"; loads.dump());
1062
  // Insert all SUs' NodeNums into a vector and sort it.
1064 std::vector<unsigned> NodeNums;
1065 NodeNums.reserve(n: stores.size() + loads.size());
1066 for (const auto &[V, SUs] : stores) {
1067 (void)V;
1068 for (const auto *SU : SUs)
1069 NodeNums.push_back(x: SU->NodeNum);
1070 }
1071 for (const auto &[V, SUs] : loads) {
1072 (void)V;
1073 for (const auto *SU : SUs)
1074 NodeNums.push_back(x: SU->NodeNum);
1075 }
1076 llvm::sort(C&: NodeNums);
1077
1078 // The N last elements in NodeNums will be removed, and the SU with
1079 // the lowest NodeNum of them will become the new BarrierChain to
  // let the not-yet-seen SUs have a dependency edge to the removed SUs.
1081 assert(N <= NodeNums.size());
1082 SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
1083 if (BarrierChain) {
1084 // The aliasing and non-aliasing maps reduce independently of each
1085 // other, but share a common BarrierChain. Check if the
    // newBarrierChain is above the former one. If it is not, using
    // newBarrierChain may introduce a cycle, so keep the old one.
1088 if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
1089 BarrierChain->addPredBarrier(SU: newBarrierChain);
1090 BarrierChain = newBarrierChain;
1091 LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
1092 << BarrierChain->NodeNum << ").\n";);
1093 }
1094 else
1095 LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
1096 << BarrierChain->NodeNum << ").\n";);
1097 }
1098 else
1099 BarrierChain = newBarrierChain;
1100
1101 insertBarrierChain(map&: stores);
1102 insertBarrierChain(map&: loads);
1103
1104 LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
1105 dbgs() << "Loading SUnits:\n"; loads.dump());
1106}
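// Worked example: if the two maps together hold 1000 SUs and N is 500, the
// 500 highest NodeNums are selected and the SU with the smallest of them
// becomes the (possibly new) BarrierChain. Every mapped SU above it gains a
// dependency on it and is dropped from the maps, while memory instructions
// visited later in the bottom-up walk are ordered against the BarrierChain
// instead of against the removed SUs individually.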
1107
1108static void toggleKills(const MachineRegisterInfo &MRI, LiveRegUnits &LiveRegs,
1109 MachineInstr &MI, bool addToLiveRegs) {
1110 for (MachineOperand &MO : MI.operands()) {
1111 if (!MO.isReg() || !MO.readsReg())
1112 continue;
1113 Register Reg = MO.getReg();
1114 if (!Reg)
1115 continue;
1116
1117 // Things that are available after the instruction are killed by it.
1118 bool IsKill = LiveRegs.available(Reg);
1119
1120 // Exception: Do not kill reserved registers
1121 MO.setIsKill(IsKill && !MRI.isReserved(PhysReg: Reg));
1122 if (addToLiveRegs)
1123 LiveRegs.addReg(Reg);
1124 }
1125}
1126
1127void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
1128 LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
1129
1130 LiveRegs.init(TRI: *TRI);
1131 LiveRegs.addLiveOuts(MBB);
1132
1133 // Examine block from end to start...
1134 for (MachineInstr &MI : llvm::reverse(C&: MBB)) {
1135 if (MI.isDebugOrPseudoInstr())
1136 continue;
1137
1138 // Update liveness. Registers that are defed but not used in this
1139 // instruction are now dead. Mark register and all subregs as they
1140 // are completely defined.
1141 for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
1142 const MachineOperand &MO = *O;
1143 if (MO.isReg()) {
1144 if (!MO.isDef())
1145 continue;
1146 Register Reg = MO.getReg();
1147 if (!Reg)
1148 continue;
1149 LiveRegs.removeReg(Reg);
1150 } else if (MO.isRegMask()) {
1151 LiveRegs.removeRegsNotPreserved(RegMask: MO.getRegMask());
1152 }
1153 }
1154
1155 // If there is a bundle header fix it up first.
1156 if (!MI.isBundled()) {
1157 toggleKills(MRI, LiveRegs, MI, addToLiveRegs: true);
1158 } else {
1159 MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
1160 if (MI.isBundle())
1161 toggleKills(MRI, LiveRegs, MI, addToLiveRegs: false);
1162
      // Some targets make the (questionable) assumption that the instructions
1164 // inside the bundle are ordered and consequently only the last use of
1165 // a register inside the bundle can kill it.
1166 MachineBasicBlock::instr_iterator I = std::next(x: Bundle);
1167 while (I->isBundledWithSucc())
1168 ++I;
1169 do {
1170 if (!I->isDebugOrPseudoInstr())
1171 toggleKills(MRI, LiveRegs, MI&: *I, addToLiveRegs: true);
1172 --I;
1173 } while (I != Bundle);
1174 }
1175 }
1176}
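// Illustrative effect: after fixupKills(), a use of a physical register that
// is no longer live after its instruction (and is not reserved) carries a
// <kill> flag, while stale kill flags left over from rescheduling are
// cleared.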
1177
1178void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
1179#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1180 dumpNodeName(SU);
1181 if (SchedPrintCycles)
1182 dbgs() << " [TopReadyCycle = " << SU.TopReadyCycle
1183 << ", BottomReadyCycle = " << SU.BotReadyCycle << "]";
1184 dbgs() << ": ";
1185 SU.getInstr()->dump();
1186#endif
1187}
1188
1189void ScheduleDAGInstrs::dump() const {
1190#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1191 if (EntrySU.getInstr() != nullptr)
1192 dumpNodeAll(SU: EntrySU);
1193 for (const SUnit &SU : SUnits)
1194 dumpNodeAll(SU);
1195 if (ExitSU.getInstr() != nullptr)
1196 dumpNodeAll(SU: ExitSU);
1197#endif
1198}
1199
1200std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
1201 std::string s;
1202 raw_string_ostream oss(s);
1203 if (SU == &EntrySU)
1204 oss << "<entry>";
1205 else if (SU == &ExitSU)
1206 oss << "<exit>";
1207 else
1208 SU->getInstr()->print(OS&: oss, /*IsStandalone=*/true);
1209 return oss.str();
1210}
1211
/// Return the basic block label. It is not necessarily unique because a block
/// can contain multiple scheduling regions. But it is fine for visualization.
1214std::string ScheduleDAGInstrs::getDAGName() const {
1215 return "dag." + BB->getFullName();
1216}
1217
1218bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
1219 return SuccSU == &ExitSU || !Topo.IsReachable(SU: PredSU, TargetSU: SuccSU);
1220}
1221
1222bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
1223 if (SuccSU != &ExitSU) {
1224 // Do not use WillCreateCycle, it assumes SD scheduling.
1225 // If Pred is reachable from Succ, then the edge creates a cycle.
1226 if (Topo.IsReachable(SU: PredDep.getSUnit(), TargetSU: SuccSU))
1227 return false;
1228 Topo.AddPredQueued(Y: SuccSU, X: PredDep.getSUnit());
1229 }
1230 SuccSU->addPred(D: PredDep, /*Required=*/!PredDep.isArtificial());
1231 // Return true regardless of whether a new edge needed to be inserted.
1232 return true;
1233}
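// Typical usage from a ScheduleDAGMutation (an illustrative sketch, not taken
// from a specific in-tree mutation):
//   if (DAG->canAddEdge(SuccSU, PredSU))
//     DAG->addEdge(SuccSU, SDep(PredSU, SDep::Artificial));
// canAddEdge() rejects edges that would close a cycle; addEdge() then records
// the dependency and queues the topological-order update.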
1234
1235//===----------------------------------------------------------------------===//
1236// SchedDFSResult Implementation
1237//===----------------------------------------------------------------------===//
1238
1239namespace llvm {
1240
1241/// Internal state used to compute SchedDFSResult.
1242class SchedDFSImpl {
1243 SchedDFSResult &R;
1244
1245 /// Join DAG nodes into equivalence classes by their subtree.
1246 IntEqClasses SubtreeClasses;
1247 /// List PredSU, SuccSU pairs that represent data edges between subtrees.
1248 std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
1249
1250 struct RootData {
1251 unsigned NodeID;
1252 unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
1253 unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
1254 /// children.
1255
1256 RootData(unsigned id): NodeID(id),
1257 ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
1258
1259 unsigned getSparseSetIndex() const { return NodeID; }
1260 };
1261
1262 SparseSet<RootData> RootSet;
1263
1264public:
1265 SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
1266 RootSet.setUniverse(R.DFSNodeData.size());
1267 }
1268
  /// Returns true if this node has been visited by the DFS traversal.
1270 ///
1271 /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
1272 /// ID. Later, SubtreeID is updated but remains valid.
1273 bool isVisited(const SUnit *SU) const {
1274 return R.DFSNodeData[SU->NodeNum].SubtreeID
1275 != SchedDFSResult::InvalidSubtreeID;
1276 }
1277
1278 /// Initializes this node's instruction count. We don't need to flag the node
1279 /// visited until visitPostorder because the DAG cannot have cycles.
1280 void visitPreorder(const SUnit *SU) {
1281 R.DFSNodeData[SU->NodeNum].InstrCount =
1282 SU->getInstr()->isTransient() ? 0 : 1;
1283 }
1284
1285 /// Called once for each node after all predecessors are visited. Revisit this
1286 /// node's predecessors and potentially join them now that we know the ILP of
1287 /// the other predecessors.
1288 void visitPostorderNode(const SUnit *SU) {
1289 // Mark this node as the root of a subtree. It may be joined with its
1290 // successors later.
1291 R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
1292 RootData RData(SU->NodeNum);
1293 RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
1294
1295 // If any predecessors are still in their own subtree, they either cannot be
1296 // joined or are large enough to remain separate. If this parent node's
1297 // total instruction count is not greater than a child subtree by at least
1298 // the subtree limit, then try to join it now since splitting subtrees is
1299 // only useful if multiple high-pressure paths are possible.
1300 unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
1301 for (const SDep &PredDep : SU->Preds) {
1302 if (PredDep.getKind() != SDep::Data)
1303 continue;
1304 unsigned PredNum = PredDep.getSUnit()->NodeNum;
1305 if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
1306 joinPredSubtree(PredDep, Succ: SU, /*CheckLimit=*/false);
1307
1308 // Either link or merge the TreeData entry from the child to the parent.
1309 if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
1310 // If the predecessor's parent is invalid, this is a tree edge and the
1311 // current node is the parent.
1312 if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
1313 RootSet[PredNum].ParentNodeID = SU->NodeNum;
1314 }
1315 else if (RootSet.count(Key: PredNum)) {
1316 // The predecessor is not a root, but is still in the root set. This
1317 // must be the new parent that it was just joined to. Note that
1318 // RootSet[PredNum].ParentNodeID may either be invalid or may still be
1319 // set to the original parent.
1320 RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
1321 RootSet.erase(Key: PredNum);
1322 }
1323 }
1324 RootSet[SU->NodeNum] = RData;
1325 }
1326
1327 /// Called once for each tree edge after calling visitPostOrderNode on
1328 /// the predecessor. Increment the parent node's instruction count and
1329 /// preemptively join this subtree to its parent's if it is small enough.
1330 void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
1331 R.DFSNodeData[Succ->NodeNum].InstrCount
1332 += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
1333 joinPredSubtree(PredDep, Succ);
1334 }
1335
1336 /// Adds a connection for cross edges.
1337 void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1338 ConnectionPairs.emplace_back(args: PredDep.getSUnit(), args&: Succ);
1339 }
1340
1341 /// Sets each node's subtree ID to the representative ID and record
1342 /// connections between trees.
1343 void finalize() {
1344 SubtreeClasses.compress();
1345 R.DFSTreeData.resize(N: SubtreeClasses.getNumClasses());
1346 assert(SubtreeClasses.getNumClasses() == RootSet.size()
1347 && "number of roots should match trees");
1348 for (const RootData &Root : RootSet) {
1349 unsigned TreeID = SubtreeClasses[Root.NodeID];
1350 if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
1351 R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
1352 R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
1353 // Note that SubInstrCount may be greater than InstrCount if we joined
1354 // subtrees across a cross edge. InstrCount will be attributed to the
1355 // original parent, while SubInstrCount will be attributed to the joined
1356 // parent.
1357 }
1358 R.SubtreeConnections.resize(new_size: SubtreeClasses.getNumClasses());
1359 R.SubtreeConnectLevels.resize(new_size: SubtreeClasses.getNumClasses());
1360 LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
1361 for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
1362 R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
1363 LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree "
1364 << R.DFSNodeData[Idx].SubtreeID << '\n');
1365 }
1366 for (const auto &[Pred, Succ] : ConnectionPairs) {
1367 unsigned PredTree = SubtreeClasses[Pred->NodeNum];
1368 unsigned SuccTree = SubtreeClasses[Succ->NodeNum];
1369 if (PredTree == SuccTree)
1370 continue;
1371 unsigned Depth = Pred->getDepth();
1372 addConnection(FromTree: PredTree, ToTree: SuccTree, Depth);
1373 addConnection(FromTree: SuccTree, ToTree: PredTree, Depth);
1374 }
1375 }
1376
1377protected:
1378 /// Joins the predecessor subtree with the successor that is its DFS parent.
1379 /// Applies some heuristics before joining.
1380 bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
1381 bool CheckLimit = true) {
1382 assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
1383
1384 // Check if the predecessor is already joined.
1385 const SUnit *PredSU = PredDep.getSUnit();
1386 unsigned PredNum = PredSU->NodeNum;
1387 if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
1388 return false;
1389
1390 // Four is the magic number of successors before a node is considered a
1391 // pinch point.
1392 unsigned NumDataSucs = 0;
1393 for (const SDep &SuccDep : PredSU->Succs) {
1394 if (SuccDep.getKind() == SDep::Data) {
1395 if (++NumDataSucs >= 4)
1396 return false;
1397 }
1398 }
1399 if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
1400 return false;
1401 R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
1402 SubtreeClasses.join(a: Succ->NodeNum, b: PredNum);
1403 return true;
1404 }
1405
1406 /// Called by finalize() to record a connection between trees.
1407 void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
1408 if (!Depth)
1409 return;
1410
1411 do {
1412 SmallVectorImpl<SchedDFSResult::Connection> &Connections =
1413 R.SubtreeConnections[FromTree];
1414 for (SchedDFSResult::Connection &C : Connections) {
1415 if (C.TreeID == ToTree) {
1416 C.Level = std::max(a: C.Level, b: Depth);
1417 return;
1418 }
1419 }
1420 Connections.push_back(Elt: SchedDFSResult::Connection(ToTree, Depth));
1421 FromTree = R.DFSTreeData[FromTree].ParentTreeID;
1422 } while (FromTree != SchedDFSResult::InvalidSubtreeID);
1423 }
1424};
1425
1426} // end namespace llvm
1427
1428namespace {
1429
1430/// Manage the stack used by a reverse depth-first search over the DAG.
1431class SchedDAGReverseDFS {
1432 std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
1433
1434public:
1435 bool isComplete() const { return DFSStack.empty(); }
1436
1437 void follow(const SUnit *SU) {
1438 DFSStack.emplace_back(args&: SU, args: SU->Preds.begin());
1439 }
1440 void advance() { ++DFSStack.back().second; }
1441
1442 const SDep *backtrack() {
1443 DFSStack.pop_back();
1444 return DFSStack.empty() ? nullptr : std::prev(x: DFSStack.back().second);
1445 }
1446
1447 const SUnit *getCurr() const { return DFSStack.back().first; }
1448
1449 SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
1450
1451 SUnit::const_pred_iterator getPredEnd() const {
1452 return getCurr()->Preds.end();
1453 }
1454};
1455
1456} // end anonymous namespace
1457
1458static bool hasDataSucc(const SUnit *SU) {
1459 for (const SDep &SuccDep : SU->Succs) {
1460 if (SuccDep.getKind() == SDep::Data &&
1461 !SuccDep.getSUnit()->isBoundaryNode())
1462 return true;
1463 }
1464 return false;
1465}
1466
1467/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
1468/// search from this root.
1469void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
1470 if (!IsBottomUp)
1471 llvm_unreachable("Top-down ILP metric is unimplemented");
1472
1473 SchedDFSImpl Impl(*this);
1474 for (const SUnit &SU : SUnits) {
1475 if (Impl.isVisited(SU: &SU) || hasDataSucc(SU: &SU))
1476 continue;
1477
1478 SchedDAGReverseDFS DFS;
1479 Impl.visitPreorder(SU: &SU);
1480 DFS.follow(SU: &SU);
1481 while (true) {
1482 // Traverse the leftmost path as far as possible.
1483 while (DFS.getPred() != DFS.getPredEnd()) {
1484 const SDep &PredDep = *DFS.getPred();
1485 DFS.advance();
1486 // Ignore non-data edges.
1487 if (PredDep.getKind() != SDep::Data
1488 || PredDep.getSUnit()->isBoundaryNode()) {
1489 continue;
1490 }
1491 // An already visited edge is a cross edge, assuming an acyclic DAG.
1492 if (Impl.isVisited(SU: PredDep.getSUnit())) {
1493 Impl.visitCrossEdge(PredDep, Succ: DFS.getCurr());
1494 continue;
1495 }
1496 Impl.visitPreorder(SU: PredDep.getSUnit());
1497 DFS.follow(SU: PredDep.getSUnit());
1498 }
1499 // Visit the top of the stack in postorder and backtrack.
1500 const SUnit *Child = DFS.getCurr();
1501 const SDep *PredDep = DFS.backtrack();
1502 Impl.visitPostorderNode(SU: Child);
1503 if (PredDep)
1504 Impl.visitPostorderEdge(PredDep: *PredDep, Succ: DFS.getCurr());
1505 if (DFS.isComplete())
1506 break;
1507 }
1508 }
1509 Impl.finalize();
1510}
1511
1512/// The root of the given SubtreeID was just scheduled. For all subtrees
1513/// connected to this tree, record the depth of the connection so that the
1514/// nearest connected subtrees can be prioritized.
1515void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1516 for (const Connection &C : SubtreeConnections[SubtreeID]) {
1517 SubtreeConnectLevels[C.TreeID] =
1518 std::max(a: SubtreeConnectLevels[C.TreeID], b: C.Level);
1519 LLVM_DEBUG(dbgs() << " Tree: " << C.TreeID << " @"
1520 << SubtreeConnectLevels[C.TreeID] << '\n');
1521 }
1522}
1523
1524#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1525LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
1526 OS << InstrCount << " / " << Length << " = ";
1527 if (!Length)
1528 OS << "BADILP";
1529 else
1530 OS << format(Fmt: "%g", Vals: ((double)InstrCount / Length));
1531}
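// Example output: InstrCount = 6 and Length = 3 prints "6 / 3 = 2"; a zero
// Length prints "6 / 0 = BADILP".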
1532
1533LLVM_DUMP_METHOD void ILPValue::dump() const {
1534 dbgs() << *this << '\n';
1535}
1536
1537namespace llvm {
1538
1539LLVM_ATTRIBUTE_UNUSED
1540raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
1541 Val.print(OS);
1542 return OS;
1543}
1544
1545} // end namespace llvm
1546
1547#endif
1548
