HexagonVLIWPacketizer.cpp source code [llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp]

1	//===- HexagonPacketizer.cpp - VLIW packetizer ----------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This implements a simple VLIW packetizer using DFA. The packetizer works on
10	// machine basic blocks. For each instruction I in BB, the packetizer consults
11	// the DFA to see if machine resources are available to execute I. If so, the
12	// packetizer checks if I depends on any instruction J in the current packet.
13	// If no dependency is found, I is added to current packet and machine resource
14	// is marked as taken. If any dependency is found, a target API call is made to
15	// prune the dependence.
16	//
17	//===----------------------------------------------------------------------===//
18
19	#include "HexagonVLIWPacketizer.h"
20	#include "Hexagon.h"
21	#include "HexagonInstrInfo.h"
22	#include "HexagonRegisterInfo.h"
23	#include "HexagonSubtarget.h"
24	#include "llvm/ADT/BitVector.h"
25	#include "llvm/ADT/DenseSet.h"
26	#include "llvm/ADT/STLExtras.h"
27	#include "llvm/ADT/StringExtras.h"
28	#include "llvm/Analysis/AliasAnalysis.h"
29	#include "llvm/CodeGen/MachineBasicBlock.h"
30	#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
31	#include "llvm/CodeGen/MachineDominators.h"
32	#include "llvm/CodeGen/MachineFrameInfo.h"
33	#include "llvm/CodeGen/MachineFunction.h"
34	#include "llvm/CodeGen/MachineFunctionPass.h"
35	#include "llvm/CodeGen/MachineInstr.h"
36	#include "llvm/CodeGen/MachineInstrBundle.h"
37	#include "llvm/CodeGen/MachineLoopInfo.h"
38	#include "llvm/CodeGen/MachineOperand.h"
39	#include "llvm/CodeGen/ScheduleDAG.h"
40	#include "llvm/CodeGen/TargetRegisterInfo.h"
41	#include "llvm/CodeGen/TargetSubtargetInfo.h"
42	#include "llvm/IR/DebugLoc.h"
43	#include "llvm/InitializePasses.h"
44	#include "llvm/MC/MCInstrDesc.h"
45	#include "llvm/Pass.h"
46	#include "llvm/Support/CommandLine.h"
47	#include "llvm/Support/Debug.h"
48	#include "llvm/Support/ErrorHandling.h"
49	#include "llvm/Support/raw_ostream.h"
50	#include <cassert>
51	#include <cstdint>
52	#include <iterator>
53
54	using namespace llvm;
55
56	#define DEBUG_TYPE "packets"
57
58	static cl::opt<bool>
59	DisablePacketizer("disable-packetizer", cl::Hidden,
60	cl::desc ("Disable Hexagon packetizer pass"));
61
62	static cl::opt<bool> Slot1Store("slot1-store-slot0-load", cl::Hidden,
63	cl::init(Val: true),
64	cl::desc ("Allow slot1 store and slot0 load"));
65
66	static cl::opt<bool> PacketizeVolatiles(
67	"hexagon-packetize-volatiles", cl::Hidden, cl::init(Val: true),
68	cl::desc ("Allow non-solo packetization of volatile memory references"));
69
70	static cl::opt<bool>
71	EnableGenAllInsnClass("enable-gen-insn", cl::Hidden,
72	cl::desc ("Generate all instruction with TC"));
73
74	static cl::opt<bool>
75	DisableVecDblNVStores("disable-vecdbl-nv-stores", cl::Hidden,
76	cl::desc ("Disable vector double new-value-stores"));
77
78	extern cl::opt<bool> ScheduleInlineAsm;
79
80	namespace llvm {
81
82	FunctionPass createHexagonPacketizer(bool* Minimal);
83	void initializeHexagonPacketizerPass(PassRegistry&);
84
85	} // end namespace llvm
86
87	namespace {
88
89	class HexagonPacketizer : public MachineFunctionPass {
90	public:
91	static char ID;
92
93	HexagonPacketizer(bool Min = false)
94	: MachineFunctionPass (ID), Minimal(Min) {}
95
96	void getAnalysisUsage(AnalysisUsage &AU) const override {
97	AU.setPreservesCFG();
98	AU.addRequired<AAResultsWrapperPass>();
99	AU.addRequired<MachineBranchProbabilityInfo>();
100	AU.addRequired<MachineDominatorTree>();
101	AU.addRequired<MachineLoopInfo>();
102	AU.addPreserved<MachineDominatorTree>();
103	AU.addPreserved<MachineLoopInfo>();
104	MachineFunctionPass::getAnalysisUsage(AU);
105	}
106
107	StringRef getPassName() const override { return "Hexagon Packetizer"; }
108	bool runOnMachineFunction(MachineFunction &Fn) override;
109
110	MachineFunctionProperties getRequiredProperties() const override {
111	return MachineFunctionProperties ().set(
112	MachineFunctionProperties::Property::NoVRegs);
113	}
114
115	private:
116	const HexagonInstrInfo HII = nullptr*;
117	const HexagonRegisterInfo HRI = nullptr*;
118	const bool Minimal = false;
119	};
120
121	} // end anonymous namespace
122
123	char HexagonPacketizer::ID = `0`;
124
125	INITIALIZE_PASS_BEGIN(HexagonPacketizer, "hexagon-packetizer",
126	"Hexagon Packetizer", false, false)
127	INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
128	INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
129	INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
130	INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
131	INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
132	"Hexagon Packetizer", false, false)
133
134	HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
135	MachineLoopInfo &MLI, AAResults *AA,
136	const MachineBranchProbabilityInfo MBPI, bool* Minimal)
137	: VLIWPacketizerList (MF, MLI, AA), MBPI(MBPI), MLI(&MLI),
138	Minimal(Minimal) {
139	HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
140	HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
141
142	addMutation(Mutation: std::make_unique<HexagonSubtarget::UsrOverflowMutation>());
143	addMutation(Mutation: std::make_unique<HexagonSubtarget::HVXMemLatencyMutation>());
144	addMutation(Mutation: std::make_unique<HexagonSubtarget::BankConflictMutation>());
145	}
146
147	// Check if FirstI modifies a register that SecondI reads.
148	static bool hasWriteToReadDep(const MachineInstr &FirstI,
149	const MachineInstr &SecondI,
150	const TargetRegisterInfo *TRI) {
151	for (auto &MO : FirstI.operands()) {
152	if (!MO.isReg() \|\| !MO.isDef())
153	continue;
154	Register R = MO.getReg();
155	if (SecondI.readsRegister(Reg: R, TRI))
156	return true;
157	}
158	return false;
159	}
160
161
162	static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI,
163	MachineBasicBlock::iterator BundleIt, bool Before) {
164	MachineBasicBlock::instr_iterator InsertPt;
165	if (Before)
166	InsertPt = BundleIt.getInstrIterator();
167	else
168	InsertPt = std::next(x: BundleIt).getInstrIterator();
169
170	MachineBasicBlock &B = *MI.getParent();
171	// The instruction should at least be bundled with the preceding instruction
172	// (there will always be one, i.e. BUNDLE, if nothing else).
173	assert(MI.isBundledWithPred());
174	if (MI.isBundledWithSucc()) {
175	MI.clearFlag(Flag: MachineInstr::BundledSucc);
176	MI.clearFlag(Flag: MachineInstr::BundledPred);
177	} else {
178	// If it's not bundled with the successor (i.e. it is the last one
179	// in the bundle), then we can simply unbundle it from the predecessor,
180	// which will take care of updating the predecessor's flag.
181	MI.unbundleFromPred();
182	}
183	B.splice(Where: InsertPt, Other: &B, From: MI.getIterator());
184
185	// Get the size of the bundle without asserting.
186	MachineBasicBlock::const_instr_iterator I = BundleIt.getInstrIterator();
187	MachineBasicBlock::const_instr_iterator E = B.instr_end();
188	unsigned Size = `0`;
189	for (++I; I != E && I ->isBundledWithPred(); ++I)
190	++Size;
191
192	// If there are still two or more instructions, then there is nothing
193	// else to be done.
194	if (Size > `1`)
195	return BundleIt;
196
197	// Otherwise, extract the single instruction out and delete the bundle.
198	MachineBasicBlock::iterator NextIt = std::next(x: BundleIt);
199	MachineInstr &SingleI = *BundleIt ->getNextNode();
200	SingleI.unbundleFromPred();
201	assert(!SingleI.isBundledWithSucc());
202	BundleIt ->eraseFromParent();
203	return NextIt;
204	}
205
206	bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
207	// FIXME: This pass causes verification failures.
208	MF.getProperties().set(
209	MachineFunctionProperties::Property::FailsVerification);
210
211	auto &HST = MF.getSubtarget<HexagonSubtarget>();
212	HII = HST.getInstrInfo();
213	HRI = HST.getRegisterInfo();
214	auto &MLI = getAnalysis<MachineLoopInfo>();
215	auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
216	auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
217
218	if (EnableGenAllInsnClass)
219	HII->genAllInsnTimingClasses(MF);
220
221	// Instantiate the packetizer.
222	bool MinOnly = Minimal \|\| DisablePacketizer \|\| !HST.usePackets() \|\|
223	skipFunction(F: MF.getFunction());
224	HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI, MinOnly);
225
226	// DFA state table should not be empty.
227	assert(Packetizer.getResourceTracker() && "Empty DFA table!");
228
229	// Loop over all basic blocks and remove KILL pseudo-instructions
230	// These instructions confuse the dependence analysis. Consider:
231	// D0 = ... (Insn 0)
232	// R0 = KILL R0, D0 (Insn 1)
233	// R0 = ... (Insn 2)
234	// Here, Insn 1 will result in the dependence graph not emitting an output
235	// dependence between Insn 0 and Insn 2. This can lead to incorrect
236	// packetization
237	for (MachineBasicBlock &MB : MF) {
238	for (MachineInstr &MI : llvm::make_early_inc_range(Range&: MB))
239	if (MI.isKill())
240	MB.erase(I: &MI);
241	}
242
243	// TinyCore with Duplexes: Translate to big-instructions.
244	if (HST.isTinyCoreWithDuplex())
245	HII->translateInstrsForDup(MF, ToBigInstrs: true);
246
247	// Loop over all of the basic blocks.
248	for (auto &MB : MF) {
249	auto Begin = MB.begin(), End = MB.end();
250	while (Begin != End) {
251	// Find the first non-boundary starting from the end of the last
252	// scheduling region.
253	MachineBasicBlock::iterator RB = Begin;
254	while (RB != End && HII->isSchedulingBoundary(MI: *RB, MBB: &MB, MF))
255	++RB;
256	// Find the first boundary starting from the beginning of the new
257	// region.
258	MachineBasicBlock::iterator RE = RB;
259	while (RE != End && !HII->isSchedulingBoundary(MI: *RE, MBB: &MB, MF))
260	++RE;
261	// Add the scheduling boundary if it's not block end.
262	if (RE != End)
263	++RE;
264	// If RB == End, then RE == End.
265	if (RB != End)
266	Packetizer.PacketizeMIs(MBB: &MB, BeginItr: RB, EndItr: RE);
267
268	Begin = RE;
269	}
270	}
271
272	// TinyCore with Duplexes: Translate to tiny-instructions.
273	if (HST.isTinyCoreWithDuplex())
274	HII->translateInstrsForDup(MF, ToBigInstrs: false);
275
276	Packetizer.unpacketizeSoloInstrs(MF);
277	return true;
278	}
279
280	// Reserve resources for a constant extender. Trigger an assertion if the
281	// reservation fails.
282	void HexagonPacketizerList::reserveResourcesForConstExt() {
283	if (!tryAllocateResourcesForConstExt(Reserve: true))
284	llvm_unreachable("Resources not available");
285	}
286
287	bool HexagonPacketizerList::canReserveResourcesForConstExt() {
288	return tryAllocateResourcesForConstExt(Reserve: false);
289	}
290
291	// Allocate resources (i.e. 4 bytes) for constant extender. If succeeded,
292	// return true, otherwise, return false.
293	bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
294	auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc());
295	bool Avail = ResourceTracker->canReserveResources(*ExtMI);
296	if (Reserve && Avail)
297	ResourceTracker->reserveResources(*ExtMI);
298	MF.deleteMachineInstr(MI: ExtMI);
299	return Avail;
300	}
301
302	bool HexagonPacketizerList::isCallDependent(const MachineInstr &MI,
303	SDep::Kind DepType, unsigned DepReg) {
304	// Check for LR dependence.
305	if (DepReg == HRI->getRARegister())
306	return true;
307
308	if (HII->isDeallocRet(MI))
309	if (DepReg == HRI->getFrameRegister() \|\| DepReg == HRI->getStackRegister())
310	return true;
311
312	// Call-like instructions can be packetized with preceding instructions
313	// that define registers implicitly used or modified by the call. Explicit
314	// uses are still prohibited, as in the case of indirect calls:
315	// r0 = ...
316	// J2_jumpr r0
317	if (DepType == SDep::Data) {
318	for (const MachineOperand &MO : MI.operands())
319	if (MO.isReg() && MO.getReg() == DepReg && !MO.isImplicit())
320	return true;
321	}
322
323	return false;
324	}
325
326	static bool isRegDependence(const SDep::Kind DepType) {
327	return DepType == SDep::Data \|\| DepType == SDep::Anti \|\|
328	DepType == SDep::Output;
329	}
330
331	static bool isDirectJump(const MachineInstr &MI) {
332	return MI.getOpcode() == Hexagon::J2_jump;
333	}
334
335	static bool isSchedBarrier(const MachineInstr &MI) {
336	switch (MI.getOpcode()) {
337	case Hexagon::Y2_barrier:
338	return true;
339	}
340	return false;
341	}
342
343	static bool isControlFlow(const MachineInstr &MI) {
344	return MI.getDesc().isTerminator() \|\| MI.getDesc().isCall();
345	}
346
347	/// Returns true if the instruction modifies a callee-saved register.
348	static bool doesModifyCalleeSavedReg(const MachineInstr &MI,
349	const TargetRegisterInfo *TRI) {
350	const MachineFunction &MF = *MI.getParent()->getParent();
351	for (auto CSR = TRI->getCalleeSavedRegs(MF: &MF); CSR && CSR; ++CSR)
352	if (MI.modifiesRegister(Reg: *CSR, TRI))
353	return true;
354	return false;
355	}
356
357	// Returns true if an instruction can be promoted to .new predicate or
358	// new-value store.
359	bool HexagonPacketizerList::isNewifiable(const MachineInstr &MI,
360	const TargetRegisterClass *NewRC) {
361	// Vector stores can be predicated, and can be new-value stores, but
362	// they cannot be predicated on a .new predicate value.
363	if (NewRC == &Hexagon::PredRegsRegClass) {
364	if (HII->isHVXVec(MI) && MI.mayStore())
365	return false;
366	return HII->isPredicated(MI) && HII->getDotNewPredOp(MI, MBPI: nullptr) > `0`;
367	}
368	// If the class is not PredRegs, it could only apply to new-value stores.
369	return HII->mayBeNewStore(MI);
370	}
371
372	// Promote an instructiont to its .cur form.
373	// At this time, we have already made a call to canPromoteToDotCur and made
374	// sure that it can indeed* be promoted.*
375	bool HexagonPacketizerList::promoteToDotCur(MachineInstr &MI,
376	SDep::Kind DepType, MachineBasicBlock::iterator &MII,
377	const TargetRegisterClass* RC) {
378	assert(DepType == SDep::Data);
379	int CurOpcode = HII->getDotCurOp(MI);
380	MI.setDesc(HII->get(CurOpcode));
381	return true;
382	}
383
384	void HexagonPacketizerList::cleanUpDotCur() {
385	MachineInstr MI = nullptr*;
386	for (auto *BI : CurrentPacketMIs) {
387	LLVM_DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
388	if (HII->isDotCurInst(MI: *BI)) {
389	MI = BI;
390	continue;
391	}
392	if (MI) {
393	for (auto &MO : BI->operands())
394	if (MO.isReg() && MO.getReg() == MI->getOperand(i: `0`).getReg())
395	return;
396	}
397	}
398	if (!MI)
399	return;
400	// We did not find a use of the CUR, so de-cur it.
401	MI->setDesc(HII->get(HII->getNonDotCurOp(MI: *MI)));
402	LLVM_DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
403	}
404
405	// Check to see if an instruction can be dot cur.
406	bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI,
407	const SUnit PacketSU, unsigned* DepReg, MachineBasicBlock::iterator &MII,
408	const TargetRegisterClass *RC) {
409	if (!HII->isHVXVec(MI))
410	return false;
411	if (!HII->isHVXVec(MI: *MII))
412	return false;
413
414	// Already a dot new instruction.
415	if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
416	return false;
417
418	if (!HII->mayBeCurLoad(MI))
419	return false;
420
421	// The "cur value" cannot come from inline asm.
422	if (PacketSU->getInstr()->isInlineAsm())
423	return false;
424
425	// Make sure candidate instruction uses cur.
426	LLVM_DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; MI.dump();
427	dbgs() << "in packet\n";);
428	MachineInstr &MJ = *MII;
429	LLVM_DEBUG({
430	dbgs() << "Checking CUR against ";
431	MJ.dump();
432	});
433	Register DestReg = MI.getOperand(i: `0`).getReg();
434	bool FoundMatch = false;
435	for (auto &MO : MJ.operands())
436	if (MO.isReg() && MO.getReg() == DestReg)
437	FoundMatch = true;
438	if (!FoundMatch)
439	return false;
440
441	// Check for existing uses of a vector register within the packet which
442	// would be affected by converting a vector load into .cur formt.
443	for (auto *BI : CurrentPacketMIs) {
444	LLVM_DEBUG(dbgs() << "packet has "; BI->dump(););
445	if (BI->readsRegister(Reg: DepReg, TRI: MF.getSubtarget().getRegisterInfo()))
446	return false;
447	}
448
449	LLVM_DEBUG(dbgs() << "Can Dot CUR MI\n"; MI.dump(););
450	// We can convert the opcode into a .cur.
451	return true;
452	}
453
454	// Promote an instruction to its .new form. At this time, we have already
455	// made a call to canPromoteToDotNew and made sure that it can indeed* be*
456	// promoted.
457	bool HexagonPacketizerList::promoteToDotNew(MachineInstr &MI,
458	SDep::Kind DepType, MachineBasicBlock::iterator &MII,
459	const TargetRegisterClass* RC) {
460	assert(DepType == SDep::Data);
461	int NewOpcode;
462	if (RC == &Hexagon::PredRegsRegClass)
463	NewOpcode = HII->getDotNewPredOp(MI, MBPI);
464	else
465	NewOpcode = HII->getDotNewOp(MI);
466	MI.setDesc(HII->get(NewOpcode));
467	return true;
468	}
469
470	bool HexagonPacketizerList::demoteToDotOld(MachineInstr &MI) {
471	int NewOpcode = HII->getDotOldOp(MI);
472	MI.setDesc(HII->get(NewOpcode));
473	return true;
474	}
475
476	bool HexagonPacketizerList::useCallersSP(MachineInstr &MI) {
477	unsigned Opc = MI.getOpcode();
478	switch (Opc) {
479	case Hexagon::S2_storerd_io:
480	case Hexagon::S2_storeri_io:
481	case Hexagon::S2_storerh_io:
482	case Hexagon::S2_storerb_io:
483	break;
484	default:
485	llvm_unreachable("Unexpected instruction");
486	}
487	unsigned FrameSize = MF.getFrameInfo().getStackSize();
488	MachineOperand &Off = MI.getOperand(i: `1`);
489	int64_t NewOff = Off.getImm() - (FrameSize + HEXAGON_LRFP_SIZE);
490	if (HII->isValidOffset(Opc, NewOff, HRI)) {
491	Off.setImm(NewOff);
492	return true;
493	}
494	return false;
495	}
496
497	void HexagonPacketizerList::useCalleesSP(MachineInstr &MI) {
498	unsigned Opc = MI.getOpcode();
499	switch (Opc) {
500	case Hexagon::S2_storerd_io:
501	case Hexagon::S2_storeri_io:
502	case Hexagon::S2_storerh_io:
503	case Hexagon::S2_storerb_io:
504	break;
505	default:
506	llvm_unreachable("Unexpected instruction");
507	}
508	unsigned FrameSize = MF.getFrameInfo().getStackSize();
509	MachineOperand &Off = MI.getOperand(i: `1`);
510	Off.setImm(Off.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
511	}
512
513	/// Return true if we can update the offset in MI so that MI and MJ
514	/// can be packetized together.
515	bool HexagonPacketizerList::updateOffset(SUnit SUI, SUnit SUJ) {
516	assert(SUI->getInstr() && SUJ->getInstr());
517	MachineInstr &MI = *SUI->getInstr();
518	MachineInstr &MJ = *SUJ->getInstr();
519
520	unsigned BPI, OPI;
521	if (!HII->getBaseAndOffsetPosition(MI, BasePos&: BPI, OffsetPos&: OPI))
522	return false;
523	unsigned BPJ, OPJ;
524	if (!HII->getBaseAndOffsetPosition(MI: MJ, BasePos&: BPJ, OffsetPos&: OPJ))
525	return false;
526	Register Reg = MI.getOperand(i: BPI).getReg();
527	if (Reg != MJ.getOperand(i: BPJ).getReg())
528	return false;
529	// Make sure that the dependences do not restrict adding MI to the packet.
530	// That is, ignore anti dependences, and make sure the only data dependence
531	// involves the specific register.
532	for (const auto &PI : SUI->Preds)
533	if (PI.getKind() != SDep::Anti &&
534	(PI.getKind() != SDep::Data \|\| PI.getReg() != Reg))
535	return false;
536	int Incr;
537	if (!HII->getIncrementValue(MI: MJ, Value&: Incr))
538	return false;
539
540	int64_t Offset = MI.getOperand(i: OPI).getImm();
541	if (!HII->isValidOffset(MI.getOpcode(), Offset+Incr, HRI))
542	return false;
543
544	MI.getOperand(i: OPI).setImm(Offset + Incr);
545	ChangedOffset = Offset;
546	return true;
547	}
548
549	/// Undo the changed offset. This is needed if the instruction cannot be
550	/// added to the current packet due to a different instruction.
551	void HexagonPacketizerList::undoChangedOffset(MachineInstr &MI) {
552	unsigned BP, OP;
553	if (!HII->getBaseAndOffsetPosition(MI, BasePos&: BP, OffsetPos&: OP))
554	llvm_unreachable("Unable to find base and offset operands.");
555	MI.getOperand(i: OP).setImm(ChangedOffset);
556	}
557
// Sense of the predicate guarding an instruction, as reported by
// getPredicateSense.
enum PredicateKind {
  PK_False,  // Predicated, and not predicated-true.
  PK_True,   // Predicated-true.
  PK_Unknown // Not predicated.
};
563
564	/// Returns true if an instruction is predicated on p0 and false if it's
565	/// predicated on !p0.
566	static PredicateKind getPredicateSense(const MachineInstr &MI,
567	const HexagonInstrInfo *HII) {
568	if (!HII->isPredicated(MI))
569	return PK_Unknown;
570	if (HII->isPredicatedTrue(MI))
571	return PK_True;
572	return PK_False;
573	}
574
575	static const MachineOperand &getPostIncrementOperand(const MachineInstr &MI,
576	const HexagonInstrInfo *HII) {
577	assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
578	#ifndef NDEBUG
579	// Post Increment means duplicates. Use dense map to find duplicates in the
580	// list. Caution: Densemap initializes with the minimum of 64 buckets,
581	// whereas there are at most 5 operands in the post increment.
582	DenseSet<unsigned> DefRegsSet;
583	for (auto &MO : MI.operands())
584	if (MO.isReg() && MO.isDef())
585	DefRegsSet.insert(V: MO.getReg());
586
587	for (auto &MO : MI.operands())
588	if (MO.isReg() && MO.isUse() && DefRegsSet.count(V: MO.getReg()))
589	return MO;
590	#else
591	if (MI.mayLoad()) {
592	const MachineOperand &Op1 = MI.getOperand(`1`);
593	// The 2nd operand is always the post increment operand in load.
594	assert(Op1.isReg() && "Post increment operand has be to a register.");
595	return Op1;
596	}
597	if (MI.getDesc().mayStore()) {
598	const MachineOperand &Op0 = MI.getOperand(`0`);
599	// The 1st operand is always the post increment operand in store.
600	assert(Op0.isReg() && "Post increment operand has be to a register.");
601	return Op0;
602	}
603	#endif
604	// we should never come here.
605	llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
606	}
607
608	// Get the value being stored.
609	static const MachineOperand& getStoreValueOperand(const MachineInstr &MI) {
610	// value being stored is always the last operand.
611	return MI.getOperand(i: MI.getNumOperands()-`1`);
612	}
613
614	static bool isLoadAbsSet(const MachineInstr &MI) {
615	unsigned Opc = MI.getOpcode();
616	switch (Opc) {
617	case Hexagon::L4_loadrd_ap:
618	case Hexagon::L4_loadrb_ap:
619	case Hexagon::L4_loadrh_ap:
620	case Hexagon::L4_loadrub_ap:
621	case Hexagon::L4_loadruh_ap:
622	case Hexagon::L4_loadri_ap:
623	return true;
624	}
625	return false;
626	}
627
628	static const MachineOperand &getAbsSetOperand(const MachineInstr &MI) {
629	assert(isLoadAbsSet(MI));
630	return MI.getOperand(i: `1`);
631	}
632
633	// Can be new value store?
634	// Following restrictions are to be respected in convert a store into
635	// a new value store.
636	// 1. If an instruction uses auto-increment, its address register cannot
637	// be a new-value register. Arch Spec 5.4.2.1
638	// 2. If an instruction uses absolute-set addressing mode, its address
639	// register cannot be a new-value register. Arch Spec 5.4.2.1.
640	// 3. If an instruction produces a 64-bit result, its registers cannot be used
641	// as new-value registers. Arch Spec 5.4.2.2.
642	// 4. If the instruction that sets the new-value register is conditional, then
643	// the instruction that uses the new-value register must also be conditional,
644	// and both must always have their predicates evaluate identically.
645	// Arch Spec 5.4.2.3.
646	// 5. There is an implied restriction that a packet cannot have another store,
647	// if there is a new value store in the packet. Corollary: if there is
648	// already a store in a packet, there can not be a new value store.
649	// Arch Spec: 3.4.4.2
650	bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
651	const MachineInstr &PacketMI, unsigned DepReg) {
652	// Make sure we are looking at the store, that can be promoted.
653	if (!HII->mayBeNewStore(MI))
654	return false;
655
656	// Make sure there is dependency and can be new value'd.
657	const MachineOperand &Val = getStoreValueOperand(MI);
658	if (Val.isReg() && Val.getReg() != DepReg)
659	return false;
660
661	const MCInstrDesc& MCID = PacketMI.getDesc();
662
663	// First operand is always the result.
664	const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, `0`, HRI, MF);
665	// Double regs can not feed into new value store: PRM section: 5.4.2.2.
666	if (PacketRC == &Hexagon::DoubleRegsRegClass)
667	return false;
668
669	// New-value stores are of class NV (slot 0), dual stores require class ST
670	// in slot 0 (PRM 5.5).
671	for (auto *I : CurrentPacketMIs) {
672	SUnit *PacketSU = MIToSUnit.find(x: I)->second;
673	if (PacketSU->getInstr()->mayStore())
674	return false;
675	}
676
677	// Make sure it's NOT the post increment register that we are going to
678	// new value.
679	if (HII->isPostIncrement(MI) &&
680	getPostIncrementOperand(MI, HII).getReg() == DepReg) {
681	return false;
682	}
683
684	if (HII->isPostIncrement(MI: PacketMI) && PacketMI.mayLoad() &&
685	getPostIncrementOperand(MI: PacketMI, HII).getReg() == DepReg) {
686	// If source is post_inc, or absolute-set addressing, it can not feed
687	// into new value store
688	// r3 = memw(r2++#4)
689	// memw(r30 + #-1404) = r2.new -> can not be new value store
690	// arch spec section: 5.4.2.1.
691	return false;
692	}
693
694	if (isLoadAbsSet(MI: PacketMI) && getAbsSetOperand(MI: PacketMI).getReg() == DepReg)
695	return false;
696
697	// If the source that feeds the store is predicated, new value store must
698	// also be predicated.
699	if (HII->isPredicated(MI: PacketMI)) {
700	if (!HII->isPredicated(MI))
701	return false;
702
703	// Check to make sure that they both will have their predicates
704	// evaluate identically.
705	unsigned predRegNumSrc = `0`;
706	unsigned predRegNumDst = `0`;
707	const TargetRegisterClass* predRegClass = nullptr;
708
709	// Get predicate register used in the source instruction.
710	for (auto &MO : PacketMI.operands()) {
711	if (!MO.isReg())
712	continue;
713	predRegNumSrc = MO.getReg();
714	predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc);
715	if (predRegClass == &Hexagon::PredRegsRegClass)
716	break;
717	}
718	assert((predRegClass == &Hexagon::PredRegsRegClass) &&
719	"predicate register not found in a predicated PacketMI instruction");
720
721	// Get predicate register used in new-value store instruction.
722	for (auto &MO : MI.operands()) {
723	if (!MO.isReg())
724	continue;
725	predRegNumDst = MO.getReg();
726	predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst);
727	if (predRegClass == &Hexagon::PredRegsRegClass)
728	break;
729	}
730	assert((predRegClass == &Hexagon::PredRegsRegClass) &&
731	"predicate register not found in a predicated MI instruction");
732
733	// New-value register producer and user (store) need to satisfy these
734	// constraints:
735	// 1) Both instructions should be predicated on the same register.
736	// 2) If producer of the new-value register is .new predicated then store
737	// should also be .new predicated and if producer is not .new predicated
738	// then store should not be .new predicated.
739	// 3) Both new-value register producer and user should have same predicate
740	// sense, i.e, either both should be negated or both should be non-negated.
741	if (predRegNumDst != predRegNumSrc \|\|
742	HII->isDotNewInst(MI: PacketMI) != HII->isDotNewInst(MI) \|\|
743	getPredicateSense(MI, HII) != getPredicateSense(MI: PacketMI, HII))
744	return false;
745	}
746
747	// Make sure that other than the new-value register no other store instruction
748	// register has been modified in the same packet. Predicate registers can be
749	// modified by they should not be modified between the producer and the store
750	// instruction as it will make them both conditional on different values.
751	// We already know this to be true for all the instructions before and
752	// including PacketMI. Howerver, we need to perform the check for the
753	// remaining instructions in the packet.
754
755	unsigned StartCheck = `0`;
756
757	for (auto *I : CurrentPacketMIs) {
758	SUnit *TempSU = MIToSUnit.find(x: I)->second;
759	MachineInstr &TempMI = *TempSU->getInstr();
760
761	// Following condition is true for all the instructions until PacketMI is
762	// reached (StartCheck is set to 0 before the for loop).
763	// StartCheck flag is 1 for all the instructions after PacketMI.
764	if (&TempMI != &PacketMI && !StartCheck) // Start processing only after
765	continue; // encountering PacketMI.
766
767	StartCheck = `1`;
768	if (&TempMI == &PacketMI) // We don't want to check PacketMI for dependence.
769	continue;
770
771	for (auto &MO : MI.operands())
772	if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
773	return false;
774	}
775
776	// Make sure that for non-POST_INC stores:
777	// 1. The only use of reg is DepReg and no other registers.
778	// This handles base+index registers.
779	// The following store can not be dot new.
780	// Eg. r0 = add(r0, #3)
781	// memw(r1+r0<<#2) = r0
782	if (!HII->isPostIncrement(MI)) {
783	for (unsigned opNum = `0`; opNum < MI.getNumOperands()-`1`; opNum++) {
784	const MachineOperand &MO = MI.getOperand(i: opNum);
785	if (MO.isReg() && MO.getReg() == DepReg)
786	return false;
787	}
788	}
789
790	// If data definition is because of implicit definition of the register,
791	// do not newify the store. Eg.
792	// %r9 = ZXTH %r12, implicit %d6, implicit-def %r12
793	// S2_storerh_io %r8, 2, killed %r12; mem:ST2[%scevgep343]
794	for (auto &MO : PacketMI.operands()) {
795	if (MO.isRegMask() && MO.clobbersPhysReg(PhysReg: DepReg))
796	return false;
797	if (!MO.isReg() \|\| !MO.isDef() \|\| !MO.isImplicit())
798	continue;
799	Register R = MO.getReg();
800	if (R == DepReg \|\| HRI->isSuperRegister(DepReg, R))
801	return false;
802	}
803
804	// Handle imp-use of super reg case. There is a target independent side
805	// change that should prevent this situation but I am handling it for
806	// just-in-case. For example, we cannot newify R2 in the following case:
807	// %r3 = A2_tfrsi 0;
808	// S2_storeri_io killed %r0, 0, killed %r2, implicit killed %d1;
809	for (auto &MO : MI.operands()) {
810	if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
811	return false;
812	}
813
814	// Can be dot new store.
815	return true;
816	}
817
818	// Can this MI to promoted to either new value store or new value jump.
819	bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI,
820	const SUnit PacketSU, unsigned* DepReg,
821	MachineBasicBlock::iterator &MII) {
822	if (!HII->mayBeNewStore(MI))
823	return false;
824
825	// Check to see the store can be new value'ed.
826	MachineInstr &PacketMI = *PacketSU->getInstr();
827	if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
828	return true;
829
830	// Check to see the compare/jump can be new value'ed.
831	// This is done as a pass on its own. Don't need to check it here.
832	return false;
833	}
834
835	static bool isImplicitDependency(const MachineInstr &I, bool CheckDef,
836	unsigned DepReg) {
837	for (auto &MO : I.operands()) {
838	if (CheckDef && MO.isRegMask() && MO.clobbersPhysReg(PhysReg: DepReg))
839	return true;
840	if (!MO.isReg() \|\| MO.getReg() != DepReg \|\| !MO.isImplicit())
841	continue;
842	if (CheckDef == MO.isDef())
843	return true;
844	}
845	return false;
846	}
847
848	// Check to see if an instruction can be dot new.
849	bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
850	const SUnit PacketSU, unsigned* DepReg, MachineBasicBlock::iterator &MII,
851	const TargetRegisterClass* RC) {
852	// Already a dot new instruction.
853	if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
854	return false;
855
856	if (!isNewifiable(MI, NewRC: RC))
857	return false;
858
859	const MachineInstr &PI = *PacketSU->getInstr();
860
861	// The "new value" cannot come from inline asm.
862	if (PI.isInlineAsm())
863	return false;
864
865	// IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
866	// sense.
867	if (PI.isImplicitDef())
868	return false;
869
870	// If dependency is trough an implicitly defined register, we should not
871	// newify the use.
872	if (isImplicitDependency(I: PI, CheckDef: true, DepReg) \|\|
873	isImplicitDependency(I: MI, CheckDef: false, DepReg))
874	return false;
875
876	const MCInstrDesc& MCID = PI.getDesc();
877	const TargetRegisterClass *VecRC = HII->getRegClass(MCID, `0`, HRI, MF);
878	if (DisableVecDblNVStores && VecRC == &Hexagon::HvxWRRegClass)
879	return false;
880
881	// predicate .new
882	if (RC == &Hexagon::PredRegsRegClass)
883	return HII->predCanBeUsedAsDotNew(MI: PI, PredReg: DepReg);
884
885	if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
886	return false;
887
888	// Create a dot new machine instruction to see if resources can be
889	// allocated. If not, bail out now.
890	int NewOpcode = (RC != &Hexagon::PredRegsRegClass) ? HII->getDotNewOp(MI) :
891	HII->getDotNewPredOp(MI, MBPI);
892	const MCInstrDesc &D = HII->get(NewOpcode);
893	MachineInstr *NewMI = MF.CreateMachineInstr(MCID: D, DL: DebugLoc ());
894	bool ResourcesAvailable = ResourceTracker->canReserveResources(MI&: *NewMI);
895	MF.deleteMachineInstr(MI: NewMI);
896	if (!ResourcesAvailable)
897	return false;
898
899	// New Value Store only. New Value Jump generated as a separate pass.
900	if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII))
901	return false;
902
903	return true;
904	}
905
906	// Go through the packet instructions and search for an anti dependency between
907	// them and DepReg from MI. Consider this case:
908	// Trying to add
909	// a) %r1 = TFRI_cdNotPt %p3, 2
910	// to this packet:
911	// {
912	// b) %p0 = C2_or killed %p3, killed %p0
913	// c) %p3 = C2_tfrrp %r23
914	// d) %r1 = C2_cmovenewit %p3, 4
915	// }
916	// The P3 from a) and d) will be complements after
917	// a)'s P3 is converted to .new form
918	// Anti-dep between c) and b) is irrelevant for this case
919	bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr &MI,
920	unsigned DepReg) {
921	SUnit *PacketSUDep = MIToSUnit.find(x: &MI)->second;
922
923	for (auto *I : CurrentPacketMIs) {
924	// We only care for dependencies to predicated instructions
925	if (!HII->isPredicated(MI: *I))
926	continue;
927
928	// Scheduling Unit for current insn in the packet
929	SUnit *PacketSU = MIToSUnit.find(x: I)->second;
930
931	// Look at dependencies between current members of the packet and
932	// predicate defining instruction MI. Make sure that dependency is
933	// on the exact register we care about.
934	if (PacketSU->isSucc(N: PacketSUDep)) {
935	for (unsigned i = `0`; i < PacketSU->Succs.size(); ++i) {
936	auto &Dep = PacketSU->Succs [i];
937	if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti &&
938	Dep.getReg() == DepReg)
939	return true;
940	}
941	}
942	}
943
944	return false;
945	}
946
947	/// Gets the predicate register of a predicated instruction.
948	static unsigned getPredicatedRegister(MachineInstr &MI,
949	const HexagonInstrInfo *QII) {
950	/// We use the following rule: The first predicate register that is a use is
951	/// the predicate register of a predicated instruction.
952	assert(QII->isPredicated(MI) && "Must be predicated instruction");
953
954	for (auto &Op : MI.operands()) {
955	if (Op.isReg() && Op.getReg() && Op.isUse() &&
956	Hexagon::PredRegsRegClass.contains(Op.getReg()))
957	return Op.getReg();
958	}
959
960	llvm_unreachable("Unknown instruction operand layout");
961	return `0`;
962	}
963
964	// Given two predicated instructions, this function detects whether
965	// the predicates are complements.
966	bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1,
967	MachineInstr &MI2) {
968	// If we don't know the predicate sense of the instructions bail out early, we
969	// need it later.
970	if (getPredicateSense(MI: MI1, HII) == PK_Unknown \|\|
971	getPredicateSense(MI: MI2, HII) == PK_Unknown)
972	return false;
973
974	// Scheduling unit for candidate.
975	SUnit *SU = MIToSUnit [&MI1];
976
977	// One corner case deals with the following scenario:
978	// Trying to add
979	// a) %r24 = A2_tfrt %p0, %r25
980	// to this packet:
981	// {
982	// b) %r25 = A2_tfrf %p0, %r24
983	// c) %p0 = C2_cmpeqi %r26, 1
984	// }
985	//
986	// On general check a) and b) are complements, but presence of c) will
987	// convert a) to .new form, and then it is not a complement.
988	// We attempt to detect it by analyzing existing dependencies in the packet.
989
990	// Analyze relationships between all existing members of the packet.
991	// Look for Anti dependecy on the same predicate reg as used in the
992	// candidate.
993	for (auto *I : CurrentPacketMIs) {
994	// Scheduling Unit for current insn in the packet.
995	SUnit *PacketSU = MIToSUnit.find(x: I)->second;
996
997	// If this instruction in the packet is succeeded by the candidate...
998	if (PacketSU->isSucc(N: SU)) {
999	for (unsigned i = `0`; i < PacketSU->Succs.size(); ++i) {
1000	auto Dep = PacketSU->Succs [i];
1001	// The corner case exist when there is true data dependency between
1002	// candidate and one of current packet members, this dep is on
1003	// predicate reg, and there already exist anti dep on the same pred in
1004	// the packet.
1005	if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data &&
1006	Hexagon::PredRegsRegClass.contains(Dep.getReg())) {
1007	// Here I know that I is predicate setting instruction with true
1008	// data dep to candidate on the register we care about - c) in the
1009	// above example. Now I need to see if there is an anti dependency
1010	// from c) to any other instruction in the same packet on the pred
1011	// reg of interest.
1012	if (restrictingDepExistInPacket(MI&: *I, DepReg: Dep.getReg()))
1013	return false;
1014	}
1015	}
1016	}
1017	}
1018
1019	// If the above case does not apply, check regular complement condition.
1020	// Check that the predicate register is the same and that the predicate
1021	// sense is different We also need to differentiate .old vs. .new: !p0
1022	// is not complementary to p0.new.
1023	unsigned PReg1 = getPredicatedRegister(MI&: MI1, QII: HII);
1024	unsigned PReg2 = getPredicatedRegister(MI&: MI2, QII: HII);
1025	return PReg1 == PReg2 &&
1026	Hexagon::PredRegsRegClass.contains(PReg1) &&
1027	Hexagon::PredRegsRegClass.contains(PReg2) &&
1028	getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
1029	HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2);
1030	}
1031
1032	// Initialize packetizer flags.
1033	void HexagonPacketizerList::initPacketizerState() {
1034	Dependence = false;
1035	PromotedToDotNew = false;
1036	GlueToNewValueJump = false;
1037	GlueAllocframeStore = false;
1038	FoundSequentialDependence = false;
1039	ChangedOffset = INT64_MAX;
1040	}
1041
1042	// Ignore bundling of pseudo instructions.
1043	bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI,
1044	const MachineBasicBlock *) {
1045	if (MI.isDebugInstr())
1046	return true;
1047
1048	if (MI.isCFIInstruction())
1049	return false;
1050
1051	// We must print out inline assembly.
1052	if (MI.isInlineAsm())
1053	return false;
1054
1055	if (MI.isImplicitDef())
1056	return false;
1057
1058	// We check if MI has any functional units mapped to it. If it doesn't,
1059	// we ignore the instruction.
1060	const MCInstrDesc& TID = MI.getDesc();
1061	auto *IS = ResourceTracker->getInstrItins()->beginStage(ItinClassIndx: TID.getSchedClass());
1062	return !IS->getUnits();
1063	}
1064
1065	bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
1066	// Ensure any bundles created by gather packetize remain separate.
1067	if (MI.isBundle())
1068	return true;
1069
1070	if (MI.isEHLabel() \|\| MI.isCFIInstruction())
1071	return true;
1072
1073	// Consider inline asm to not be a solo instruction by default.
1074	// Inline asm will be put in a packet temporarily, but then it will be
1075	// removed, and placed outside of the packet (before or after, depending
1076	// on dependencies). This is to reduce the impact of inline asm as a
1077	// "packet splitting" instruction.
1078	if (MI.isInlineAsm() && !ScheduleInlineAsm)
1079	return true;
1080
1081	if (isSchedBarrier(MI))
1082	return true;
1083
1084	if (HII->isSolo(MI))
1085	return true;
1086
1087	if (MI.getOpcode() == Hexagon::PATCHABLE_FUNCTION_ENTER \|\|
1088	MI.getOpcode() == Hexagon::PATCHABLE_FUNCTION_EXIT \|\|
1089	MI.getOpcode() == Hexagon::PATCHABLE_TAIL_CALL)
1090	return true;
1091
1092	if (MI.getOpcode() == Hexagon::A2_nop)
1093	return true;
1094
1095	return false;
1096	}
1097
1098	// Quick check if instructions MI and MJ cannot coexist in the same packet.
1099	// Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm",
1100	// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm".
1101	// For full test call this function twice:
1102	// cannotCoexistAsymm(MI, MJ) \|\| cannotCoexistAsymm(MJ, MI)
1103	// Doing the test only one way saves the amount of code in this function,
1104	// since every test would need to be repeated with the MI and MJ reversed.
1105	static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
1106	const HexagonInstrInfo &HII) {
1107	const MachineFunction *MF = MI.getParent()->getParent();
1108	if (MF->getSubtarget<HexagonSubtarget>().hasV60OpsOnly() &&
1109	HII.isHVXMemWithAIndirect(I: MI, J: MJ))
1110	return true;
1111
1112	// Don't allow a store and an instruction that must be in slot0 and
1113	// doesn't allow a slot1 instruction.
1114	if (MI.mayStore() && HII.isRestrictNoSlot1Store(MI: MJ) && HII.isPureSlot0(MI: MJ))
1115	return true;
1116
1117	// An inline asm cannot be together with a branch, because we may not be
1118	// able to remove the asm out after packetizing (i.e. if the asm must be
1119	// moved past the bundle). Similarly, two asms cannot be together to avoid
1120	// complications when determining their relative order outside of a bundle.
1121	if (MI.isInlineAsm())
1122	return MJ.isInlineAsm() \|\| MJ.isBranch() \|\| MJ.isBarrier() \|\|
1123	MJ.isCall() \|\| MJ.isTerminator();
1124
1125	// New-value stores cannot coexist with any other stores.
1126	if (HII.isNewValueStore(MI) && MJ.mayStore())
1127	return true;
1128
1129	switch (MI.getOpcode()) {
1130	case Hexagon::S2_storew_locked:
1131	case Hexagon::S4_stored_locked:
1132	case Hexagon::L2_loadw_locked:
1133	case Hexagon::L4_loadd_locked:
1134	case Hexagon::Y2_dccleana:
1135	case Hexagon::Y2_dccleaninva:
1136	case Hexagon::Y2_dcinva:
1137	case Hexagon::Y2_dczeroa:
1138	case Hexagon::Y4_l2fetch:
1139	case Hexagon::Y5_l2fetch: {
1140	// These instructions can only be grouped with ALU32 or non-floating-point
1141	// XTYPE instructions. Since there is no convenient way of identifying fp
1142	// XTYPE instructions, only allow grouping with ALU32 for now.
1143	unsigned TJ = HII.getType(MI: MJ);
1144	if (TJ != HexagonII::TypeALU32_2op &&
1145	TJ != HexagonII::TypeALU32_3op &&
1146	TJ != HexagonII::TypeALU32_ADDI)
1147	return true;
1148	break;
1149	}
1150	default:
1151	break;
1152	}
1153
1154	// "False" really means that the quick check failed to determine if
1155	// I and J cannot coexist.
1156	return false;
1157	}
1158
1159	// Full, symmetric check.
1160	bool HexagonPacketizerList::cannotCoexist(const MachineInstr &MI,
1161	const MachineInstr &MJ) {
1162	return cannotCoexistAsymm(MI, MJ, HII: HII) \|\| cannotCoexistAsymm(MI: MJ, MJ: MI, HII: HII);
1163	}
1164
1165	void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
1166	for (auto &B : MF) {
1167	MachineBasicBlock::iterator BundleIt;
1168	for (MachineInstr &MI : llvm::make_early_inc_range(Range: B.instrs())) {
1169	if (MI.isBundle())
1170	BundleIt = MI.getIterator();
1171	if (!MI.isInsideBundle())
1172	continue;
1173
1174	// Decide on where to insert the instruction that we are pulling out.
1175	// Debug instructions always go before the bundle, but the placement of
1176	// INLINE_ASM depends on potential dependencies. By default, try to
1177	// put it before the bundle, but if the asm writes to a register that
1178	// other instructions in the bundle read, then we need to place it
1179	// after the bundle (to preserve the bundle semantics).
1180	bool InsertBeforeBundle;
1181	if (MI.isInlineAsm())
1182	InsertBeforeBundle = !hasWriteToReadDep(MI, *BundleIt, HRI);
1183	else if (MI.isDebugInstr())
1184	InsertBeforeBundle = true;
1185	else
1186	continue;
1187
1188	BundleIt = moveInstrOut(MI, BundleIt, Before: InsertBeforeBundle);
1189	}
1190	}
1191	}
1192
1193	// Check if a given instruction is of class "system".
1194	static bool isSystemInstr(const MachineInstr &MI) {
1195	unsigned Opc = MI.getOpcode();
1196	switch (Opc) {
1197	case Hexagon::Y2_barrier:
1198	case Hexagon::Y2_dcfetchbo:
1199	case Hexagon::Y4_l2fetch:
1200	case Hexagon::Y5_l2fetch:
1201	return true;
1202	}
1203	return false;
1204	}
1205
1206	bool HexagonPacketizerList::hasDeadDependence(const MachineInstr &I,
1207	const MachineInstr &J) {
1208	// The dependence graph may not include edges between dead definitions,
1209	// so without extra checks, we could end up packetizing two instruction
1210	// defining the same (dead) register.
1211	if (I.isCall() \|\| J.isCall())
1212	return false;
1213	if (HII->isPredicated(MI: I) \|\| HII->isPredicated(MI: J))
1214	return false;
1215
1216	BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
1217	for (auto &MO : I.operands()) {
1218	if (!MO.isReg() \|\| !MO.isDef() \|\| !MO.isDead())
1219	continue;
1220	DeadDefs[MO.getReg()] = true;
1221	}
1222
1223	for (auto &MO : J.operands()) {
1224	if (!MO.isReg() \|\| !MO.isDef() \|\| !MO.isDead())
1225	continue;
1226	Register R = MO.getReg();
1227	if (R != Hexagon::USR_OVF && DeadDefs[R])
1228	return true;
1229	}
1230	return false;
1231	}
1232
1233	bool HexagonPacketizerList::hasControlDependence(const MachineInstr &I,
1234	const MachineInstr &J) {
1235	// A save callee-save register function call can only be in a packet
1236	// with instructions that don't write to the callee-save registers.
1237	if ((HII->isSaveCalleeSavedRegsCall(MI: I) &&
1238	doesModifyCalleeSavedReg(J, HRI)) \|\|
1239	(HII->isSaveCalleeSavedRegsCall(MI: J) &&
1240	doesModifyCalleeSavedReg(I, HRI)))
1241	return true;
1242
1243	// Two control flow instructions cannot go in the same packet.
1244	if (isControlFlow(MI: I) && isControlFlow(MI: J))
1245	return true;
1246
1247	// \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot
1248	// contain a speculative indirect jump,
1249	// a new-value compare jump or a dealloc_return.
1250	auto isBadForLoopN = [this] (const MachineInstr &MI) -> bool {
1251	if (MI.isCall() \|\| HII->isDeallocRet(MI) \|\| HII->isNewValueJump(MI))
1252	return true;
1253	if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI))
1254	return true;
1255	return false;
1256	};
1257
1258	if (HII->isLoopN(MI: I) && isBadForLoopN (J))
1259	return true;
1260	if (HII->isLoopN(MI: J) && isBadForLoopN (I))
1261	return true;
1262
1263	// dealloc_return cannot appear in the same packet as a conditional or
1264	// unconditional jump.
1265	return HII->isDeallocRet(MI: I) &&
1266	(J.isBranch() \|\| J.isCall() \|\| J.isBarrier());
1267	}
1268
1269	bool HexagonPacketizerList::hasRegMaskDependence(const MachineInstr &I,
1270	const MachineInstr &J) {
1271	// Adding I to a packet that has J.
1272
1273	// Regmasks are not reflected in the scheduling dependency graph, so
1274	// we need to check them manually. This code assumes that regmasks only
1275	// occur on calls, and the problematic case is when we add an instruction
1276	// defining a register R to a packet that has a call that clobbers R via
1277	// a regmask. Those cannot be packetized together, because the call will
1278	// be executed last. That's also a reson why it is ok to add a call
1279	// clobbering R to a packet that defines R.
1280
1281	// Look for regmasks in J.
1282	for (const MachineOperand &OpJ : J.operands()) {
1283	if (!OpJ.isRegMask())
1284	continue;
1285	assert((J.isCall() \|\| HII->isTailCall(J)) && "Regmask on a non-call");
1286	for (const MachineOperand &OpI : I.operands()) {
1287	if (OpI.isReg()) {
1288	if (OpJ.clobbersPhysReg(PhysReg: OpI.getReg()))
1289	return true;
1290	} else if (OpI.isRegMask()) {
1291	// Both are regmasks. Assume that they intersect.
1292	return true;
1293	}
1294	}
1295	}
1296	return false;
1297	}
1298
1299	bool HexagonPacketizerList::hasDualStoreDependence(const MachineInstr &I,
1300	const MachineInstr &J) {
1301	bool SysI = isSystemInstr(MI: I), SysJ = isSystemInstr(MI: J);
1302	bool StoreI = I.mayStore(), StoreJ = J.mayStore();
1303	if ((SysI && StoreJ) \|\| (SysJ && StoreI))
1304	return true;
1305
1306	if (StoreI && StoreJ) {
1307	if (HII->isNewValueInst(MI: J) \|\| HII->isMemOp(MI: J) \|\| HII->isMemOp(MI: I))
1308	return true;
1309	} else {
1310	// A memop cannot be in the same packet with another memop or a store.
1311	// Two stores can be together, but here I and J cannot both be stores.
1312	bool MopStI = HII->isMemOp(MI: I) \|\| StoreI;
1313	bool MopStJ = HII->isMemOp(MI: J) \|\| StoreJ;
1314	if (MopStI && MopStJ)
1315	return true;
1316	}
1317
1318	return (StoreJ && HII->isDeallocRet(MI: I)) \|\| (StoreI && HII->isDeallocRet(MI: J));
1319	}
1320
// Decide whether instruction I (from SUI) may be added to the current packet
// next to instruction J (from SUJ).
//
// Returns true if the two may be packetized together. When false is returned
// because of a dependence, the member Dependence is set to true; a rejection
// by cannotCoexist returns false without touching Dependence.
//
// Besides Dependence, this function updates IgnoreDepMIs, GlueToNewValueJump,
// PromotedToDotNew, GlueAllocframeStore and FoundSequentialDependence, and it
// may call promoteToDotCur, promoteToDotNew, useCallersSP and
// setmemShufDisabled.
//
1321	// SUI is the current instruction that is outside of the current packet.
1322	// SUJ is the current instruction inside the current packet against which that
1323	// SUI will be packetized.
// NOTE(review): SUI and SUJ are dereferenced with -> below, so they are
// presumably SUnit pointers; the declarators on the next line look damaged -
// confirm against the class declaration.
1324	bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit SUI, SUnit SUJ) {
1325	assert(SUI->getInstr() && SUJ->getInstr());
1326	MachineInstr &I = *SUI->getInstr();
1327	MachineInstr &J = *SUJ->getInstr();
1328
1329	// Clear IgnoreDepMIs when Packet starts.
1330	if (CurrentPacketMIs.size() == `1`)
1331	IgnoreDepMIs.clear();
1332
1333	MachineBasicBlock::iterator II = I.getIterator();
1334
1335	// Solo instructions cannot go in the packet.
1336	assert(!isSoloInstruction(I) && "Unexpected solo instr!");
1337
1338	if (cannotCoexist(MI: I, MJ: J))
1339	return false;
1340
1341	Dependence = hasDeadDependence(I, J) \|\| hasControlDependence(I, J);
1342	if (Dependence)
1343	return false;
1344
1345	// Regmasks are not accounted for in the scheduling graph, so we need
1346	// to explicitly check for dependencies caused by them. They should only
1347	// appear on calls, so it's not too pessimistic to reject all regmask
1348	// dependencies.
1349	Dependence = hasRegMaskDependence(I, J);
1350	if (Dependence)
1351	return false;
1352
1353	// Dual-store does not allow second store, if the first store is not
1354	// in SLOT0. New value store, new value jump, dealloc_return and memop
1355	// always take SLOT0. Arch spec 3.4.4.2.
1356	Dependence = hasDualStoreDependence(I, J);
1357	if (Dependence)
1358	return false;
1359
1360	// If an instruction feeds new value jump, glue it.
1361	MachineBasicBlock::iterator NextMII = I.getIterator();
1362	++NextMII;
1363	if (NextMII != I.getParent()->end() && HII->isNewValueJump(MI: *NextMII)) {
1364	MachineInstr &NextMI = *NextMII;
1365
// secondRegMatch is true when operand 0 of I is the register in operand 1 of
// the new-value jump; it selects which jump operand is checked below.
1366	bool secondRegMatch = false;
1367	const MachineOperand &NOp0 = NextMI.getOperand(i: `0`);
1368	const MachineOperand &NOp1 = NextMI.getOperand(i: `1`);
1369
1370	if (NOp1.isReg() && I.getOperand(i: `0`).getReg() == NOp1.getReg())
1371	secondRegMatch = true;
1372
1373	for (MachineInstr *PI : CurrentPacketMIs) {
1374	// NVJ can not be part of the dual jump - Arch Spec: section 7.8.
1375	if (PI->isCall()) {
1376	Dependence = true;
1377	break;
1378	}
1379	// Validate:
1380	// 1. Packet does not have a store in it.
1381	// 2. If the first operand of the nvj is newified, and the second
1382	// operand is also a reg, it (second reg) is not defined in
1383	// the same packet.
1384	// 3. If the second operand of the nvj is newified, (which means
1385	// first operand is also a reg), first reg is not defined in
1386	// the same packet.
1387	if (PI->getOpcode() == Hexagon::S2_allocframe \|\| PI->mayStore() \|\|
1388	HII->isLoopN(*PI)) {
1389	Dependence = true;
1390	break;
1391	}
1392	// Check #2/#3.
1393	const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
1394	if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
1395	Dependence = true;
1396	break;
1397	}
1398	}
1399
// The flag is set even when Dependence is true; isLegalToPruneDependencies
// tests and clears it.
1400	GlueToNewValueJump = true;
1401	if (Dependence)
1402	return false;
1403	}
1404
1405	// There is no dependency between a prolog instruction and its successor.
1406	if (!SUJ->isSucc(N: SUI))
1407	return true;
1408
// Examine every dependence edge that runs from SUJ to SUI. The loop stops at
// the first edge that forces the two instructions to execute sequentially.
1409	for (unsigned i = `0`; i < SUJ->Succs.size(); ++i) {
1410	if (FoundSequentialDependence)
1411	break;
1412
1413	if (SUJ->Succs [i].getSUnit() != SUI)
1414	continue;
1415
1416	SDep::Kind DepType = SUJ->Succs [i].getKind();
1417	// For direct calls:
1418	// Ignore register dependences for call instructions for packetization
1419	// purposes except for those due to r31 and predicate registers.
1420	//
1421	// For indirect calls:
1422	// Same as direct calls + check for true dependences to the register
1423	// used in the indirect call.
1424	//
1425	// We completely ignore Order dependences for call instructions.
1426	//
1427	// For returns:
1428	// Ignore register dependences for return instructions like jumpr,
1429	// dealloc return unless we have dependencies on the explicit uses
1430	// of the registers used by jumpr (like r31) or dealloc return
1431	// (like r29 or r30).
1432	unsigned DepReg = `0`;
// NOTE(review): RC is assigned the result of getMinimalPhysRegClass below, so
// it is presumably a pointer initialized to nullptr; the declaration on the
// next line looks damaged - confirm.
1433	const TargetRegisterClass RC = nullptr*;
1434	if (DepType == SDep::Data) {
1435	DepReg = SUJ->Succs [i].getReg();
1436	RC = HRI->getMinimalPhysRegClass(DepReg);
1437	}
1438
1439	if (I.isCall() \|\| HII->isJumpR(MI: I) \|\| I.isReturn() \|\| HII->isTailCall(MI: I)) {
1440	if (!isRegDependence(DepType))
1441	continue;
1442	if (!isCallDependent(MI: I, DepType, DepReg: SUJ->Succs [i].getReg()))
1443	continue;
1444	}
1445
1446	if (DepType == SDep::Data) {
1447	if (canPromoteToDotCur(MI: J, PacketSU: SUJ, DepReg, MII&: II, RC))
1448	if (promoteToDotCur(MI&: J, DepType, MII&: II, RC))
1449	continue;
1450	}
1451
1452	// Data dependence ok if we have load.cur.
1453	if (DepType == SDep::Data && HII->isDotCurInst(MI: J)) {
1454	if (HII->isHVXVec(MI: I))
1455	continue;
1456	}
1457
1458	// For instructions that can be promoted to dot-new, try to promote.
1459	if (DepType == SDep::Data) {
1460	if (canPromoteToDotNew(MI: I, PacketSU: SUJ, DepReg, MII&: II, RC)) {
1461	if (promoteToDotNew(MI&: I, DepType, MII&: II, RC)) {
1462	PromotedToDotNew = true;
// The coexistence check is repeated here, after I has been promoted.
1463	if (cannotCoexist(MI: I, MJ: J))
1464	FoundSequentialDependence = true;
1465	continue;
1466	}
1467	}
1468	if (HII->isNewValueJump(MI: I))
1469	continue;
1470	}
1471
1472	// For predicated instructions, if the predicates are complements then
1473	// there can be no dependence.
1474	if (HII->isPredicated(MI: I) && HII->isPredicated(MI: J) &&
1475	arePredicatesComplements(MI1&: I, MI2&: J)) {
1476	// Not always safe to do this translation.
1477	// DAG Builder attempts to reduce dependence edges using transitive
1478	// nature of dependencies. Here is an example:
1479	//
1480	// r0 = tfr_pt ... (1)
1481	// r0 = tfr_pf ... (2)
1482	// r0 = tfr_pt ... (3)
1483	//
1484	// There will be an output dependence between (1)->(2) and (2)->(3).
1485	// However, there is no dependence edge between (1)->(3). This results
1486	// in all 3 instructions going in the same packet. We ignore dependence
1487	// only once to avoid this situation.
1488	auto Itr = find(Range&: IgnoreDepMIs, Val: &J);
1489	if (Itr != IgnoreDepMIs.end()) {
1490	Dependence = true;
1491	return false;
1492	}
1493	IgnoreDepMIs.push_back(x: &I);
1494	continue;
1495	}
1496
1497	// Ignore Order dependences between unconditional direct branches
1498	// and non-control-flow instructions.
1499	if (isDirectJump(MI: I) && !J.isBranch() && !J.isCall() &&
1500	DepType == SDep::Order)
1501	continue;
1502
1503	// Ignore all dependences for jumps except for true and output
1504	// dependences.
1505	if (I.isConditionalBranch() && DepType != SDep::Data &&
1506	DepType != SDep::Output)
1507	continue;
1508
1509	if (DepType == SDep::Output) {
1510	FoundSequentialDependence = true;
1511	break;
1512	}
1513
1514	// For Order dependences:
1515	// 1. Volatile loads/stores can be packetized together, unless other
1516	// rules prevent it.
1517	// 2. Store followed by a load is not allowed.
1518	// 3. Store followed by a store is valid.
1519	// 4. Load followed by any memory operation is allowed.
1520	if (DepType == SDep::Order) {
1521	if (!PacketizeVolatiles) {
1522	bool OrdRefs = I.hasOrderedMemoryRef() \|\| J.hasOrderedMemoryRef();
1523	if (OrdRefs) {
1524	FoundSequentialDependence = true;
1525	break;
1526	}
1527	}
1528	// J is first, I is second.
1529	bool LoadJ = J.mayLoad(), StoreJ = J.mayStore();
1530	bool LoadI = I.mayLoad(), StoreI = I.mayStore();
1531	bool NVStoreJ = HII->isNewValueStore(MI: J);
1532	bool NVStoreI = HII->isNewValueStore(MI: I);
1533	bool IsVecJ = HII->isHVXVec(MI: J);
1534	bool IsVecI = HII->isHVXVec(MI: I);
1535
1536	// Don't reorder the loads if there is an order dependence. This would
1537	// occur if the first instruction must go in slot0.
1538	if (LoadJ && LoadI && HII->isPureSlot0(MI: J)) {
1539	FoundSequentialDependence = true;
1540	break;
1541	}
1542
// With Slot1Store on V65 and later, a load paired with a non-new-value store
// (neither an allocframe/deallocframe, a memop nor an HVX instruction) is
// accepted and memShufDisabled is set for the packet. Otherwise a store
// followed by an aliasing load is a sequential dependence.
1543	if (Slot1Store && MF.getSubtarget<HexagonSubtarget>().hasV65Ops() &&
1544	((LoadJ && StoreI && !NVStoreI) \|\|
1545	(StoreJ && LoadI && !NVStoreJ)) &&
1546	(J.getOpcode() != Hexagon::S2_allocframe &&
1547	I.getOpcode() != Hexagon::S2_allocframe) &&
1548	(J.getOpcode() != Hexagon::L2_deallocframe &&
1549	I.getOpcode() != Hexagon::L2_deallocframe) &&
1550	(!HII->isMemOp(J) && !HII->isMemOp(I)) && (!IsVecJ && !IsVecI))
1551	setmemShufDisabled(true);
1552	else
1553	if (StoreJ && LoadI && alias(MI1: J, MI2: I)) {
1554	FoundSequentialDependence = true;
1555	break;
1556	}
1557
1558	if (!StoreJ)
1559	if (!LoadJ \|\| (!LoadI && !StoreI)) {
1560	// If J is neither load nor store, assume a dependency.
1561	// If J is a load, but I is neither, also assume a dependency.
1562	FoundSequentialDependence = true;
1563	break;
1564	}
1565	// Store followed by store: not OK on V2.
1566	// Store followed by load: not OK on all.
1567	// Load followed by store: OK on all.
1568	// Load followed by load: OK on all.
1569	continue;
1570	}
1571
1572	// Special case for ALLOCFRAME: even though there is dependency
1573	// between ALLOCFRAME and subsequent store, allow it to be packetized
1574	// in a same packet. This implies that the store is using the caller's
1575	// SP. Hence, offset needs to be updated accordingly.
1576	if (DepType == SDep::Data && J.getOpcode() == Hexagon::S2_allocframe) {
1577	unsigned Opc = I.getOpcode();
1578	switch (Opc) {
1579	case Hexagon::S2_storerd_io:
1580	case Hexagon::S2_storeri_io:
1581	case Hexagon::S2_storerh_io:
1582	case Hexagon::S2_storerb_io:
1583	if (I.getOperand(i: `0`).getReg() == HRI->getStackRegister()) {
1584	// Since this store is to be glued with allocframe in the same
1585	// packet, it will use SP of the previous stack frame, i.e.
1586	// caller's SP. Therefore, we need to recalculate offset
1587	// according to this change.
1588	GlueAllocframeStore = useCallersSP(MI&: I);
1589	if (GlueAllocframeStore)
1590	continue;
1591	}
1592	break;
1593	default:
1594	break;
1595	}
1596	}
1597
1598	// There are certain anti-dependencies that cannot be ignored.
1599	// Specifically:
1600	// J2_call ... implicit-def %r0 ; SUJ
1601	// R0 = ... ; SUI
1602	// Those cannot be packetized together, since the call will observe
1603	// the effect of the assignment to R0.
1604	if ((DepType == SDep::Anti \|\| DepType == SDep::Output) && J.isCall()) {
1605	// Check if I defines any volatile register. We should also check
1606	// registers that the call may read, but these happen to be a
1607	// subset of the volatile register set.
1608	for (const MachineOperand &Op : I.operands()) {
1609	if (Op.isReg() && Op.isDef()) {
1610	Register R = Op.getReg();
1611	if (!J.readsRegister(R, HRI) && !J.modifiesRegister(R, HRI))
1612	continue;
1613	} else if (!Op.isRegMask()) {
1614	// If I has a regmask assume dependency.
1615	continue;
1616	}
1617	FoundSequentialDependence = true;
1618	break;
1619	}
1620	}
1621
1622	// Skip over remaining anti-dependences. Two instructions that are
1623	// anti-dependent can share a packet, since in most such cases all
1624	// operands are read before any modifications take place.
1625	// The exceptions are branch and call instructions, since they are
1626	// executed after all other instructions have completed (at least
1627	// conceptually).
1628	if (DepType != SDep::Anti) {
1629	FoundSequentialDependence = true;
1630	break;
1631	}
1632	}
1633
1634	if (FoundSequentialDependence) {
1635	Dependence = true;
1636	return false;
1637	}
1638
1639	return true;
1640	}
1641
1642	bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit SUI, SUnit SUJ) {
1643	assert(SUI->getInstr() && SUJ->getInstr());
1644	MachineInstr &I = *SUI->getInstr();
1645	MachineInstr &J = *SUJ->getInstr();
1646
1647	bool Coexist = !cannotCoexist(MI: I, MJ: J);
1648
1649	if (Coexist && !Dependence)
1650	return true;
1651
1652	// Check if the instruction was promoted to a dot-new. If so, demote it
1653	// back into a dot-old.
1654	if (PromotedToDotNew)
1655	demoteToDotOld(MI&: I);
1656
1657	cleanUpDotCur();
1658	// Check if the instruction (must be a store) was glued with an allocframe
1659	// instruction. If so, restore its offset to its original value, i.e. use
1660	// current SP instead of caller's SP.
1661	if (GlueAllocframeStore) {
1662	useCalleesSP(MI&: I);
1663	GlueAllocframeStore = false;
1664	}
1665
1666	if (ChangedOffset != INT64_MAX)
1667	undoChangedOffset(MI&: I);
1668
1669	if (GlueToNewValueJump) {
1670	// Putting I and J together would prevent the new-value jump from being
1671	// packetized with the producer. In that case I and J must be separated.
1672	GlueToNewValueJump = false;
1673	return false;
1674	}
1675
1676	if (!Coexist)
1677	return false;
1678
1679	if (ChangedOffset == INT64_MAX && updateOffset(SUI, SUJ)) {
1680	FoundSequentialDependence = false;
1681	Dependence = false;
1682	return true;
1683	}
1684
1685	return false;
1686	}
1687
1688
1689	bool HexagonPacketizerList::foundLSInPacket() {
1690	bool FoundLoad = false;
1691	bool FoundStore = false;
1692
1693	for (auto *MJ : CurrentPacketMIs) {
1694	unsigned Opc = MJ->getOpcode();
1695	if (Opc == Hexagon::S2_allocframe \|\| Opc == Hexagon::L2_deallocframe)
1696	continue;
1697	if (HII->isMemOp(MI: *MJ))
1698	continue;
1699	if (MJ->mayLoad())
1700	FoundLoad = true;
1701	if (MJ->mayStore() && !HII->isNewValueStore(MI: *MJ))
1702	FoundStore = true;
1703	}
1704	return FoundLoad && FoundStore;
1705	}
1706
1707
// Add MI to the current packet and reserve its resources.
//
// If GlueToNewValueJump is set, the instruction that follows MI is added to
// the packet together with MI. If the required resources (including constant
// extenders) cannot be reserved in the current packet, the packet is ended
// with endPacket and the resources are reserved again.
//
// Returns an iterator to the last instruction that was added: MI itself, or
// the instruction following MI when the two were glued.
1708	MachineBasicBlock::iterator
1709	HexagonPacketizerList::addToPacket(MachineInstr &MI) {
1710	MachineBasicBlock::iterator MII = MI.getIterator();
1711	MachineBasicBlock *MBB = MI.getParent();
1712
// Stall tracking starts over with the first instruction of a packet.
1713	if (CurrentPacketMIs.empty()) {
1714	PacketStalls = false;
1715	PacketStallCycles = `0`;
1716	}
1717	PacketStalls \|= producesStall(MI);
1718	PacketStallCycles = std::max(a: PacketStallCycles, b: calcStall(MI));
1719
1720	if (MI.isImplicitDef()) {
1721	// Add to the packet to allow subsequent instructions to be checked
1722	// properly.
// No resources are reserved for it.
1723	CurrentPacketMIs.push_back(x: &MI);
1724	return MII;
1725	}
1726	assert(ResourceTracker->canReserveResources(MI));
1727
1728	bool ExtMI = HII->isExtended(MI) \|\| HII->isConstExtended(MI);
// Good stays true as long as every reservation attempted below succeeds.
1729	bool Good = true;
1730
1731	if (GlueToNewValueJump) {
// MII is advanced here, so the iterator returned from this branch refers to
// NvjMI.
1732	MachineInstr &NvjMI = *++MII;
1733	// We need to put both instructions in the same packet: MI and NvjMI.
1734	// Either of them can require a constant extender. Try to add both to
1735	// the current packet, and if that fails, end the packet and start a
1736	// new one.
1737	ResourceTracker->reserveResources(MI);
1738	if (ExtMI)
1739	Good = tryAllocateResourcesForConstExt(Reserve: true);
1740
1741	bool ExtNvjMI = HII->isExtended(MI: NvjMI) \|\| HII->isConstExtended(MI: NvjMI);
1742	if (Good) {
1743	if (ResourceTracker->canReserveResources(MI&: NvjMI))
1744	ResourceTracker->reserveResources(MI&: NvjMI);
1745	else
1746	Good = false;
1747	}
1748	if (Good && ExtNvjMI)
1749	Good = tryAllocateResourcesForConstExt(Reserve: true);
1750
1751	if (!Good) {
// Some reservation failed: end the packet and reserve everything again for
// MI and NvjMI.
1752	endPacket(MBB, MI);
1753	assert(ResourceTracker->canReserveResources(MI));
1754	ResourceTracker->reserveResources(MI);
1755	if (ExtMI) {
1756	assert(canReserveResourcesForConstExt());
1757	tryAllocateResourcesForConstExt(Reserve: true);
1758	}
1759	assert(ResourceTracker->canReserveResources(NvjMI));
1760	ResourceTracker->reserveResources(MI&: NvjMI);
1761	if (ExtNvjMI) {
1762	assert(canReserveResourcesForConstExt());
1763	reserveResourcesForConstExt();
1764	}
1765	}
1766	CurrentPacketMIs.push_back(x: &MI);
1767	CurrentPacketMIs.push_back(x: &NvjMI);
1768	return MII;
1769	}
1770
1771	ResourceTracker->reserveResources(MI);
1772	if (ExtMI && !tryAllocateResourcesForConstExt(Reserve: true)) {
// The constant extender does not fit: end the packet, undo the dot-new
// promotion and the allocframe gluing of MI, and reserve the resources again.
1773	endPacket(MBB, MI);
1774	if (PromotedToDotNew)
1775	demoteToDotOld(MI);
1776	if (GlueAllocframeStore) {
1777	useCalleesSP(MI);
1778	GlueAllocframeStore = false;
1779	}
1780	ResourceTracker->reserveResources(MI);
1781	reserveResourcesForConstExt();
1782	}
1783
1784	CurrentPacketMIs.push_back(x: &MI);
1785	return MII;
1786	}
1787
1788	void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
1789	MachineBasicBlock::iterator EndMI) {
1790	// Replace VLIWPacketizerList::endPacket(MBB, EndMI).
1791	LLVM_DEBUG({
1792	if (!CurrentPacketMIs.empty()) {
1793	dbgs() << "Finalizing packet:\n";
1794	unsigned Idx = `0`;
1795	for (MachineInstr *MI : CurrentPacketMIs) {
1796	unsigned R = ResourceTracker->getUsedResources(Idx++);
1797	dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
1798	}
1799	}
1800	});
1801
1802	bool memShufDisabled = getmemShufDisabled();
1803	if (memShufDisabled && !foundLSInPacket()) {
1804	setmemShufDisabled(false);
1805	LLVM_DEBUG(dbgs() << " Not added to NoShufPacket\n");
1806	}
1807	memShufDisabled = getmemShufDisabled();
1808
1809	OldPacketMIs.clear();
1810	for (MachineInstr *MI : CurrentPacketMIs) {
1811	MachineBasicBlock::instr_iterator NextMI = std::next(x: MI->getIterator());
1812	for (auto &I : make_range(x: HII->expandVGatherPseudo(MI&: *MI), y: NextMI))
1813	OldPacketMIs.push_back(x: &I);
1814	}
1815	CurrentPacketMIs.clear();
1816
1817	if (OldPacketMIs.size() > `1`) {
1818	MachineBasicBlock::instr_iterator FirstMI(OldPacketMIs.front());
1819	MachineBasicBlock::instr_iterator LastMI(EndMI.getInstrIterator());
1820	finalizeBundle(MBB&: *MBB, FirstMI, LastMI);
1821	auto BundleMII = std::prev(x: FirstMI);
1822	if (memShufDisabled)
1823	HII->setBundleNoShuf(BundleMII);
1824
1825	setmemShufDisabled(false);
1826	}
1827
1828	PacketHasDuplex = false;
1829	PacketHasSLOT0OnlyInsn = false;
1830	ResourceTracker->clearResources();
1831	LLVM_DEBUG(dbgs() << "End packet\n");
1832	}
1833
1834	bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
1835	if (Minimal)
1836	return false;
1837
1838	if (producesStall(MI))
1839	return false;
1840
1841	// If TinyCore with Duplexes is enabled, check if this MI can form a Duplex
1842	// with any other instruction in the existing packet.
1843	auto &HST = MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>();
1844	// Constraint 1: Only one duplex allowed per packet.
1845	// Constraint 2: Consider duplex checks only if there is atleast one
1846	// instruction in a packet.
1847	// Constraint 3: If one of the existing instructions in the packet has a
1848	// SLOT0 only instruction that can not be duplexed, do not attempt to form
1849	// duplexes. (TODO: This will invalidate the L4_return instructions to form a*
1850	// duplex)
1851	if (HST.isTinyCoreWithDuplex() && CurrentPacketMIs.size() > `0` &&
1852	!PacketHasDuplex) {
1853	// Check for SLOT0 only non-duplexable instruction in packet.
1854	for (auto &MJ : CurrentPacketMIs)
1855	PacketHasSLOT0OnlyInsn \|= HII->isPureSlot0(MI: *MJ);
1856	// Get the Big Core Opcode (dup_).*
1857	int Opcode = HII->getDuplexOpcode(MI, ForBigCore: false);
1858	if (Opcode >= `0`) {
1859	// We now have an instruction that can be duplexed.
1860	for (auto &MJ : CurrentPacketMIs) {
1861	if (HII->isDuplexPair(MIa: MI, MIb: *MJ) && !PacketHasSLOT0OnlyInsn) {
1862	PacketHasDuplex = true;
1863	return true;
1864	}
1865	}
1866	// If it can not be duplexed, check if there is a valid transition in DFA
1867	// with the original opcode.
1868	MachineInstr &MIRef = const_cast<MachineInstr &>(MI);
1869	MIRef.setDesc(HII->get(Opcode));
1870	return ResourceTracker->canReserveResources(MI&: MIRef);
1871	}
1872	}
1873
1874	return true;
1875	}
1876
1877	// V60 forward scheduling.
1878	unsigned int HexagonPacketizerList::calcStall(const MachineInstr &I) {
1879	// Check whether the previous packet is in a different loop. If this is the
1880	// case, there is little point in trying to avoid a stall because that would
1881	// favor the rare case (loop entry) over the common case (loop iteration).
1882	//
1883	// TODO: We should really be able to check all the incoming edges if this is
1884	// the first packet in a basic block, so we can avoid stalls from the loop
1885	// backedge.
1886	if (!OldPacketMIs.empty()) {
1887	auto *OldBB = OldPacketMIs.front()->getParent();
1888	auto *ThisBB = I.getParent();
1889	if (MLI->getLoopFor(BB: OldBB) != MLI->getLoopFor(BB: ThisBB))
1890	return `0`;
1891	}
1892
1893	SUnit SUI = MIToSUnit [const_cast<MachineInstr >(&I)];
1894	if (!SUI)
1895	return `0`;
1896
1897	// If the latency is 0 and there is a data dependence between this
1898	// instruction and any instruction in the current packet, we disregard any
1899	// potential stalls due to the instructions in the previous packet. Most of
1900	// the instruction pairs that can go together in the same packet have 0
1901	// latency between them. The exceptions are
1902	// 1. NewValueJumps as they're generated much later and the latencies can't
1903	// be changed at that point.
1904	// 2. .cur instructions, if its consumer has a 0 latency successor (such as
1905	// .new). In this case, the latency between .cur and the consumer stays
1906	// non-zero even though we can have both .cur and .new in the same packet.
1907	// Changing the latency to 0 is not an option as it causes software pipeliner
1908	// to not pipeline in some cases.
1909
1910	// For Example:
1911	// {
1912	// I1: v6.cur = vmem(r0++#1)
1913	// I2: v7 = valign(v6,v4,r2)
1914	// I3: vmem(r5++#1) = v7.new
1915	// }
1916	// Here I2 and I3 has 0 cycle latency, but I1 and I2 has 2.
1917
1918	for (auto *J : CurrentPacketMIs) {
1919	SUnit *SUJ = MIToSUnit [J];
1920	for (auto &Pred : SUI->Preds)
1921	if (Pred.getSUnit() == SUJ)
1922	if ((Pred.getLatency() == `0` && Pred.isAssignedRegDep()) \|\|
1923	HII->isNewValueJump(MI: I) \|\| HII->isToBeScheduledASAP(MI1: *J, MI2: I))
1924	return `0`;
1925	}
1926
1927	// Check if the latency is greater than one between this instruction and any
1928	// instruction in the previous packet.
1929	for (auto *J : OldPacketMIs) {
1930	SUnit *SUJ = MIToSUnit [J];
1931	for (auto &Pred : SUI->Preds)
1932	if (Pred.getSUnit() == SUJ && Pred.getLatency() > `1`)
1933	return Pred.getLatency();
1934	}
1935
1936	return `0`;
1937	}
1938
1939	bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
1940	unsigned int Latency = calcStall(I);
1941	if (Latency == `0`)
1942	return false;
1943	// Ignore stall unless it stalls more than previous instruction in packet
1944	if (PacketStalls)
1945	return Latency > PacketStallCycles;
1946	return true;
1947	}
1948
1949	//===----------------------------------------------------------------------===//
1950	// Public Constructor Functions
1951	//===----------------------------------------------------------------------===//
1952
1953	FunctionPass llvm::createHexagonPacketizer(bool* Minimal) {
1954	return new HexagonPacketizer (Minimal);
1955	}
1956

source code of llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp