1//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the RISC-V implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVInstrInfo.h"
14#include "MCTargetDesc/RISCVMatInt.h"
15#include "RISCV.h"
16#include "RISCVMachineFunctionInfo.h"
17#include "RISCVSubtarget.h"
18#include "RISCVTargetMachine.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/Analysis/MemoryLocation.h"
22#include "llvm/Analysis/ValueTracking.h"
23#include "llvm/CodeGen/LiveIntervals.h"
24#include "llvm/CodeGen/LiveVariables.h"
25#include "llvm/CodeGen/MachineCombinerPattern.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/CodeGen/MachineTraceMetrics.h"
30#include "llvm/CodeGen/RegisterScavenging.h"
31#include "llvm/CodeGen/StackMaps.h"
32#include "llvm/IR/DebugInfoMetadata.h"
33#include "llvm/MC/MCInstBuilder.h"
34#include "llvm/MC/TargetRegistry.h"
35#include "llvm/Support/ErrorHandling.h"
36
37using namespace llvm;
38
39#define GEN_CHECK_COMPRESS_INSTR
40#include "RISCVGenCompressInstEmitter.inc"
41
42#define GET_INSTRINFO_CTOR_DTOR
43#define GET_INSTRINFO_NAMED_OPS
44#include "RISCVGenInstrInfo.inc"
45
46static cl::opt<bool> PreferWholeRegisterMove(
47 "riscv-prefer-whole-register-move", cl::init(Val: false), cl::Hidden,
48 cl::desc("Prefer whole register move for vector registers."));
49
50static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
51 "riscv-force-machine-combiner-strategy", cl::Hidden,
52 cl::desc("Force machine combiner to use a specific strategy for machine "
53 "trace metrics evaluation."),
54 cl::init(Val: MachineTraceStrategy::TS_NumStrategies),
55 cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
56 "Local strategy."),
57 clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
58 "MinInstrCount strategy.")));
59
60namespace llvm::RISCVVPseudosTable {
61
62using namespace RISCV;
63
64#define GET_RISCVVPseudosTable_IMPL
65#include "RISCVGenSearchableTables.inc"
66
67} // namespace llvm::RISCVVPseudosTable
68
69namespace llvm::RISCV {
70
71#define GET_RISCVMaskedPseudosTable_IMPL
72#include "RISCVGenSearchableTables.inc"
73
74} // end namespace llvm::RISCV
75
76RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
77 : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
78 STI(STI) {}
79
80MCInst RISCVInstrInfo::getNop() const {
81 if (STI.hasStdExtCOrZca())
82 return MCInstBuilder(RISCV::C_NOP);
83 return MCInstBuilder(RISCV::ADDI)
84 .addReg(RISCV::X0)
85 .addReg(RISCV::X0)
86 .addImm(0);
87}
88
89Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
90 int &FrameIndex) const {
91 unsigned Dummy;
92 return isLoadFromStackSlot(MI, FrameIndex, MemBytes&: Dummy);
93}
94
95Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
96 int &FrameIndex,
97 unsigned &MemBytes) const {
98 switch (MI.getOpcode()) {
99 default:
100 return 0;
101 case RISCV::LB:
102 case RISCV::LBU:
103 MemBytes = 1;
104 break;
105 case RISCV::LH:
106 case RISCV::LHU:
107 case RISCV::FLH:
108 MemBytes = 2;
109 break;
110 case RISCV::LW:
111 case RISCV::FLW:
112 case RISCV::LWU:
113 MemBytes = 4;
114 break;
115 case RISCV::LD:
116 case RISCV::FLD:
117 MemBytes = 8;
118 break;
119 }
120
121 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() &&
122 MI.getOperand(i: 2).getImm() == 0) {
123 FrameIndex = MI.getOperand(i: 1).getIndex();
124 return MI.getOperand(i: 0).getReg();
125 }
126
127 return 0;
128}
129
130Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
131 int &FrameIndex) const {
132 unsigned Dummy;
133 return isStoreToStackSlot(MI, FrameIndex, MemBytes&: Dummy);
134}
135
136Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
137 int &FrameIndex,
138 unsigned &MemBytes) const {
139 switch (MI.getOpcode()) {
140 default:
141 return 0;
142 case RISCV::SB:
143 MemBytes = 1;
144 break;
145 case RISCV::SH:
146 case RISCV::FSH:
147 MemBytes = 2;
148 break;
149 case RISCV::SW:
150 case RISCV::FSW:
151 MemBytes = 4;
152 break;
153 case RISCV::SD:
154 case RISCV::FSD:
155 MemBytes = 8;
156 break;
157 }
158
159 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() &&
160 MI.getOperand(i: 2).getImm() == 0) {
161 FrameIndex = MI.getOperand(i: 1).getIndex();
162 return MI.getOperand(i: 0).getReg();
163 }
164
165 return 0;
166}
167
168static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
169 unsigned NumRegs) {
170 return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
171}
172
173static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
174 const MachineBasicBlock &MBB,
175 MachineBasicBlock::const_iterator MBBI,
176 MachineBasicBlock::const_iterator &DefMBBI,
177 RISCVII::VLMUL LMul) {
178 if (PreferWholeRegisterMove)
179 return false;
180
181 assert(MBBI->getOpcode() == TargetOpcode::COPY &&
182 "Unexpected COPY instruction.");
183 Register SrcReg = MBBI->getOperand(i: 1).getReg();
184 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
185
186 bool FoundDef = false;
187 bool FirstVSetVLI = false;
188 unsigned FirstSEW = 0;
189 while (MBBI != MBB.begin()) {
190 --MBBI;
191 if (MBBI->isMetaInstruction())
192 continue;
193
194 if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
195 MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
196 MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
197 // There is a vsetvli between COPY and source define instruction.
198 // vy = def_vop ... (producing instruction)
199 // ...
200 // vsetvli
201 // ...
202 // vx = COPY vy
203 if (!FoundDef) {
204 if (!FirstVSetVLI) {
205 FirstVSetVLI = true;
206 unsigned FirstVType = MBBI->getOperand(i: 2).getImm();
207 RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(VType: FirstVType);
208 FirstSEW = RISCVVType::getSEW(VType: FirstVType);
209 // The first encountered vsetvli must have the same lmul as the
210 // register class of COPY.
211 if (FirstLMul != LMul)
212 return false;
213 }
214 // Only permit `vsetvli x0, x0, vtype` between COPY and the source
215 // define instruction.
216 if (MBBI->getOperand(i: 0).getReg() != RISCV::X0)
217 return false;
218 if (MBBI->getOperand(i: 1).isImm())
219 return false;
220 if (MBBI->getOperand(i: 1).getReg() != RISCV::X0)
221 return false;
222 continue;
223 }
224
225 // MBBI is the first vsetvli before the producing instruction.
226 unsigned VType = MBBI->getOperand(i: 2).getImm();
227 // If there is a vsetvli between COPY and the producing instruction.
228 if (FirstVSetVLI) {
229 // If SEW is different, return false.
230 if (RISCVVType::getSEW(VType) != FirstSEW)
231 return false;
232 }
233
234 // If the vsetvli is tail undisturbed, keep the whole register move.
235 if (!RISCVVType::isTailAgnostic(VType))
236 return false;
237
238 // The checking is conservative. We only have register classes for
239 // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
240 // for fractional LMUL operations. However, we could not use the vsetvli
241 // lmul for widening operations. The result of widening operation is
242 // 2 x LMUL.
243 return LMul == RISCVVType::getVLMUL(VType);
244 } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
245 return false;
246 } else if (MBBI->getNumDefs()) {
247 // Check all the instructions which will change VL.
248 // For example, vleff has implicit def VL.
249 if (MBBI->modifiesRegister(RISCV::Reg: VL, /*TRI=*/nullptr))
250 return false;
251
252 // Only converting whole register copies to vmv.v.v when the defining
253 // value appears in the explicit operands.
254 for (const MachineOperand &MO : MBBI->explicit_operands()) {
255 if (!MO.isReg() || !MO.isDef())
256 continue;
257 if (!FoundDef && TRI->regsOverlap(RegA: MO.getReg(), RegB: SrcReg)) {
258 // We only permit the source of COPY has the same LMUL as the defined
259 // operand.
260 // There are cases we need to keep the whole register copy if the LMUL
261 // is different.
262 // For example,
263 // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m
264 // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
265 // # The COPY may be created by vlmul_trunc intrinsic.
266 // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
267 //
268 // After widening, the valid value will be 4 x e32 elements. If we
269 // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
270 // FIXME: The COPY of subregister of Zvlsseg register will not be able
271 // to convert to vmv.v.[v|i] under the constraint.
272 if (MO.getReg() != SrcReg)
273 return false;
274
275 // In widening reduction instructions with LMUL_1 input vector case,
276 // only checking the LMUL is insufficient due to reduction result is
277 // always LMUL_1.
278 // For example,
279 // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
280 // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
281 // $v26 = COPY killed renamable $v8
282 // After widening, The valid value will be 1 x e16 elements. If we
283 // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
284 uint64_t TSFlags = MBBI->getDesc().TSFlags;
285 if (RISCVII::isRVVWideningReduction(TSFlags))
286 return false;
287
288 // If the producing instruction does not depend on vsetvli, do not
289 // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
290 if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
291 return false;
292
293 // Found the definition.
294 FoundDef = true;
295 DefMBBI = MBBI;
296 break;
297 }
298 }
299 }
300 }
301
302 return false;
303}
304
305void RISCVInstrInfo::copyPhysRegVector(
306 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
307 const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
308 const TargetRegisterClass *RegClass) const {
309 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
310 RISCVII::VLMUL LMul = RISCVRI::getLMul(TSFlags: RegClass->TSFlags);
311 unsigned NF = RISCVRI::getNF(TSFlags: RegClass->TSFlags);
312
313 uint16_t SrcEncoding = TRI->getEncodingValue(RegNo: SrcReg);
314 uint16_t DstEncoding = TRI->getEncodingValue(RegNo: DstReg);
315 auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(VLMUL: LMul);
316 assert(!Fractional && "It is impossible be fractional lmul here.");
317 unsigned NumRegs = NF * LMulVal;
318 bool ReversedCopy =
319 forwardCopyWillClobberTuple(DstReg: DstEncoding, SrcReg: SrcEncoding, NumRegs);
320 if (ReversedCopy) {
321 // If the src and dest overlap when copying a tuple, we need to copy the
322 // registers in reverse.
323 SrcEncoding += NumRegs - 1;
324 DstEncoding += NumRegs - 1;
325 }
326
327 unsigned I = 0;
328 auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
329 -> std::tuple<RISCVII::VLMUL, const TargetRegisterClass &, unsigned,
330 unsigned, unsigned> {
331 if (ReversedCopy) {
332 // For reversed copying, if there are enough aligned registers(8/4/2), we
333 // can do a larger copy(LMUL8/4/2).
334 // Besides, we have already known that DstEncoding is larger than
335 // SrcEncoding in forwardCopyWillClobberTuple, so the difference between
336 // DstEncoding and SrcEncoding should be >= LMUL value we try to use to
337 // avoid clobbering.
338 uint16_t Diff = DstEncoding - SrcEncoding;
339 if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
340 DstEncoding % 8 == 7)
341 return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
342 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
343 if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
344 DstEncoding % 4 == 3)
345 return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
346 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
347 if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
348 DstEncoding % 2 == 1)
349 return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
350 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
351 // Or we should do LMUL1 copying.
352 return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
353 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
354 }
355
356 // For forward copying, if source register encoding and destination register
357 // encoding are aligned to 8/4/2, we can do a LMUL8/4/2 copying.
358 if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
359 return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
360 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
361 if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
362 return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
363 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
364 if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
365 return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
366 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
367 // Or we should do LMUL1 copying.
368 return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
369 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
370 };
371 auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
372 uint16_t Encoding) {
373 MCRegister Reg = RISCV::V0 + Encoding;
374 if (&RegClass == &RISCV::VRRegClass)
375 return Reg;
376 return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
377 };
378 while (I != NumRegs) {
379 // For non-segment copying, we only do this once as the registers are always
380 // aligned.
381 // For segment copying, we may do this several times. If the registers are
382 // aligned to larger LMUL, we can eliminate some copyings.
383 auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
384 GetCopyInfo(SrcEncoding, DstEncoding);
385 auto [NumCopied, _] = RISCVVType::decodeVLMUL(VLMUL: LMulCopied);
386
387 MachineBasicBlock::const_iterator DefMBBI;
388 if (LMul == LMulCopied &&
389 isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
390 Opc = VVOpc;
391 if (DefMBBI->getOpcode() == VIOpc)
392 Opc = VIOpc;
393 }
394
395 // Emit actual copying.
396 // For reversed copying, the encoding should be decreased.
397 MCRegister ActualSrcReg = FindRegWithEncoding(
398 RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
399 MCRegister ActualDstReg = FindRegWithEncoding(
400 RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
401
402 auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
403 bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
404 bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
405 if (UseVMV)
406 MIB.addReg(ActualDstReg, RegState::Undef);
407 if (UseVMV_V_I)
408 MIB = MIB.add(DefMBBI->getOperand(i: 2));
409 else
410 MIB = MIB.addReg(ActualSrcReg, getKillRegState(B: KillSrc));
411 if (UseVMV) {
412 const MCInstrDesc &Desc = DefMBBI->getDesc();
413 MIB.add(DefMBBI->getOperand(i: RISCVII::getVLOpNum(Desc))); // AVL
414 MIB.add(DefMBBI->getOperand(i: RISCVII::getSEWOpNum(Desc))); // SEW
415 MIB.addImm(0); // tu, mu
416 MIB.addReg(RISCV::VL, RegState::Implicit);
417 MIB.addReg(RISCV::VTYPE, RegState::Implicit);
418 }
419
420 // If we are copying reversely, we should decrease the encoding.
421 SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
422 DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
423 I += NumCopied;
424 }
425}
426
427void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
428 MachineBasicBlock::iterator MBBI,
429 const DebugLoc &DL, MCRegister DstReg,
430 MCRegister SrcReg, bool KillSrc) const {
431 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
432
433 if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
434 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
435 .addReg(SrcReg, getKillRegState(KillSrc))
436 .addImm(0);
437 return;
438 }
439
440 if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
441 // Emit an ADDI for both parts of GPRPair.
442 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
443 TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
444 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
445 getKillRegState(KillSrc))
446 .addImm(0);
447 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
448 TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
449 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
450 getKillRegState(KillSrc))
451 .addImm(0);
452 return;
453 }
454
455 // Handle copy from csr
456 if (RISCV::VCSRRegClass.contains(SrcReg) &&
457 RISCV::GPRRegClass.contains(DstReg)) {
458 BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
459 .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
460 .addReg(RISCV::X0);
461 return;
462 }
463
464 if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
465 unsigned Opc;
466 if (STI.hasStdExtZfh()) {
467 Opc = RISCV::FSGNJ_H;
468 } else {
469 assert(STI.hasStdExtF() &&
470 (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
471 "Unexpected extensions");
472 // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.
473 DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
474 &RISCV::FPR32RegClass);
475 SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
476 &RISCV::FPR32RegClass);
477 Opc = RISCV::FSGNJ_S;
478 }
479 BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
480 .addReg(SrcReg, getKillRegState(B: KillSrc))
481 .addReg(SrcReg, getKillRegState(B: KillSrc));
482 return;
483 }
484
485 if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
486 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
487 .addReg(SrcReg, getKillRegState(KillSrc))
488 .addReg(SrcReg, getKillRegState(KillSrc));
489 return;
490 }
491
492 if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
493 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
494 .addReg(SrcReg, getKillRegState(KillSrc))
495 .addReg(SrcReg, getKillRegState(KillSrc));
496 return;
497 }
498
499 if (RISCV::FPR32RegClass.contains(DstReg) &&
500 RISCV::GPRRegClass.contains(SrcReg)) {
501 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
502 .addReg(SrcReg, getKillRegState(KillSrc));
503 return;
504 }
505
506 if (RISCV::GPRRegClass.contains(DstReg) &&
507 RISCV::FPR32RegClass.contains(SrcReg)) {
508 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
509 .addReg(SrcReg, getKillRegState(KillSrc));
510 return;
511 }
512
513 if (RISCV::FPR64RegClass.contains(DstReg) &&
514 RISCV::GPRRegClass.contains(SrcReg)) {
515 assert(STI.getXLen() == 64 && "Unexpected GPR size");
516 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
517 .addReg(SrcReg, getKillRegState(KillSrc));
518 return;
519 }
520
521 if (RISCV::GPRRegClass.contains(DstReg) &&
522 RISCV::FPR64RegClass.contains(SrcReg)) {
523 assert(STI.getXLen() == 64 && "Unexpected GPR size");
524 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
525 .addReg(SrcReg, getKillRegState(KillSrc));
526 return;
527 }
528
529 // VR->VR copies.
530 static const TargetRegisterClass *RVVRegClasses[] = {
531 &RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
532 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN2M2RegClass,
533 &RISCV::VRN2M4RegClass, &RISCV::VRN3M1RegClass, &RISCV::VRN3M2RegClass,
534 &RISCV::VRN4M1RegClass, &RISCV::VRN4M2RegClass, &RISCV::VRN5M1RegClass,
535 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass};
536 for (const auto &RegClass : RVVRegClasses) {
537 if (RegClass->contains(DstReg, SrcReg)) {
538 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
539 return;
540 }
541 }
542
543 llvm_unreachable("Impossible reg-to-reg copy");
544}
545
546void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
547 MachineBasicBlock::iterator I,
548 Register SrcReg, bool IsKill, int FI,
549 const TargetRegisterClass *RC,
550 const TargetRegisterInfo *TRI,
551 Register VReg) const {
552 MachineFunction *MF = MBB.getParent();
553 MachineFrameInfo &MFI = MF->getFrameInfo();
554
555 unsigned Opcode;
556 bool IsScalableVector = true;
557 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
558 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
559 RISCV::SW : RISCV::SD;
560 IsScalableVector = false;
561 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
562 Opcode = RISCV::PseudoRV32ZdinxSD;
563 IsScalableVector = false;
564 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
565 Opcode = RISCV::FSH;
566 IsScalableVector = false;
567 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
568 Opcode = RISCV::FSW;
569 IsScalableVector = false;
570 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
571 Opcode = RISCV::FSD;
572 IsScalableVector = false;
573 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
574 Opcode = RISCV::VS1R_V;
575 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
576 Opcode = RISCV::VS2R_V;
577 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
578 Opcode = RISCV::VS4R_V;
579 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
580 Opcode = RISCV::VS8R_V;
581 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
582 Opcode = RISCV::PseudoVSPILL2_M1;
583 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
584 Opcode = RISCV::PseudoVSPILL2_M2;
585 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
586 Opcode = RISCV::PseudoVSPILL2_M4;
587 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
588 Opcode = RISCV::PseudoVSPILL3_M1;
589 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
590 Opcode = RISCV::PseudoVSPILL3_M2;
591 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
592 Opcode = RISCV::PseudoVSPILL4_M1;
593 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
594 Opcode = RISCV::PseudoVSPILL4_M2;
595 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
596 Opcode = RISCV::PseudoVSPILL5_M1;
597 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
598 Opcode = RISCV::PseudoVSPILL6_M1;
599 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
600 Opcode = RISCV::PseudoVSPILL7_M1;
601 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
602 Opcode = RISCV::PseudoVSPILL8_M1;
603 else
604 llvm_unreachable("Can't store this register to stack slot");
605
606 if (IsScalableVector) {
607 MachineMemOperand *MMO = MF->getMachineMemOperand(
608 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOStore,
609 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
610
611 MFI.setStackID(ObjectIdx: FI, ID: TargetStackID::ScalableVector);
612 BuildMI(MBB, I, DebugLoc(), get(Opcode))
613 .addReg(SrcReg, getKillRegState(B: IsKill))
614 .addFrameIndex(FI)
615 .addMemOperand(MMO);
616 } else {
617 MachineMemOperand *MMO = MF->getMachineMemOperand(
618 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOStore,
619 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
620
621 BuildMI(MBB, I, DebugLoc(), get(Opcode))
622 .addReg(SrcReg, getKillRegState(B: IsKill))
623 .addFrameIndex(FI)
624 .addImm(0)
625 .addMemOperand(MMO);
626 }
627}
628
629void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
630 MachineBasicBlock::iterator I,
631 Register DstReg, int FI,
632 const TargetRegisterClass *RC,
633 const TargetRegisterInfo *TRI,
634 Register VReg) const {
635 MachineFunction *MF = MBB.getParent();
636 MachineFrameInfo &MFI = MF->getFrameInfo();
637
638 unsigned Opcode;
639 bool IsScalableVector = true;
640 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
641 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
642 RISCV::LW : RISCV::LD;
643 IsScalableVector = false;
644 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
645 Opcode = RISCV::PseudoRV32ZdinxLD;
646 IsScalableVector = false;
647 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
648 Opcode = RISCV::FLH;
649 IsScalableVector = false;
650 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
651 Opcode = RISCV::FLW;
652 IsScalableVector = false;
653 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
654 Opcode = RISCV::FLD;
655 IsScalableVector = false;
656 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
657 Opcode = RISCV::VL1RE8_V;
658 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
659 Opcode = RISCV::VL2RE8_V;
660 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
661 Opcode = RISCV::VL4RE8_V;
662 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
663 Opcode = RISCV::VL8RE8_V;
664 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
665 Opcode = RISCV::PseudoVRELOAD2_M1;
666 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
667 Opcode = RISCV::PseudoVRELOAD2_M2;
668 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
669 Opcode = RISCV::PseudoVRELOAD2_M4;
670 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
671 Opcode = RISCV::PseudoVRELOAD3_M1;
672 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
673 Opcode = RISCV::PseudoVRELOAD3_M2;
674 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
675 Opcode = RISCV::PseudoVRELOAD4_M1;
676 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
677 Opcode = RISCV::PseudoVRELOAD4_M2;
678 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
679 Opcode = RISCV::PseudoVRELOAD5_M1;
680 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
681 Opcode = RISCV::PseudoVRELOAD6_M1;
682 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
683 Opcode = RISCV::PseudoVRELOAD7_M1;
684 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
685 Opcode = RISCV::PseudoVRELOAD8_M1;
686 else
687 llvm_unreachable("Can't load this register from stack slot");
688
689 if (IsScalableVector) {
690 MachineMemOperand *MMO = MF->getMachineMemOperand(
691 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOLoad,
692 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
693
694 MFI.setStackID(ObjectIdx: FI, ID: TargetStackID::ScalableVector);
695 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
696 .addFrameIndex(FI)
697 .addMemOperand(MMO);
698 } else {
699 MachineMemOperand *MMO = MF->getMachineMemOperand(
700 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOLoad,
701 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
702
703 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
704 .addFrameIndex(FI)
705 .addImm(0)
706 .addMemOperand(MMO);
707 }
708}
709
710MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
711 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
712 MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
713 VirtRegMap *VRM) const {
714 const MachineFrameInfo &MFI = MF.getFrameInfo();
715
716 // The below optimizations narrow the load so they are only valid for little
717 // endian.
718 // TODO: Support big endian by adding an offset into the frame object?
719 if (MF.getDataLayout().isBigEndian())
720 return nullptr;
721
722 // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
723 if (Ops.size() != 1 || Ops[0] != 1)
724 return nullptr;
725
726 unsigned LoadOpc;
727 switch (MI.getOpcode()) {
728 default:
729 if (RISCV::isSEXT_W(MI)) {
730 LoadOpc = RISCV::LW;
731 break;
732 }
733 if (RISCV::isZEXT_W(MI)) {
734 LoadOpc = RISCV::LWU;
735 break;
736 }
737 if (RISCV::isZEXT_B(MI)) {
738 LoadOpc = RISCV::LBU;
739 break;
740 }
741 return nullptr;
742 case RISCV::SEXT_H:
743 LoadOpc = RISCV::LH;
744 break;
745 case RISCV::SEXT_B:
746 LoadOpc = RISCV::LB;
747 break;
748 case RISCV::ZEXT_H_RV32:
749 case RISCV::ZEXT_H_RV64:
750 LoadOpc = RISCV::LHU;
751 break;
752 }
753
754 MachineMemOperand *MMO = MF.getMachineMemOperand(
755 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex),
756 F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: FrameIndex),
757 BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIndex));
758
759 Register DstReg = MI.getOperand(i: 0).getReg();
760 return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
761 DstReg)
762 .addFrameIndex(FrameIndex)
763 .addImm(0)
764 .addMemOperand(MMO);
765}
766
767void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
768 MachineBasicBlock::iterator MBBI,
769 const DebugLoc &DL, Register DstReg, uint64_t Val,
770 MachineInstr::MIFlag Flag, bool DstRenamable,
771 bool DstIsDead) const {
772 Register SrcReg = RISCV::X0;
773
774 // For RV32, allow a sign or unsigned 32 bit value.
775 if (!STI.is64Bit() && !isInt<32>(x: Val)) {
776 // If have a uimm32 it will still fit in a register so we can allow it.
777 if (!isUInt<32>(x: Val))
778 report_fatal_error(reason: "Should only materialize 32-bit constants for RV32");
779
780 // Sign extend for generateInstSeq.
781 Val = SignExtend64<32>(x: Val);
782 }
783
784 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
785 assert(!Seq.empty());
786
787 bool SrcRenamable = false;
788 unsigned Num = 0;
789
790 for (const RISCVMatInt::Inst &Inst : Seq) {
791 bool LastItem = ++Num == Seq.size();
792 unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
793 getRenamableRegState(DstRenamable);
794 unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
795 getRenamableRegState(SrcRenamable);
796 switch (Inst.getOpndKind()) {
797 case RISCVMatInt::Imm:
798 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
799 .addReg(DstReg, RegState::Define | DstRegState)
800 .addImm(Inst.getImm())
801 .setMIFlag(Flag);
802 break;
803 case RISCVMatInt::RegX0:
804 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
805 .addReg(DstReg, RegState::Define | DstRegState)
806 .addReg(SrcReg, SrcRegState)
807 .addReg(RISCV::X0)
808 .setMIFlag(Flag);
809 break;
810 case RISCVMatInt::RegReg:
811 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
812 .addReg(DstReg, RegState::Define | DstRegState)
813 .addReg(SrcReg, SrcRegState)
814 .addReg(SrcReg, SrcRegState)
815 .setMIFlag(Flag);
816 break;
817 case RISCVMatInt::RegImm:
818 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
819 .addReg(DstReg, RegState::Define | DstRegState)
820 .addReg(SrcReg, SrcRegState)
821 .addImm(Inst.getImm())
822 .setMIFlag(Flag);
823 break;
824 }
825
826 // Only the first instruction has X0 as its source.
827 SrcReg = DstReg;
828 SrcRenamable = DstRenamable;
829 }
830}
831
832static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
833 switch (Opc) {
834 default:
835 return RISCVCC::COND_INVALID;
836 case RISCV::BEQ:
837 return RISCVCC::COND_EQ;
838 case RISCV::BNE:
839 return RISCVCC::COND_NE;
840 case RISCV::BLT:
841 return RISCVCC::COND_LT;
842 case RISCV::BGE:
843 return RISCVCC::COND_GE;
844 case RISCV::BLTU:
845 return RISCVCC::COND_LTU;
846 case RISCV::BGEU:
847 return RISCVCC::COND_GEU;
848 }
849}
850
851// The contents of values added to Cond are not examined outside of
852// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
853// push BranchOpcode, Reg1, Reg2.
854static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
855 SmallVectorImpl<MachineOperand> &Cond) {
856 // Block ends with fall-through condbranch.
857 assert(LastInst.getDesc().isConditionalBranch() &&
858 "Unknown conditional branch");
859 Target = LastInst.getOperand(i: 2).getMBB();
860 unsigned CC = getCondFromBranchOpc(Opc: LastInst.getOpcode());
861 Cond.push_back(Elt: MachineOperand::CreateImm(Val: CC));
862 Cond.push_back(Elt: LastInst.getOperand(i: 0));
863 Cond.push_back(Elt: LastInst.getOperand(i: 1));
864}
865
866unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) {
867 switch (CC) {
868 default:
869 llvm_unreachable("Unknown condition code!");
870 case RISCVCC::COND_EQ:
871 return RISCV::BEQ;
872 case RISCVCC::COND_NE:
873 return RISCV::BNE;
874 case RISCVCC::COND_LT:
875 return RISCV::BLT;
876 case RISCVCC::COND_GE:
877 return RISCV::BGE;
878 case RISCVCC::COND_LTU:
879 return RISCV::BLTU;
880 case RISCVCC::COND_GEU:
881 return RISCV::BGEU;
882 }
883}
884
885const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
886 return get(RISCVCC::getBrCond(CC));
887}
888
889RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
890 switch (CC) {
891 default:
892 llvm_unreachable("Unrecognized conditional branch");
893 case RISCVCC::COND_EQ:
894 return RISCVCC::COND_NE;
895 case RISCVCC::COND_NE:
896 return RISCVCC::COND_EQ;
897 case RISCVCC::COND_LT:
898 return RISCVCC::COND_GE;
899 case RISCVCC::COND_GE:
900 return RISCVCC::COND_LT;
901 case RISCVCC::COND_LTU:
902 return RISCVCC::COND_GEU;
903 case RISCVCC::COND_GEU:
904 return RISCVCC::COND_LTU;
905 }
906}
907
908bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
909 MachineBasicBlock *&TBB,
910 MachineBasicBlock *&FBB,
911 SmallVectorImpl<MachineOperand> &Cond,
912 bool AllowModify) const {
913 TBB = FBB = nullptr;
914 Cond.clear();
915
916 // If the block has no terminators, it just falls into the block after it.
917 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
918 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
919 return false;
920
921 // Count the number of terminators and find the first unconditional or
922 // indirect branch.
923 MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
924 int NumTerminators = 0;
925 for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
926 J++) {
927 NumTerminators++;
928 if (J->getDesc().isUnconditionalBranch() ||
929 J->getDesc().isIndirectBranch()) {
930 FirstUncondOrIndirectBr = J.getReverse();
931 }
932 }
933
934 // If AllowModify is true, we can erase any terminators after
935 // FirstUncondOrIndirectBR.
936 if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
937 while (std::next(x: FirstUncondOrIndirectBr) != MBB.end()) {
938 std::next(x: FirstUncondOrIndirectBr)->eraseFromParent();
939 NumTerminators--;
940 }
941 I = FirstUncondOrIndirectBr;
942 }
943
944 // We can't handle blocks that end in an indirect branch.
945 if (I->getDesc().isIndirectBranch())
946 return true;
947
948 // We can't handle Generic branch opcodes from Global ISel.
949 if (I->isPreISelOpcode())
950 return true;
951
952 // We can't handle blocks with more than 2 terminators.
953 if (NumTerminators > 2)
954 return true;
955
956 // Handle a single unconditional branch.
957 if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
958 TBB = getBranchDestBlock(MI: *I);
959 return false;
960 }
961
962 // Handle a single conditional branch.
963 if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
964 parseCondBranch(LastInst&: *I, Target&: TBB, Cond);
965 return false;
966 }
967
968 // Handle a conditional branch followed by an unconditional branch.
969 if (NumTerminators == 2 && std::prev(x: I)->getDesc().isConditionalBranch() &&
970 I->getDesc().isUnconditionalBranch()) {
971 parseCondBranch(LastInst&: *std::prev(x: I), Target&: TBB, Cond);
972 FBB = getBranchDestBlock(MI: *I);
973 return false;
974 }
975
976 // Otherwise, we can't handle this.
977 return true;
978}
979
980unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
981 int *BytesRemoved) const {
982 if (BytesRemoved)
983 *BytesRemoved = 0;
984 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
985 if (I == MBB.end())
986 return 0;
987
988 if (!I->getDesc().isUnconditionalBranch() &&
989 !I->getDesc().isConditionalBranch())
990 return 0;
991
992 // Remove the branch.
993 if (BytesRemoved)
994 *BytesRemoved += getInstSizeInBytes(MI: *I);
995 I->eraseFromParent();
996
997 I = MBB.end();
998
999 if (I == MBB.begin())
1000 return 1;
1001 --I;
1002 if (!I->getDesc().isConditionalBranch())
1003 return 1;
1004
1005 // Remove the branch.
1006 if (BytesRemoved)
1007 *BytesRemoved += getInstSizeInBytes(MI: *I);
1008 I->eraseFromParent();
1009 return 2;
1010}
1011
1012// Inserts a branch into the end of the specific MachineBasicBlock, returning
1013// the number of instructions inserted.
1014unsigned RISCVInstrInfo::insertBranch(
1015 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
1016 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
1017 if (BytesAdded)
1018 *BytesAdded = 0;
1019
1020 // Shouldn't be a fall through.
1021 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1022 assert((Cond.size() == 3 || Cond.size() == 0) &&
1023 "RISC-V branch conditions have two components!");
1024
1025 // Unconditional branch.
1026 if (Cond.empty()) {
1027 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
1028 if (BytesAdded)
1029 *BytesAdded += getInstSizeInBytes(MI);
1030 return 1;
1031 }
1032
1033 // Either a one or two-way conditional branch.
1034 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1035 MachineInstr &CondMI =
1036 *BuildMI(BB: &MBB, MIMD: DL, MCID: getBrCond(CC)).add(MO: Cond[1]).add(MO: Cond[2]).addMBB(MBB: TBB);
1037 if (BytesAdded)
1038 *BytesAdded += getInstSizeInBytes(MI: CondMI);
1039
1040 // One-way conditional branch.
1041 if (!FBB)
1042 return 1;
1043
1044 // Two-way conditional branch.
1045 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
1046 if (BytesAdded)
1047 *BytesAdded += getInstSizeInBytes(MI);
1048 return 2;
1049}
1050
1051void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1052 MachineBasicBlock &DestBB,
1053 MachineBasicBlock &RestoreBB,
1054 const DebugLoc &DL, int64_t BrOffset,
1055 RegScavenger *RS) const {
1056 assert(RS && "RegScavenger required for long branching");
1057 assert(MBB.empty() &&
1058 "new block should be inserted for expanding unconditional branch");
1059 assert(MBB.pred_size() == 1);
1060 assert(RestoreBB.empty() &&
1061 "restore block should be inserted for restoring clobbered registers");
1062
1063 MachineFunction *MF = MBB.getParent();
1064 MachineRegisterInfo &MRI = MF->getRegInfo();
1065 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
1066 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1067
1068 if (!isInt<32>(x: BrOffset))
1069 report_fatal_error(
1070 reason: "Branch offsets outside of the signed 32-bit range not supported");
1071
1072 // FIXME: A virtual register must be used initially, as the register
1073 // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
1074 // uses the same workaround).
1075 Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRJALRRegClass);
1076 auto II = MBB.end();
1077 // We may also update the jump target to RestoreBB later.
1078 MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
1079 .addReg(ScratchReg, RegState::Define | RegState::Dead)
1080 .addMBB(&DestBB, RISCVII::MO_CALL);
1081
1082 RS->enterBasicBlockEnd(MBB);
1083 Register TmpGPR =
1084 RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
1085 /*RestoreAfter=*/false, /*SpAdj=*/0,
1086 /*AllowSpill=*/false);
1087 if (TmpGPR != RISCV::NoRegister)
1088 RS->setRegUsed(Reg: TmpGPR);
1089 else {
1090 // The case when there is no scavenged register needs special handling.
1091
1092 // Pick s11 because it doesn't make a difference.
1093 TmpGPR = RISCV::X27;
1094
1095 int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
1096 if (FrameIndex == -1)
1097 report_fatal_error(reason: "underestimated function size");
1098
1099 storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
1100 &RISCV::GPRRegClass, TRI, Register());
1101 TRI->eliminateFrameIndex(MI: std::prev(x: MI.getIterator()),
1102 /*SpAdj=*/SPAdj: 0, /*FIOperandNum=*/1);
1103
1104 MI.getOperand(i: 1).setMBB(&RestoreBB);
1105
1106 loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
1107 &RISCV::GPRRegClass, TRI, Register());
1108 TRI->eliminateFrameIndex(MI: RestoreBB.back(),
1109 /*SpAdj=*/SPAdj: 0, /*FIOperandNum=*/1);
1110 }
1111
1112 MRI.replaceRegWith(FromReg: ScratchReg, ToReg: TmpGPR);
1113 MRI.clearVirtRegs();
1114}
1115
1116bool RISCVInstrInfo::reverseBranchCondition(
1117 SmallVectorImpl<MachineOperand> &Cond) const {
1118 assert((Cond.size() == 3) && "Invalid branch condition!");
1119 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1120 Cond[0].setImm(getOppositeBranchCondition(CC));
1121 return false;
1122}
1123
1124bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
1125 MachineBasicBlock *MBB = MI.getParent();
1126 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1127
1128 MachineBasicBlock *TBB, *FBB;
1129 SmallVector<MachineOperand, 3> Cond;
1130 if (analyzeBranch(MBB&: *MBB, TBB, FBB, Cond, /*AllowModify=*/false))
1131 return false;
1132
1133 RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1134 assert(CC != RISCVCC::COND_INVALID);
1135
1136 if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
1137 return false;
1138
1139 // For two constants C0 and C1 from
1140 // ```
1141 // li Y, C0
1142 // li Z, C1
1143 // ```
1144 // 1. if C1 = C0 + 1
1145 // we can turn:
1146 // (a) blt Y, X -> bge X, Z
1147 // (b) bge Y, X -> blt X, Z
1148 //
1149 // 2. if C1 = C0 - 1
1150 // we can turn:
1151 // (a) blt X, Y -> bge Z, X
1152 // (b) bge X, Y -> blt Z, X
1153 //
1154 // To make sure this optimization is really beneficial, we only
1155 // optimize for cases where Y had only one use (i.e. only used by the branch).
1156
1157 // Right now we only care about LI (i.e. ADDI x0, imm)
1158 auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
1159 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1160 MI->getOperand(1).getReg() == RISCV::X0) {
1161 Imm = MI->getOperand(i: 2).getImm();
1162 return true;
1163 }
1164 return false;
1165 };
1166 // Either a load from immediate instruction or X0.
1167 auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
1168 if (!Op.isReg())
1169 return false;
1170 Register Reg = Op.getReg();
1171 return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
1172 };
1173
1174 MachineOperand &LHS = MI.getOperand(i: 0);
1175 MachineOperand &RHS = MI.getOperand(i: 1);
1176 // Try to find the register for constant Z; return
1177 // invalid register otherwise.
1178 auto searchConst = [&](int64_t C1) -> Register {
1179 MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
1180 auto DefC1 = std::find_if(first: ++II, last: E, pred: [&](const MachineInstr &I) -> bool {
1181 int64_t Imm;
1182 return isLoadImm(&I, Imm) && Imm == C1 &&
1183 I.getOperand(i: 0).getReg().isVirtual();
1184 });
1185 if (DefC1 != E)
1186 return DefC1->getOperand(i: 0).getReg();
1187
1188 return Register();
1189 };
1190
1191 bool Modify = false;
1192 int64_t C0;
1193 if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(RegNo: LHS.getReg())) {
1194 // Might be case 1.
1195 // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
1196 // to worry about unsigned overflow here)
1197 if (C0 < INT64_MAX)
1198 if (Register RegZ = searchConst(C0 + 1)) {
1199 reverseBranchCondition(Cond);
1200 Cond[1] = MachineOperand::CreateReg(Reg: RHS.getReg(), /*isDef=*/false);
1201 Cond[2] = MachineOperand::CreateReg(Reg: RegZ, /*isDef=*/false);
1202 // We might extend the live range of Z, clear its kill flag to
1203 // account for this.
1204 MRI.clearKillFlags(Reg: RegZ);
1205 Modify = true;
1206 }
1207 } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RegNo: RHS.getReg())) {
1208 // Might be case 2.
1209 // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
1210 // when C0 is zero.
1211 if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
1212 if (Register RegZ = searchConst(C0 - 1)) {
1213 reverseBranchCondition(Cond);
1214 Cond[1] = MachineOperand::CreateReg(Reg: RegZ, /*isDef=*/false);
1215 Cond[2] = MachineOperand::CreateReg(Reg: LHS.getReg(), /*isDef=*/false);
1216 // We might extend the live range of Z, clear its kill flag to
1217 // account for this.
1218 MRI.clearKillFlags(Reg: RegZ);
1219 Modify = true;
1220 }
1221 }
1222
1223 if (!Modify)
1224 return false;
1225
1226 // Build the new branch and remove the old one.
1227 BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(),
1228 MCID: getBrCond(CC: static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
1229 .add(MO: Cond[1])
1230 .add(MO: Cond[2])
1231 .addMBB(MBB: TBB);
1232 MI.eraseFromParent();
1233
1234 return true;
1235}
1236
1237MachineBasicBlock *
1238RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
1239 assert(MI.getDesc().isBranch() && "Unexpected opcode!");
1240 // The branch target is always the last operand.
1241 int NumOp = MI.getNumExplicitOperands();
1242 return MI.getOperand(i: NumOp - 1).getMBB();
1243}
1244
1245bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1246 int64_t BrOffset) const {
1247 unsigned XLen = STI.getXLen();
1248 // Ideally we could determine the supported branch offset from the
1249 // RISCVII::FormMask, but this can't be used for Pseudo instructions like
1250 // PseudoBR.
1251 switch (BranchOp) {
1252 default:
1253 llvm_unreachable("Unexpected opcode!");
1254 case RISCV::BEQ:
1255 case RISCV::BNE:
1256 case RISCV::BLT:
1257 case RISCV::BGE:
1258 case RISCV::BLTU:
1259 case RISCV::BGEU:
1260 return isIntN(N: 13, x: BrOffset);
1261 case RISCV::JAL:
1262 case RISCV::PseudoBR:
1263 return isIntN(N: 21, x: BrOffset);
1264 case RISCV::PseudoJump:
1265 return isIntN(N: 32, x: SignExtend64(X: BrOffset + 0x800, B: XLen));
1266 }
1267}
1268
1269// If the operation has a predicated pseudo instruction, return the pseudo
1270// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
1271// TODO: Support more operations.
1272unsigned getPredicatedOpcode(unsigned Opcode) {
1273 switch (Opcode) {
1274 case RISCV::ADD: return RISCV::PseudoCCADD; break;
1275 case RISCV::SUB: return RISCV::PseudoCCSUB; break;
1276 case RISCV::SLL: return RISCV::PseudoCCSLL; break;
1277 case RISCV::SRL: return RISCV::PseudoCCSRL; break;
1278 case RISCV::SRA: return RISCV::PseudoCCSRA; break;
1279 case RISCV::AND: return RISCV::PseudoCCAND; break;
1280 case RISCV::OR: return RISCV::PseudoCCOR; break;
1281 case RISCV::XOR: return RISCV::PseudoCCXOR; break;
1282
1283 case RISCV::ADDI: return RISCV::PseudoCCADDI; break;
1284 case RISCV::SLLI: return RISCV::PseudoCCSLLI; break;
1285 case RISCV::SRLI: return RISCV::PseudoCCSRLI; break;
1286 case RISCV::SRAI: return RISCV::PseudoCCSRAI; break;
1287 case RISCV::ANDI: return RISCV::PseudoCCANDI; break;
1288 case RISCV::ORI: return RISCV::PseudoCCORI; break;
1289 case RISCV::XORI: return RISCV::PseudoCCXORI; break;
1290
1291 case RISCV::ADDW: return RISCV::PseudoCCADDW; break;
1292 case RISCV::SUBW: return RISCV::PseudoCCSUBW; break;
1293 case RISCV::SLLW: return RISCV::PseudoCCSLLW; break;
1294 case RISCV::SRLW: return RISCV::PseudoCCSRLW; break;
1295 case RISCV::SRAW: return RISCV::PseudoCCSRAW; break;
1296
1297 case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
1298 case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
1299 case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
1300 case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;
1301
1302 case RISCV::ANDN: return RISCV::PseudoCCANDN; break;
1303 case RISCV::ORN: return RISCV::PseudoCCORN; break;
1304 case RISCV::XNOR: return RISCV::PseudoCCXNOR; break;
1305 }
1306
1307 return RISCV::INSTRUCTION_LIST_END;
1308}
1309
1310/// Identify instructions that can be folded into a CCMOV instruction, and
1311/// return the defining instruction.
1312static MachineInstr *canFoldAsPredicatedOp(Register Reg,
1313 const MachineRegisterInfo &MRI,
1314 const TargetInstrInfo *TII) {
1315 if (!Reg.isVirtual())
1316 return nullptr;
1317 if (!MRI.hasOneNonDBGUse(RegNo: Reg))
1318 return nullptr;
1319 MachineInstr *MI = MRI.getVRegDef(Reg);
1320 if (!MI)
1321 return nullptr;
1322 // Check if MI can be predicated and folded into the CCMOV.
1323 if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
1324 return nullptr;
1325 // Don't predicate li idiom.
1326 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1327 MI->getOperand(1).getReg() == RISCV::X0)
1328 return nullptr;
1329 // Check if MI has any other defs or physreg uses.
1330 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) {
1331 // Reject frame index operands, PEI can't handle the predicated pseudos.
1332 if (MO.isFI() || MO.isCPI() || MO.isJTI())
1333 return nullptr;
1334 if (!MO.isReg())
1335 continue;
1336 // MI can't have any tied operands, that would conflict with predication.
1337 if (MO.isTied())
1338 return nullptr;
1339 if (MO.isDef())
1340 return nullptr;
1341 // Allow constant physregs.
1342 if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(PhysReg: MO.getReg()))
1343 return nullptr;
1344 }
1345 bool DontMoveAcrossStores = true;
1346 if (!MI->isSafeToMove(/* AliasAnalysis = */ AA: nullptr, SawStore&: DontMoveAcrossStores))
1347 return nullptr;
1348 return MI;
1349}
1350
1351bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
1352 SmallVectorImpl<MachineOperand> &Cond,
1353 unsigned &TrueOp, unsigned &FalseOp,
1354 bool &Optimizable) const {
1355 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1356 "Unknown select instruction");
1357 // CCMOV operands:
1358 // 0: Def.
1359 // 1: LHS of compare.
1360 // 2: RHS of compare.
1361 // 3: Condition code.
1362 // 4: False use.
1363 // 5: True use.
1364 TrueOp = 5;
1365 FalseOp = 4;
1366 Cond.push_back(Elt: MI.getOperand(i: 1));
1367 Cond.push_back(Elt: MI.getOperand(i: 2));
1368 Cond.push_back(Elt: MI.getOperand(i: 3));
1369 // We can only fold when we support short forward branch opt.
1370 Optimizable = STI.hasShortForwardBranchOpt();
1371 return false;
1372}
1373
1374MachineInstr *
1375RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
1376 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
1377 bool PreferFalse) const {
1378 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1379 "Unknown select instruction");
1380 if (!STI.hasShortForwardBranchOpt())
1381 return nullptr;
1382
1383 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1384 MachineInstr *DefMI =
1385 canFoldAsPredicatedOp(MI.getOperand(i: 5).getReg(), MRI, this);
1386 bool Invert = !DefMI;
1387 if (!DefMI)
1388 DefMI = canFoldAsPredicatedOp(MI.getOperand(i: 4).getReg(), MRI, this);
1389 if (!DefMI)
1390 return nullptr;
1391
1392 // Find new register class to use.
1393 MachineOperand FalseReg = MI.getOperand(i: Invert ? 5 : 4);
1394 Register DestReg = MI.getOperand(i: 0).getReg();
1395 const TargetRegisterClass *PreviousClass = MRI.getRegClass(Reg: FalseReg.getReg());
1396 if (!MRI.constrainRegClass(Reg: DestReg, RC: PreviousClass))
1397 return nullptr;
1398
1399 unsigned PredOpc = getPredicatedOpcode(Opcode: DefMI->getOpcode());
1400 assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");
1401
1402 // Create a new predicated version of DefMI.
1403 MachineInstrBuilder NewMI =
1404 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);
1405
1406 // Copy the condition portion.
1407 NewMI.add(MO: MI.getOperand(i: 1));
1408 NewMI.add(MO: MI.getOperand(i: 2));
1409
1410 // Add condition code, inverting if necessary.
1411 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm());
1412 if (Invert)
1413 CC = RISCVCC::getOppositeBranchCondition(CC);
1414 NewMI.addImm(Val: CC);
1415
1416 // Copy the false register.
1417 NewMI.add(MO: FalseReg);
1418
1419 // Copy all the DefMI operands.
1420 const MCInstrDesc &DefDesc = DefMI->getDesc();
1421 for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
1422 NewMI.add(MO: DefMI->getOperand(i));
1423
1424 // Update SeenMIs set: register newly created MI and erase removed DefMI.
1425 SeenMIs.insert(Ptr: NewMI);
1426 SeenMIs.erase(Ptr: DefMI);
1427
1428 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
1429 // DefMI would be invalid when tranferred inside the loop. Checking for a
1430 // loop is expensive, but at least remove kill flags if they are in different
1431 // BBs.
1432 if (DefMI->getParent() != MI.getParent())
1433 NewMI->clearKillInfo();
1434
1435 // The caller will erase MI, but not DefMI.
1436 DefMI->eraseFromParent();
1437 return NewMI;
1438}
1439
1440unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
1441 if (MI.isMetaInstruction())
1442 return 0;
1443
1444 unsigned Opcode = MI.getOpcode();
1445
1446 if (Opcode == TargetOpcode::INLINEASM ||
1447 Opcode == TargetOpcode::INLINEASM_BR) {
1448 const MachineFunction &MF = *MI.getParent()->getParent();
1449 const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
1450 return getInlineAsmLength(MI.getOperand(i: 0).getSymbolName(),
1451 *TM.getMCAsmInfo());
1452 }
1453
1454 if (!MI.memoperands_empty()) {
1455 MachineMemOperand *MMO = *(MI.memoperands_begin());
1456 const MachineFunction &MF = *MI.getParent()->getParent();
1457 const auto &ST = MF.getSubtarget<RISCVSubtarget>();
1458 if (ST.hasStdExtZihintntl() && MMO->isNonTemporal()) {
1459 if (ST.hasStdExtCOrZca() && ST.enableRVCHintInstrs()) {
1460 if (isCompressibleInst(MI, STI))
1461 return 4; // c.ntl.all + c.load/c.store
1462 return 6; // c.ntl.all + load/store
1463 }
1464 return 8; // ntl.all + load/store
1465 }
1466 }
1467
1468 if (Opcode == TargetOpcode::BUNDLE)
1469 return getInstBundleLength(MI);
1470
1471 if (MI.getParent() && MI.getParent()->getParent()) {
1472 if (isCompressibleInst(MI, STI))
1473 return 2;
1474 }
1475
1476 switch (Opcode) {
1477 case TargetOpcode::STACKMAP:
1478 // The upper bound for a stackmap intrinsic is the full length of its shadow
1479 return StackMapOpers(&MI).getNumPatchBytes();
1480 case TargetOpcode::PATCHPOINT:
1481 // The size of the patchpoint intrinsic is the number of bytes requested
1482 return PatchPointOpers(&MI).getNumPatchBytes();
1483 case TargetOpcode::STATEPOINT: {
1484 // The size of the statepoint intrinsic is the number of bytes requested
1485 unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
1486 // No patch bytes means at most a PseudoCall is emitted
1487 return std::max(a: NumBytes, b: 8U);
1488 }
1489 default:
1490 return get(Opcode).getSize();
1491 }
1492}
1493
1494unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
1495 unsigned Size = 0;
1496 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
1497 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
1498 while (++I != E && I->isInsideBundle()) {
1499 assert(!I->isBundle() && "No nested bundle!");
1500 Size += getInstSizeInBytes(MI: *I);
1501 }
1502 return Size;
1503}
1504
1505bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1506 const unsigned Opcode = MI.getOpcode();
1507 switch (Opcode) {
1508 default:
1509 break;
1510 case RISCV::FSGNJ_D:
1511 case RISCV::FSGNJ_S:
1512 case RISCV::FSGNJ_H:
1513 case RISCV::FSGNJ_D_INX:
1514 case RISCV::FSGNJ_D_IN32X:
1515 case RISCV::FSGNJ_S_INX:
1516 case RISCV::FSGNJ_H_INX:
1517 // The canonical floating-point move is fsgnj rd, rs, rs.
1518 return MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isReg() &&
1519 MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg();
1520 case RISCV::ADDI:
1521 case RISCV::ORI:
1522 case RISCV::XORI:
1523 return (MI.getOperand(1).isReg() &&
1524 MI.getOperand(1).getReg() == RISCV::X0) ||
1525 (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
1526 }
1527 return MI.isAsCheapAsAMove();
1528}
1529
1530std::optional<DestSourcePair>
1531RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1532 if (MI.isMoveReg())
1533 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
1534 switch (MI.getOpcode()) {
1535 default:
1536 break;
1537 case RISCV::ADDI:
1538 // Operand 1 can be a frameindex but callers expect registers
1539 if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isImm() &&
1540 MI.getOperand(i: 2).getImm() == 0)
1541 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
1542 break;
1543 case RISCV::FSGNJ_D:
1544 case RISCV::FSGNJ_S:
1545 case RISCV::FSGNJ_H:
1546 case RISCV::FSGNJ_D_INX:
1547 case RISCV::FSGNJ_D_IN32X:
1548 case RISCV::FSGNJ_S_INX:
1549 case RISCV::FSGNJ_H_INX:
1550 // The canonical floating-point move is fsgnj rd, rs, rs.
1551 if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isReg() &&
1552 MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg())
1553 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
1554 break;
1555 }
1556 return std::nullopt;
1557}
1558
1559MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
1560 if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
1561 // The option is unused. Choose Local strategy only for in-order cores. When
1562 // scheduling model is unspecified, use MinInstrCount strategy as more
1563 // generic one.
1564 const auto &SchedModel = STI.getSchedModel();
1565 return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
1566 ? MachineTraceStrategy::TS_MinInstrCount
1567 : MachineTraceStrategy::TS_Local;
1568 }
1569 // The strategy was forced by the option.
1570 return ForceMachineCombinerStrategy;
1571}
1572
1573void RISCVInstrInfo::finalizeInsInstrs(
1574 MachineInstr &Root, unsigned &Pattern,
1575 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
1576 int16_t FrmOpIdx =
1577 RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
1578 if (FrmOpIdx < 0) {
1579 assert(all_of(InsInstrs,
1580 [](MachineInstr *MI) {
1581 return RISCV::getNamedOperandIdx(MI->getOpcode(),
1582 RISCV::OpName::frm) < 0;
1583 }) &&
1584 "New instructions require FRM whereas the old one does not have it");
1585 return;
1586 }
1587
1588 const MachineOperand &FRM = Root.getOperand(i: FrmOpIdx);
1589 MachineFunction &MF = *Root.getMF();
1590
1591 for (auto *NewMI : InsInstrs) {
1592    // We've already added the FRM operand.
1593 if (static_cast<unsigned>(RISCV::getNamedOperandIdx(
1594 NewMI->getOpcode(), RISCV::OpName::frm)) != NewMI->getNumOperands())
1595 continue;
1596 MachineInstrBuilder MIB(MF, NewMI);
1597 MIB.add(MO: FRM);
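    // A dynamic rounding mode reads the FRM control register, so model that
    // with an implicit use of FRM.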
1598 if (FRM.getImm() == RISCVFPRndMode::DYN)
1599 MIB.addUse(RISCV::FRM, RegState::Implicit);
1600 }
1601}
1602
1603static bool isFADD(unsigned Opc) {
1604 switch (Opc) {
1605 default:
1606 return false;
1607 case RISCV::FADD_H:
1608 case RISCV::FADD_S:
1609 case RISCV::FADD_D:
1610 return true;
1611 }
1612}
1613
1614static bool isFSUB(unsigned Opc) {
1615 switch (Opc) {
1616 default:
1617 return false;
1618 case RISCV::FSUB_H:
1619 case RISCV::FSUB_S:
1620 case RISCV::FSUB_D:
1621 return true;
1622 }
1623}
1624
1625static bool isFMUL(unsigned Opc) {
1626 switch (Opc) {
1627 default:
1628 return false;
1629 case RISCV::FMUL_H:
1630 case RISCV::FMUL_S:
1631 case RISCV::FMUL_D:
1632 return true;
1633 }
1634}
1635
1636bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
1637 bool &Commuted) const {
1638 if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
1639 return false;
1640
1641 const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
1642 unsigned OperandIdx = Commuted ? 2 : 1;
1643 const MachineInstr &Sibling =
1644 *MRI.getVRegDef(Reg: Inst.getOperand(i: OperandIdx).getReg());
1645
1646 int16_t InstFrmOpIdx =
1647 RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
1648 int16_t SiblingFrmOpIdx =
1649 RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);
1650
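  // Reassociation is only safe when neither instruction has an FRM operand or
  // both use the same rounding mode.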
1651 return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
1652 RISCV::hasEqualFRM(MI1: Inst, MI2: Sibling);
1653}
1654
1655bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
1656 bool Invert) const {
1657 unsigned Opc = Inst.getOpcode();
1658 if (Invert) {
1659 auto InverseOpcode = getInverseOpcode(Opcode: Opc);
1660 if (!InverseOpcode)
1661 return false;
1662 Opc = *InverseOpcode;
1663 }
1664
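  // FP adds and multiplies are only reassociable when both the reassoc and
  // nsz fast-math flags are set on the instruction.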
1665 if (isFADD(Opc) || isFMUL(Opc))
1666 return Inst.getFlag(Flag: MachineInstr::MIFlag::FmReassoc) &&
1667 Inst.getFlag(Flag: MachineInstr::MIFlag::FmNsz);
1668
1669 switch (Opc) {
1670 default:
1671 return false;
1672 case RISCV::ADD:
1673 case RISCV::ADDW:
1674 case RISCV::AND:
1675 case RISCV::OR:
1676 case RISCV::XOR:
1677 // From RISC-V ISA spec, if both the high and low bits of the same product
1678 // are required, then the recommended code sequence is:
1679 //
1680 // MULH[[S]U] rdh, rs1, rs2
1681 // MUL rdl, rs1, rs2
1682 // (source register specifiers must be in same order and rdh cannot be the
1683 // same as rs1 or rs2)
1684 //
1685 // Microarchitectures can then fuse these into a single multiply operation
1686 // instead of performing two separate multiplies.
1687 // MachineCombiner may reassociate MUL operands and lose the fusion
1688 // opportunity.
1689 case RISCV::MUL:
1690 case RISCV::MULW:
1691 case RISCV::MIN:
1692 case RISCV::MINU:
1693 case RISCV::MAX:
1694 case RISCV::MAXU:
1695 case RISCV::FMIN_H:
1696 case RISCV::FMIN_S:
1697 case RISCV::FMIN_D:
1698 case RISCV::FMAX_H:
1699 case RISCV::FMAX_S:
1700 case RISCV::FMAX_D:
1701 return true;
1702 }
1703
1704 return false;
1705}
1706
1707std::optional<unsigned>
1708RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
1709 switch (Opcode) {
1710 default:
1711 return std::nullopt;
1712 case RISCV::FADD_H:
1713 return RISCV::FSUB_H;
1714 case RISCV::FADD_S:
1715 return RISCV::FSUB_S;
1716 case RISCV::FADD_D:
1717 return RISCV::FSUB_D;
1718 case RISCV::FSUB_H:
1719 return RISCV::FADD_H;
1720 case RISCV::FSUB_S:
1721 return RISCV::FADD_S;
1722 case RISCV::FSUB_D:
1723 return RISCV::FADD_D;
1724 case RISCV::ADD:
1725 return RISCV::SUB;
1726 case RISCV::SUB:
1727 return RISCV::ADD;
1728 case RISCV::ADDW:
1729 return RISCV::SUBW;
1730 case RISCV::SUBW:
1731 return RISCV::ADDW;
1732 }
1733}
1734
1735static bool canCombineFPFusedMultiply(const MachineInstr &Root,
1736 const MachineOperand &MO,
1737 bool DoRegPressureReduce) {
1738 if (!MO.isReg() || !MO.getReg().isVirtual())
1739 return false;
1740 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
1741 MachineInstr *MI = MRI.getVRegDef(Reg: MO.getReg());
1742 if (!MI || !isFMUL(Opc: MI->getOpcode()))
1743 return false;
1744
1745 if (!Root.getFlag(Flag: MachineInstr::MIFlag::FmContract) ||
1746 !MI->getFlag(Flag: MachineInstr::MIFlag::FmContract))
1747 return false;
1748
1749  // Try combining even if the fmul has more than one use, as it eliminates the
1750  // dependency between the fadd (or fsub) and the fmul. However, it can extend
1751  // the live ranges of the fmul operands, so reject the transformation in
1752  // register pressure reduction mode.
1753 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
1754 return false;
1755
1756 // Do not combine instructions from different basic blocks.
1757 if (Root.getParent() != MI->getParent())
1758 return false;
1759 return RISCV::hasEqualFRM(MI1: Root, MI2: *MI);
1760}
1761
1762static bool getFPFusedMultiplyPatterns(MachineInstr &Root,
1763 SmallVectorImpl<unsigned> &Patterns,
1764 bool DoRegPressureReduce) {
1765 unsigned Opc = Root.getOpcode();
1766 bool IsFAdd = isFADD(Opc);
1767 if (!IsFAdd && !isFSUB(Opc))
1768 return false;
1769 bool Added = false;
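  // If the fmul feeds operand 1, the addend is operand 2 (FMADD_AX/FMSUB);
  // if it feeds operand 2, the addend is operand 1 (FMADD_XA/FNMSUB).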
1770 if (canCombineFPFusedMultiply(Root, MO: Root.getOperand(i: 1),
1771 DoRegPressureReduce)) {
1772 Patterns.push_back(Elt: IsFAdd ? RISCVMachineCombinerPattern::FMADD_AX
1773 : RISCVMachineCombinerPattern::FMSUB);
1774 Added = true;
1775 }
1776 if (canCombineFPFusedMultiply(Root, MO: Root.getOperand(i: 2),
1777 DoRegPressureReduce)) {
1778 Patterns.push_back(Elt: IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA
1779 : RISCVMachineCombinerPattern::FNMSUB);
1780 Added = true;
1781 }
1782 return Added;
1783}
1784
1785static bool getFPPatterns(MachineInstr &Root,
1786 SmallVectorImpl<unsigned> &Patterns,
1787 bool DoRegPressureReduce) {
1788 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
1789}
1790
1791/// Utility routine that checks if \p MO is defined by a \p CombineOpc
1792/// instruction in the basic block \p MBB.
1793static const MachineInstr *canCombine(const MachineBasicBlock &MBB,
1794 const MachineOperand &MO,
1795 unsigned CombineOpc) {
1796 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1797 const MachineInstr *MI = nullptr;
1798
1799 if (MO.isReg() && MO.getReg().isVirtual())
1800 MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
1801 // And it needs to be in the trace (otherwise, it won't have a depth).
1802 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
1803 return nullptr;
1804  // It must only be used by the user we combine with.
1805 if (!MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
1806 return nullptr;
1807
1808 return MI;
1809}
1810
1811/// Utility routine that checks if \p MO is defined by a SLLI in \p MBB that
1812/// can be combined by splitting across two SHXADD instructions. The first
1813/// SHXADD shift amount is given by \p OuterShiftAmt.
1814static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB,
1815 const MachineOperand &MO,
1816 unsigned OuterShiftAmt) {
1817 const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI);
1818 if (!ShiftMI)
1819 return false;
1820
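  // The inner shift amount must be at least the outer shift amount, and the
  // difference must be at most 3 so it can be folded into a second shXadd
  // (or a plain add when the two amounts are equal).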
1821 unsigned InnerShiftAmt = ShiftMI->getOperand(i: 2).getImm();
1822 if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)
1823 return false;
1824
1825 return true;
1826}
1827
1828// Returns the shift amount from a SHXADD instruction. Returns 0 if the
1829// instruction is not a SHXADD.
1830static unsigned getSHXADDShiftAmount(unsigned Opc) {
1831 switch (Opc) {
1832 default:
1833 return 0;
1834 case RISCV::SH1ADD:
1835 return 1;
1836 case RISCV::SH2ADD:
1837 return 2;
1838 case RISCV::SH3ADD:
1839 return 3;
1840 }
1841}
1842
1843// Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
1844// (sh3add (sh2add Y, Z), X).
1845static bool getSHXADDPatterns(const MachineInstr &Root,
1846 SmallVectorImpl<unsigned> &Patterns) {
1847 unsigned ShiftAmt = getSHXADDShiftAmount(Opc: Root.getOpcode());
1848 if (!ShiftAmt)
1849 return false;
1850
1851 const MachineBasicBlock &MBB = *Root.getParent();
1852
1853 const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD);
1854 if (!AddMI)
1855 return false;
1856
1857 bool Found = false;
1858 if (canCombineShiftIntoShXAdd(MBB, MO: AddMI->getOperand(i: 1), OuterShiftAmt: ShiftAmt)) {
1859 Patterns.push_back(Elt: RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
1860 Found = true;
1861 }
1862 if (canCombineShiftIntoShXAdd(MBB, MO: AddMI->getOperand(i: 2), OuterShiftAmt: ShiftAmt)) {
1863 Patterns.push_back(Elt: RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
1864 Found = true;
1865 }
1866
1867 return Found;
1868}
1869
1870CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const {
1871 switch (Pattern) {
1872 case RISCVMachineCombinerPattern::FMADD_AX:
1873 case RISCVMachineCombinerPattern::FMADD_XA:
1874 case RISCVMachineCombinerPattern::FMSUB:
1875 case RISCVMachineCombinerPattern::FNMSUB:
1876 return CombinerObjective::MustReduceDepth;
1877 default:
1878 return TargetInstrInfo::getCombinerObjective(Pattern);
1879 }
1880}
1881
1882bool RISCVInstrInfo::getMachineCombinerPatterns(
1883 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
1884 bool DoRegPressureReduce) const {
1885
1886 if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
1887 return true;
1888
1889 if (getSHXADDPatterns(Root, Patterns))
1890 return true;
1891
1892 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
1893 DoRegPressureReduce);
1894}
1895
1896static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) {
1897 switch (RootOpc) {
1898 default:
1899 llvm_unreachable("Unexpected opcode");
1900 case RISCV::FADD_H:
1901 return RISCV::FMADD_H;
1902 case RISCV::FADD_S:
1903 return RISCV::FMADD_S;
1904 case RISCV::FADD_D:
1905 return RISCV::FMADD_D;
1906 case RISCV::FSUB_H:
1907 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
1908 : RISCV::FNMSUB_H;
1909 case RISCV::FSUB_S:
1910 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
1911 : RISCV::FNMSUB_S;
1912 case RISCV::FSUB_D:
1913 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
1914 : RISCV::FNMSUB_D;
1915 }
1916}
1917
1918static unsigned getAddendOperandIdx(unsigned Pattern) {
1919 switch (Pattern) {
1920 default:
1921 llvm_unreachable("Unexpected pattern");
1922 case RISCVMachineCombinerPattern::FMADD_AX:
1923 case RISCVMachineCombinerPattern::FMSUB:
1924 return 2;
1925 case RISCVMachineCombinerPattern::FMADD_XA:
1926 case RISCVMachineCombinerPattern::FNMSUB:
1927 return 1;
1928 }
1929}
1930
1931static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
1932 unsigned Pattern,
1933 SmallVectorImpl<MachineInstr *> &InsInstrs,
1934 SmallVectorImpl<MachineInstr *> &DelInstrs) {
1935 MachineFunction *MF = Root.getMF();
1936 MachineRegisterInfo &MRI = MF->getRegInfo();
1937 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1938
1939 MachineOperand &Mul1 = Prev.getOperand(i: 1);
1940 MachineOperand &Mul2 = Prev.getOperand(i: 2);
1941 MachineOperand &Dst = Root.getOperand(i: 0);
1942 MachineOperand &Addend = Root.getOperand(i: getAddendOperandIdx(Pattern));
1943
1944 Register DstReg = Dst.getReg();
1945 unsigned FusedOpc = getFPFusedMultiplyOpcode(RootOpc: Root.getOpcode(), Pattern);
1946 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
1947 DebugLoc MergedLoc =
1948 DILocation::getMergedLocation(LocA: Root.getDebugLoc(), LocB: Prev.getDebugLoc());
1949
1950 bool Mul1IsKill = Mul1.isKill();
1951 bool Mul2IsKill = Mul2.isKill();
1952 bool AddendIsKill = Addend.isKill();
1953
1954 // We need to clear kill flags since we may be extending the live range past
1955 // a kill. If the mul had kill flags, we can preserve those since we know
1956 // where the previous range stopped.
1957 MRI.clearKillFlags(Reg: Mul1.getReg());
1958 MRI.clearKillFlags(Reg: Mul2.getReg());
1959
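  // Build the fused multiply-add/sub: the multiplicands from the fmul come
  // first and the addend/subtrahend from Root comes last.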
1960 MachineInstrBuilder MIB =
1961 BuildMI(MF&: *MF, MIMD: MergedLoc, MCID: TII->get(Opcode: FusedOpc), DestReg: DstReg)
1962 .addReg(RegNo: Mul1.getReg(), flags: getKillRegState(B: Mul1IsKill))
1963 .addReg(RegNo: Mul2.getReg(), flags: getKillRegState(B: Mul2IsKill))
1964 .addReg(RegNo: Addend.getReg(), flags: getKillRegState(B: AddendIsKill))
1965 .setMIFlags(IntersectedFlags);
1966
1967 InsInstrs.push_back(Elt: MIB);
1968 if (MRI.hasOneNonDBGUse(RegNo: Prev.getOperand(i: 0).getReg()))
1969 DelInstrs.push_back(Elt: &Prev);
1970 DelInstrs.push_back(Elt: &Root);
1971}
1972
1973// Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
1974// (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
1975// shXadd instructions. The outer shXadd keeps its original opcode.
1976static void
1977genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
1978 SmallVectorImpl<MachineInstr *> &InsInstrs,
1979 SmallVectorImpl<MachineInstr *> &DelInstrs,
1980 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
1981 MachineFunction *MF = Root.getMF();
1982 MachineRegisterInfo &MRI = MF->getRegInfo();
1983 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1984
1985 unsigned OuterShiftAmt = getSHXADDShiftAmount(Opc: Root.getOpcode());
1986 assert(OuterShiftAmt != 0 && "Unexpected opcode");
1987
1988 MachineInstr *AddMI = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: 2).getReg());
1989 MachineInstr *ShiftMI =
1990 MRI.getUniqueVRegDef(Reg: AddMI->getOperand(i: AddOpIdx).getReg());
1991
1992 unsigned InnerShiftAmt = ShiftMI->getOperand(i: 2).getImm();
1993 assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");
1994
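  // The leftover shift (InnerShiftAmt - OuterShiftAmt) becomes the inner op,
  // e.g. for (sh3add Z, (add X, (slli Y, 5))) the inner op is sh2add since
  // 5 - 3 == 2.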
1995 unsigned InnerOpc;
1996 switch (InnerShiftAmt - OuterShiftAmt) {
1997 default:
1998 llvm_unreachable("Unexpected shift amount");
1999 case 0:
2000 InnerOpc = RISCV::ADD;
2001 break;
2002 case 1:
2003 InnerOpc = RISCV::SH1ADD;
2004 break;
2005 case 2:
2006 InnerOpc = RISCV::SH2ADD;
2007 break;
2008 case 3:
2009 InnerOpc = RISCV::SH3ADD;
2010 break;
2011 }
2012
2013 const MachineOperand &X = AddMI->getOperand(i: 3 - AddOpIdx);
2014 const MachineOperand &Y = ShiftMI->getOperand(i: 1);
2015 const MachineOperand &Z = Root.getOperand(i: 1);
2016
2017 Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass);
2018
2019 auto MIB1 = BuildMI(MF&: *MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: InnerOpc), DestReg: NewVR)
2020 .addReg(RegNo: Y.getReg(), flags: getKillRegState(B: Y.isKill()))
2021 .addReg(RegNo: Z.getReg(), flags: getKillRegState(B: Z.isKill()));
2022 auto MIB2 = BuildMI(MF&: *MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: Root.getOpcode()),
2023 DestReg: Root.getOperand(i: 0).getReg())
2024 .addReg(RegNo: NewVR, flags: RegState::Kill)
2025 .addReg(RegNo: X.getReg(), flags: getKillRegState(B: X.isKill()));
2026
2027 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
2028 InsInstrs.push_back(Elt: MIB1);
2029 InsInstrs.push_back(Elt: MIB2);
2030 DelInstrs.push_back(Elt: ShiftMI);
2031 DelInstrs.push_back(Elt: AddMI);
2032 DelInstrs.push_back(Elt: &Root);
2033}
2034
2035void RISCVInstrInfo::genAlternativeCodeSequence(
2036 MachineInstr &Root, unsigned Pattern,
2037 SmallVectorImpl<MachineInstr *> &InsInstrs,
2038 SmallVectorImpl<MachineInstr *> &DelInstrs,
2039 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
2040 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
2041 switch (Pattern) {
2042 default:
2043 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
2044 DelInstrs, InstrIdxForVirtReg);
2045 return;
2046 case RISCVMachineCombinerPattern::FMADD_AX:
2047 case RISCVMachineCombinerPattern::FMSUB: {
2048 MachineInstr &Prev = *MRI.getVRegDef(Reg: Root.getOperand(i: 1).getReg());
2049 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2050 return;
2051 }
2052 case RISCVMachineCombinerPattern::FMADD_XA:
2053 case RISCVMachineCombinerPattern::FNMSUB: {
2054 MachineInstr &Prev = *MRI.getVRegDef(Reg: Root.getOperand(i: 2).getReg());
2055 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2056 return;
2057 }
2058 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
2059 genShXAddAddShift(Root, AddOpIdx: 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2060 return;
2061 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
2062 genShXAddAddShift(Root, AddOpIdx: 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2063 return;
2064 }
2065}
2066
2067bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
2068 StringRef &ErrInfo) const {
2069 MCInstrDesc const &Desc = MI.getDesc();
2070
2071 for (const auto &[Index, Operand] : enumerate(First: Desc.operands())) {
2072 unsigned OpType = Operand.OperandType;
2073 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
2074 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
2075 const MachineOperand &MO = MI.getOperand(i: Index);
2076 if (MO.isImm()) {
2077 int64_t Imm = MO.getImm();
2078 bool Ok;
2079 switch (OpType) {
2080 default:
2081 llvm_unreachable("Unexpected operand type");
2082
2083 // clang-format off
2084#define CASE_OPERAND_UIMM(NUM) \
2085 case RISCVOp::OPERAND_UIMM##NUM: \
2086 Ok = isUInt<NUM>(Imm); \
2087 break;
2088 CASE_OPERAND_UIMM(1)
2089 CASE_OPERAND_UIMM(2)
2090 CASE_OPERAND_UIMM(3)
2091 CASE_OPERAND_UIMM(4)
2092 CASE_OPERAND_UIMM(5)
2093 CASE_OPERAND_UIMM(6)
2094 CASE_OPERAND_UIMM(7)
2095 CASE_OPERAND_UIMM(8)
2096 CASE_OPERAND_UIMM(12)
2097 CASE_OPERAND_UIMM(20)
2098 // clang-format on
2099 case RISCVOp::OPERAND_UIMM2_LSB0:
2100 Ok = isShiftedUInt<1, 1>(x: Imm);
2101 break;
2102 case RISCVOp::OPERAND_UIMM7_LSB00:
2103 Ok = isShiftedUInt<5, 2>(x: Imm);
2104 break;
2105 case RISCVOp::OPERAND_UIMM8_LSB00:
2106 Ok = isShiftedUInt<6, 2>(x: Imm);
2107 break;
2108 case RISCVOp::OPERAND_UIMM8_LSB000:
2109 Ok = isShiftedUInt<5, 3>(x: Imm);
2110 break;
2111 case RISCVOp::OPERAND_UIMM8_GE32:
2112 Ok = isUInt<8>(x: Imm) && Imm >= 32;
2113 break;
2114 case RISCVOp::OPERAND_UIMM9_LSB000:
2115 Ok = isShiftedUInt<6, 3>(x: Imm);
2116 break;
2117 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
2118 Ok = isShiftedInt<6, 4>(x: Imm) && (Imm != 0);
2119 break;
2120 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO:
2121 Ok = isShiftedUInt<8, 2>(x: Imm) && (Imm != 0);
2122 break;
2123 case RISCVOp::OPERAND_ZERO:
2124 Ok = Imm == 0;
2125 break;
2126 case RISCVOp::OPERAND_SIMM5:
2127 Ok = isInt<5>(x: Imm);
2128 break;
2129 case RISCVOp::OPERAND_SIMM5_PLUS1:
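        // simm5_plus1 is the simm5 range shifted up by one, i.e. [-15, 16].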
2130 Ok = (isInt<5>(x: Imm) && Imm != -16) || Imm == 16;
2131 break;
2132 case RISCVOp::OPERAND_SIMM6:
2133 Ok = isInt<6>(x: Imm);
2134 break;
2135 case RISCVOp::OPERAND_SIMM6_NONZERO:
2136 Ok = Imm != 0 && isInt<6>(x: Imm);
2137 break;
2138 case RISCVOp::OPERAND_VTYPEI10:
2139 Ok = isUInt<10>(x: Imm);
2140 break;
2141 case RISCVOp::OPERAND_VTYPEI11:
2142 Ok = isUInt<11>(x: Imm);
2143 break;
2144 case RISCVOp::OPERAND_SIMM12:
2145 Ok = isInt<12>(x: Imm);
2146 break;
2147 case RISCVOp::OPERAND_SIMM12_LSB00000:
2148 Ok = isShiftedInt<7, 5>(x: Imm);
2149 break;
2150 case RISCVOp::OPERAND_UIMMLOG2XLEN:
2151 Ok = STI.is64Bit() ? isUInt<6>(x: Imm) : isUInt<5>(x: Imm);
2152 break;
2153 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
2154 Ok = STI.is64Bit() ? isUInt<6>(x: Imm) : isUInt<5>(x: Imm);
2155 Ok = Ok && Imm != 0;
2156 break;
2157 case RISCVOp::OPERAND_CLUI_IMM:
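        // c.lui takes a non-zero value in [1, 31] or one of the sign-extended
        // encodings in [0xfffe0, 0xfffff].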
2158 Ok = (isUInt<5>(x: Imm) && Imm != 0) ||
2159 (Imm >= 0xfffe0 && Imm <= 0xfffff);
2160 break;
2161 case RISCVOp::OPERAND_RVKRNUM:
2162 Ok = Imm >= 0 && Imm <= 10;
2163 break;
2164 case RISCVOp::OPERAND_RVKRNUM_0_7:
2165 Ok = Imm >= 0 && Imm <= 7;
2166 break;
2167 case RISCVOp::OPERAND_RVKRNUM_1_10:
2168 Ok = Imm >= 1 && Imm <= 10;
2169 break;
2170 case RISCVOp::OPERAND_RVKRNUM_2_14:
2171 Ok = Imm >= 2 && Imm <= 14;
2172 break;
2173 case RISCVOp::OPERAND_SPIMM:
2174 Ok = (Imm & 0xf) == 0;
2175 break;
2176 }
2177 if (!Ok) {
2178 ErrInfo = "Invalid immediate";
2179 return false;
2180 }
2181 }
2182 }
2183 }
2184
2185 const uint64_t TSFlags = Desc.TSFlags;
2186 if (RISCVII::hasVLOp(TSFlags)) {
2187 const MachineOperand &Op = MI.getOperand(i: RISCVII::getVLOpNum(Desc));
2188 if (!Op.isImm() && !Op.isReg()) {
2189 ErrInfo = "Invalid operand type for VL operand";
2190 return false;
2191 }
2192 if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
2193 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2194 auto *RC = MRI.getRegClass(Reg: Op.getReg());
2195 if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
2196 ErrInfo = "Invalid register class for VL operand";
2197 return false;
2198 }
2199 }
2200 if (!RISCVII::hasSEWOp(TSFlags)) {
2201 ErrInfo = "VL operand w/o SEW operand?";
2202 return false;
2203 }
2204 }
2205 if (RISCVII::hasSEWOp(TSFlags)) {
2206 unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
2207 if (!MI.getOperand(i: OpIdx).isImm()) {
2208 ErrInfo = "SEW value expected to be an immediate";
2209 return false;
2210 }
2211 uint64_t Log2SEW = MI.getOperand(i: OpIdx).getImm();
2212 if (Log2SEW > 31) {
2213 ErrInfo = "Unexpected SEW value";
2214 return false;
2215 }
2216 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
2217 if (!RISCVVType::isValidSEW(SEW)) {
2218 ErrInfo = "Unexpected SEW value";
2219 return false;
2220 }
2221 }
2222 if (RISCVII::hasVecPolicyOp(TSFlags)) {
2223 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
2224 if (!MI.getOperand(i: OpIdx).isImm()) {
2225 ErrInfo = "Policy operand expected to be an immediate";
2226 return false;
2227 }
2228 uint64_t Policy = MI.getOperand(i: OpIdx).getImm();
2229 if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {
2230 ErrInfo = "Invalid Policy Value";
2231 return false;
2232 }
2233 if (!RISCVII::hasVLOp(TSFlags)) {
2234 ErrInfo = "policy operand w/o VL operand?";
2235 return false;
2236 }
2237
2238  // VecPolicy operands can only exist on instructions with passthru/merge
2239  // operands. Note that not all instructions with a passthru operand have a
2240  // vec policy operand; some instructions have implicit policies.
2241 unsigned UseOpIdx;
2242 if (!MI.isRegTiedToUseOperand(DefOpIdx: 0, UseOpIdx: &UseOpIdx)) {
2243 ErrInfo = "policy operand w/o tied operand?";
2244 return false;
2245 }
2246 }
2247
2248 return true;
2249}
2250
2251bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
2252 const MachineInstr &AddrI,
2253 ExtAddrMode &AM) const {
2254 switch (MemI.getOpcode()) {
2255 default:
2256 return false;
2257 case RISCV::LB:
2258 case RISCV::LBU:
2259 case RISCV::LH:
2260 case RISCV::LHU:
2261 case RISCV::LW:
2262 case RISCV::LWU:
2263 case RISCV::LD:
2264 case RISCV::FLH:
2265 case RISCV::FLW:
2266 case RISCV::FLD:
2267 case RISCV::SB:
2268 case RISCV::SH:
2269 case RISCV::SW:
2270 case RISCV::SD:
2271 case RISCV::FSH:
2272 case RISCV::FSW:
2273 case RISCV::FSD:
2274 break;
2275 }
2276
2277 if (MemI.getOperand(i: 0).getReg() == Reg)
2278 return false;
2279
2280 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
2281 !AddrI.getOperand(2).isImm())
2282 return false;
2283
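  // Fold the ADDI displacement into the memory offset. On RV32 the address
  // arithmetic wraps at 32 bits, so sign extend the combined offset.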
2284 int64_t OldOffset = MemI.getOperand(i: 2).getImm();
2285 int64_t Disp = AddrI.getOperand(i: 2).getImm();
2286 int64_t NewOffset = OldOffset + Disp;
2287 if (!STI.is64Bit())
2288 NewOffset = SignExtend64<32>(x: NewOffset);
2289
2290 if (!isInt<12>(x: NewOffset))
2291 return false;
2292
2293 AM.BaseReg = AddrI.getOperand(i: 1).getReg();
2294 AM.ScaledReg = 0;
2295 AM.Scale = 0;
2296 AM.Displacement = NewOffset;
2297 AM.Form = ExtAddrMode::Formula::Basic;
2298 return true;
2299}
2300
2301MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
2302 const ExtAddrMode &AM) const {
2303
2304 const DebugLoc &DL = MemI.getDebugLoc();
2305 MachineBasicBlock &MBB = *MemI.getParent();
2306
2307 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
2308 "Addressing mode not supported for folding");
2309
2310 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode()))
2311 .addReg(MemI.getOperand(i: 0).getReg(),
2312 MemI.mayLoad() ? RegState::Define : 0)
2313 .addReg(AM.BaseReg)
2314 .addImm(AM.Displacement)
2315 .setMemRefs(MemI.memoperands())
2316 .setMIFlags(MemI.getFlags());
2317}
2318
2319bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
2320 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2321 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2322 const TargetRegisterInfo *TRI) const {
2323 if (!LdSt.mayLoadOrStore())
2324 return false;
2325
2326 // Conservatively, only handle scalar loads/stores for now.
2327 switch (LdSt.getOpcode()) {
2328 case RISCV::LB:
2329 case RISCV::LBU:
2330 case RISCV::SB:
2331 case RISCV::LH:
2332 case RISCV::LHU:
2333 case RISCV::FLH:
2334 case RISCV::SH:
2335 case RISCV::FSH:
2336 case RISCV::LW:
2337 case RISCV::LWU:
2338 case RISCV::FLW:
2339 case RISCV::SW:
2340 case RISCV::FSW:
2341 case RISCV::LD:
2342 case RISCV::FLD:
2343 case RISCV::SD:
2344 case RISCV::FSD:
2345 break;
2346 default:
2347 return false;
2348 }
2349 const MachineOperand *BaseOp;
2350 OffsetIsScalable = false;
2351 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2352 return false;
2353 BaseOps.push_back(Elt: BaseOp);
2354 return true;
2355}
2356
2357// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
2358// helper?
2359static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
2360 ArrayRef<const MachineOperand *> BaseOps1,
2361 const MachineInstr &MI2,
2362 ArrayRef<const MachineOperand *> BaseOps2) {
2363 // Only examine the first "base" operand of each instruction, on the
2364 // assumption that it represents the real base address of the memory access.
2365 // Other operands are typically offsets or indices from this base address.
2366 if (BaseOps1.front()->isIdenticalTo(Other: *BaseOps2.front()))
2367 return true;
2368
2369 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
2370 return false;
2371
2372 auto MO1 = *MI1.memoperands_begin();
2373 auto MO2 = *MI2.memoperands_begin();
2374 if (MO1->getAddrSpace() != MO2->getAddrSpace())
2375 return false;
2376
2377 auto Base1 = MO1->getValue();
2378 auto Base2 = MO2->getValue();
2379 if (!Base1 || !Base2)
2380 return false;
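  // Compare the underlying IR objects after stripping casts and GEP offsets.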
2381 Base1 = getUnderlyingObject(V: Base1);
2382 Base2 = getUnderlyingObject(V: Base2);
2383
2384 if (isa<UndefValue>(Val: Base1) || isa<UndefValue>(Val: Base2))
2385 return false;
2386
2387 return Base1 == Base2;
2388}
2389
2390bool RISCVInstrInfo::shouldClusterMemOps(
2391 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
2392 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2393 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
2394 unsigned NumBytes) const {
2395 // If the mem ops (to be clustered) do not have the same base ptr, then they
2396 // should not be clustered
2397 if (!BaseOps1.empty() && !BaseOps2.empty()) {
2398 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
2399 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
2400 if (!memOpsHaveSameBasePtr(MI1: FirstLdSt, BaseOps1, MI2: SecondLdSt, BaseOps2))
2401 return false;
2402 } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
2403 // If only one base op is empty, they do not have the same base ptr
2404 return false;
2405 }
2406
2407 unsigned CacheLineSize =
2408 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
2409 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
2410 CacheLineSize = CacheLineSize ? CacheLineSize : 64;
2411 // Cluster if the memory operations are on the same or a neighbouring cache
2412 // line, but limit the maximum ClusterSize to avoid creating too much
2413 // additional register pressure.
2414 return ClusterSize <= 4 && std::abs(i: Offset1 - Offset2) < CacheLineSize;
2415}
2416
2417// Set BaseReg (the base register operand), Offset (the byte offset being
2418// accessed) and the access Width of the passed instruction that reads/writes
2419// memory. Returns false if the instruction does not read/write memory or the
2420// BaseReg/Offset/Width can't be determined. This function is not guaranteed
2421// to recognise base operands and offsets in all cases.
2422// TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
2423// function) and set it as appropriate.
2424bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
2425 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
2426 LocationSize &Width, const TargetRegisterInfo *TRI) const {
2427 if (!LdSt.mayLoadOrStore())
2428 return false;
2429
2430 // Here we assume the standard RISC-V ISA, which uses a base+offset
2431 // addressing mode. You'll need to relax these conditions to support custom
2432 // load/store instructions.
2433 if (LdSt.getNumExplicitOperands() != 3)
2434 return false;
2435 if ((!LdSt.getOperand(i: 1).isReg() && !LdSt.getOperand(i: 1).isFI()) ||
2436 !LdSt.getOperand(i: 2).isImm())
2437 return false;
2438
2439 if (!LdSt.hasOneMemOperand())
2440 return false;
2441
2442 Width = (*LdSt.memoperands_begin())->getSize();
2443 BaseReg = &LdSt.getOperand(i: 1);
2444 Offset = LdSt.getOperand(i: 2).getImm();
2445 return true;
2446}
2447
2448bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
2449 const MachineInstr &MIa, const MachineInstr &MIb) const {
2450 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
2451 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
2452
2453 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
2454 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
2455 return false;
2456
2457 // Retrieve the base register, offset from the base register and width. Width
2458 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
2459 // base registers are identical, and the offset of a lower memory access +
2460 // the width doesn't overlap the offset of a higher memory access,
2461 // then the memory accesses are different.
2462 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
2463 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
2464 int64_t OffsetA = 0, OffsetB = 0;
2465 LocationSize WidthA = 0, WidthB = 0;
2466 if (getMemOperandWithOffsetWidth(LdSt: MIa, BaseReg&: BaseOpA, Offset&: OffsetA, Width&: WidthA, TRI) &&
2467 getMemOperandWithOffsetWidth(LdSt: MIb, BaseReg&: BaseOpB, Offset&: OffsetB, Width&: WidthB, TRI)) {
2468 if (BaseOpA->isIdenticalTo(Other: *BaseOpB)) {
2469 int LowOffset = std::min(a: OffsetA, b: OffsetB);
2470 int HighOffset = std::max(a: OffsetA, b: OffsetB);
2471 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
2472 if (LowWidth.hasValue() &&
2473 LowOffset + (int)LowWidth.getValue() <= HighOffset)
2474 return true;
2475 }
2476 }
2477 return false;
2478}
2479
2480std::pair<unsigned, unsigned>
2481RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
2482 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
2483 return std::make_pair(x: TF & Mask, y: TF & ~Mask);
2484}
2485
2486ArrayRef<std::pair<unsigned, const char *>>
2487RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
2488 using namespace RISCVII;
2489 static const std::pair<unsigned, const char *> TargetFlags[] = {
2490 {MO_CALL, "riscv-call"},
2491 {MO_LO, "riscv-lo"},
2492 {MO_HI, "riscv-hi"},
2493 {MO_PCREL_LO, "riscv-pcrel-lo"},
2494 {MO_PCREL_HI, "riscv-pcrel-hi"},
2495 {MO_GOT_HI, "riscv-got-hi"},
2496 {MO_TPREL_LO, "riscv-tprel-lo"},
2497 {MO_TPREL_HI, "riscv-tprel-hi"},
2498 {MO_TPREL_ADD, "riscv-tprel-add"},
2499 {MO_TLS_GOT_HI, "riscv-tls-got-hi"},
2500 {MO_TLS_GD_HI, "riscv-tls-gd-hi"},
2501 {MO_TLSDESC_HI, "riscv-tlsdesc-hi"},
2502 {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"},
2503 {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"},
2504 {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}};
2505 return ArrayRef(TargetFlags);
2506}
2507bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
2508 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
2509 const Function &F = MF.getFunction();
2510
2511 // Can F be deduplicated by the linker? If it can, don't outline from it.
2512 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
2513 return false;
2514
2515 // Don't outline from functions with section markings; the program could
2516 // expect that all the code is in the named section.
2517 if (F.hasSection())
2518 return false;
2519
2520 // It's safe to outline from MF.
2521 return true;
2522}
2523
2524bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
2525 unsigned &Flags) const {
2526 // More accurate safety checking is done in getOutliningCandidateInfo.
2527 return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
2528}
2529
2530// Enum values indicating how an outlined call should be constructed.
2531enum MachineOutlinerConstructionID {
2532 MachineOutlinerDefault
2533};
2534
2535bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
2536 MachineFunction &MF) const {
2537 return MF.getFunction().hasMinSize();
2538}
2539
2540std::optional<outliner::OutlinedFunction>
2541RISCVInstrInfo::getOutliningCandidateInfo(
2542 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
2543
2544  // First we need to filter out candidates where the X5 register (i.e. t0)
2545  // can't be used to set up the function call.
2546 auto CannotInsertCall = [](outliner::Candidate &C) {
2547 const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2548 return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
2549 };
2550
2551 llvm::erase_if(C&: RepeatedSequenceLocs, P: CannotInsertCall);
2552
2553 // If the sequence doesn't have enough candidates left, then we're done.
2554 if (RepeatedSequenceLocs.size() < 2)
2555 return std::nullopt;
2556
2557 unsigned SequenceSize = 0;
2558
2559 for (auto &MI : RepeatedSequenceLocs[0])
2560 SequenceSize += getInstSizeInBytes(MI);
2561
2562 // call t0, function = 8 bytes.
2563 unsigned CallOverhead = 8;
2564 for (auto &C : RepeatedSequenceLocs)
2565 C.setCallInfo(CID: MachineOutlinerDefault, CO: CallOverhead);
2566
2567 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
2568 unsigned FrameOverhead = 4;
2569 if (RepeatedSequenceLocs[0]
2570 .getMF()
2571 ->getSubtarget<RISCVSubtarget>()
2572 .hasStdExtCOrZca())
2573 FrameOverhead = 2;
2574
2575 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
2576 FrameOverhead, MachineOutlinerDefault);
2577}
2578
2579outliner::InstrType
2580RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
2581 unsigned Flags) const {
2582 MachineInstr &MI = *MBBI;
2583 MachineBasicBlock *MBB = MI.getParent();
2584 const TargetRegisterInfo *TRI =
2585 MBB->getParent()->getSubtarget().getRegisterInfo();
2586 const auto &F = MI.getMF()->getFunction();
2587
2588 // We can manually strip out CFI instructions later.
2589 if (MI.isCFIInstruction())
2590    // If the current function has exception handling code, we can't outline
2591    // and strip these CFI instructions since that may break the .eh_frame
2592    // section needed for unwinding.
2593 return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
2594 : outliner::InstrType::Invisible;
2595
2596 // We need support for tail calls to outlined functions before return
2597 // statements can be allowed.
2598 if (MI.isReturn())
2599 return outliner::InstrType::Illegal;
2600
2601 // Don't allow modifying the X5 register which we use for return addresses for
2602 // these outlined functions.
2603 if (MI.modifiesRegister(RISCV::X5, TRI) ||
2604 MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
2605 return outliner::InstrType::Illegal;
2606
2607 // Make sure the operands don't reference something unsafe.
2608 for (const auto &MO : MI.operands()) {
2609
2610    // pcrel-hi and pcrel-lo can't be put in separate sections, so filter that
2611    // out if at all possible.
2612 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
2613 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
2614 F.hasSection()))
2615 return outliner::InstrType::Illegal;
2616 }
2617
2618 return outliner::InstrType::Legal;
2619}
2620
2621void RISCVInstrInfo::buildOutlinedFrame(
2622 MachineBasicBlock &MBB, MachineFunction &MF,
2623 const outliner::OutlinedFunction &OF) const {
2624
2625 // Strip out any CFI instructions
2626 bool Changed = true;
2627 while (Changed) {
2628 Changed = false;
2629 auto I = MBB.begin();
2630 auto E = MBB.end();
2631 for (; I != E; ++I) {
2632 if (I->isCFIInstruction()) {
2633 I->removeFromParent();
2634 Changed = true;
2635 break;
2636 }
2637 }
2638 }
2639
2640 MBB.addLiveIn(RISCV::X5);
2641
2642 // Add in a return instruction to the end of the outlined frame.
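  // This is jr t0 (JALR with rd = x0), returning to the address that the
  // outlined call left in X5.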
2643 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
2644 .addReg(RISCV::X0, RegState::Define)
2645 .addReg(RISCV::X5)
2646 .addImm(0));
2647}
2648
2649MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
2650 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
2651 MachineFunction &MF, outliner::Candidate &C) const {
2652
2653 // Add in a call instruction to the outlined function at the given location.
2654 It = MBB.insert(It,
2655 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
2656 .addGlobalAddress(M.getNamedValue(MF.getName()), 0,
2657 RISCVII::MO_CALL));
2658 return It;
2659}
2660
2661std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
2662 Register Reg) const {
2663 // TODO: Handle cases where Reg is a super- or sub-register of the
2664 // destination register.
2665 const MachineOperand &Op0 = MI.getOperand(i: 0);
2666 if (!Op0.isReg() || Reg != Op0.getReg())
2667 return std::nullopt;
2668
2669 // Don't consider ADDIW as a candidate because the caller may not be aware
2670 // of its sign extension behaviour.
2671 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
2672 MI.getOperand(2).isImm())
2673 return RegImmPair{MI.getOperand(i: 1).getReg(), MI.getOperand(i: 2).getImm()};
2674
2675 return std::nullopt;
2676}
2677
2678// MIR printer helper function to annotate Operands with a comment.
2679std::string RISCVInstrInfo::createMIROperandComment(
2680 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
2681 const TargetRegisterInfo *TRI) const {
2682 // Print a generic comment for this operand if there is one.
2683 std::string GenericComment =
2684 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
2685 if (!GenericComment.empty())
2686 return GenericComment;
2687
2688 // If not, we must have an immediate operand.
2689 if (!Op.isImm())
2690 return std::string();
2691
2692 std::string Comment;
2693 raw_string_ostream OS(Comment);
2694
2695 uint64_t TSFlags = MI.getDesc().TSFlags;
2696
2697 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
2698 // operand of vector codegen pseudos.
2699 if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI ||
2700 MI.getOpcode() == RISCV::PseudoVSETVLI ||
2701 MI.getOpcode() == RISCV::PseudoVSETIVLI ||
2702 MI.getOpcode() == RISCV::PseudoVSETVLIX0) &&
2703 OpIdx == 2) {
2704 unsigned Imm = MI.getOperand(i: OpIdx).getImm();
2705 RISCVVType::printVType(VType: Imm, OS);
2706 } else if (RISCVII::hasSEWOp(TSFlags) &&
2707 OpIdx == RISCVII::getSEWOpNum(Desc: MI.getDesc())) {
2708 unsigned Log2SEW = MI.getOperand(i: OpIdx).getImm();
2709 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
2710 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
2711 OS << "e" << SEW;
2712 } else if (RISCVII::hasVecPolicyOp(TSFlags) &&
2713 OpIdx == RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())) {
2714 unsigned Policy = MI.getOperand(i: OpIdx).getImm();
2715 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
2716 "Invalid Policy Value");
2717 OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
2718 << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");
2719 }
2720
2721 OS.flush();
2722 return Comment;
2723}
2724
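// Helper macros that expand to case labels for every LMUL (and, for the
// SEW-aware pseudos, every SEW) variant of an RVV pseudo, so the code below
// can match whole opcode families at once.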
2725// clang-format off
2726#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
2727 RISCV::Pseudo##OP##_##LMUL
2728
2729#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
2730 RISCV::Pseudo##OP##_##LMUL##_MASK
2731
2732#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
2733 CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
2734 case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)
2735
2736#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
2737 CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
2738 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
2739 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
2740 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
2741 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
2742 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)
2743
2744#define CASE_RVV_OPCODE_UNMASK(OP) \
2745 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
2746 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)
2747
2748#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
2749 CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
2750 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
2751 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
2752 case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
2753 case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
2754 case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)
2755
2756#define CASE_RVV_OPCODE_MASK(OP) \
2757 CASE_RVV_OPCODE_MASK_WIDEN(OP): \
2758 case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)
2759
2760#define CASE_RVV_OPCODE_WIDEN(OP) \
2761 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
2762 case CASE_RVV_OPCODE_MASK_WIDEN(OP)
2763
2764#define CASE_RVV_OPCODE(OP) \
2765 CASE_RVV_OPCODE_UNMASK(OP): \
2766 case CASE_RVV_OPCODE_MASK(OP)
2767// clang-format on
2768
2769// clang-format off
2770#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
2771 RISCV::PseudoV##OP##_##TYPE##_##LMUL
2772
2773#define CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) \
2774 CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \
2775 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \
2776 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \
2777 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)
2778
2779#define CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) \
2780 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \
2781 case CASE_VMA_OPCODE_LMULS_M1(OP, TYPE)
2782
2783#define CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) \
2784 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \
2785 case CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE)
2786
2787#define CASE_VMA_OPCODE_LMULS(OP, TYPE) \
2788 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \
2789 case CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE)
2790
2791// VFMA instructions are SEW specific.
2792#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \
2793 RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW
2794
2795#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \
2796 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \
2797 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \
2798 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \
2799 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)
2800
2801#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \
2802 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \
2803 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)
2804
2805#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \
2806 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \
2807 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)
2808
2809#define CASE_VFMA_OPCODE_VV(OP) \
2810 CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
2811 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
2812 case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
2813
2814#define CASE_VFMA_SPLATS(OP) \
2815 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
2816 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
2817 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
2818// clang-format on
2819
2820bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
2821 unsigned &SrcOpIdx1,
2822 unsigned &SrcOpIdx2) const {
2823 const MCInstrDesc &Desc = MI.getDesc();
2824 if (!Desc.isCommutable())
2825 return false;
2826
2827 switch (MI.getOpcode()) {
2828 case RISCV::TH_MVEQZ:
2829 case RISCV::TH_MVNEZ:
2830 // We can't commute operands if operand 2 (i.e., rs1 in
2831 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is
2832 // not valid as the in/out-operand 1).
2833 if (MI.getOperand(2).getReg() == RISCV::X0)
2834 return false;
2835 // Operands 1 and 2 are commutable, if we switch the opcode.
2836 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
2837 case RISCV::TH_MULA:
2838 case RISCV::TH_MULAW:
2839 case RISCV::TH_MULAH:
2840 case RISCV::TH_MULS:
2841 case RISCV::TH_MULSW:
2842 case RISCV::TH_MULSH:
2843 // Operands 2 and 3 are commutable.
2844 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
2845 case RISCV::PseudoCCMOVGPRNoX0:
2846 case RISCV::PseudoCCMOVGPR:
2847 // Operands 4 and 5 are commutable.
2848 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
2849 case CASE_RVV_OPCODE(VADD_VV):
2850 case CASE_RVV_OPCODE(VAND_VV):
2851 case CASE_RVV_OPCODE(VOR_VV):
2852 case CASE_RVV_OPCODE(VXOR_VV):
2853 case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
2854 case CASE_RVV_OPCODE_MASK(VMSNE_VV):
2855 case CASE_RVV_OPCODE(VMIN_VV):
2856 case CASE_RVV_OPCODE(VMINU_VV):
2857 case CASE_RVV_OPCODE(VMAX_VV):
2858 case CASE_RVV_OPCODE(VMAXU_VV):
2859 case CASE_RVV_OPCODE(VMUL_VV):
2860 case CASE_RVV_OPCODE(VMULH_VV):
2861 case CASE_RVV_OPCODE(VMULHU_VV):
2862 case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
2863 case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
2864 case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
2865 case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
2866 case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
2867 case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
2868 case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
2869 // Operands 2 and 3 are commutable.
2870 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
2871 case CASE_VFMA_SPLATS(FMADD):
2872 case CASE_VFMA_SPLATS(FMSUB):
2873 case CASE_VFMA_SPLATS(FMACC):
2874 case CASE_VFMA_SPLATS(FMSAC):
2875 case CASE_VFMA_SPLATS(FNMADD):
2876 case CASE_VFMA_SPLATS(FNMSUB):
2877 case CASE_VFMA_SPLATS(FNMACC):
2878 case CASE_VFMA_SPLATS(FNMSAC):
2879 case CASE_VFMA_OPCODE_VV(FMACC):
2880 case CASE_VFMA_OPCODE_VV(FMSAC):
2881 case CASE_VFMA_OPCODE_VV(FNMACC):
2882 case CASE_VFMA_OPCODE_VV(FNMSAC):
2883 case CASE_VMA_OPCODE_LMULS(MADD, VX):
2884 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
2885 case CASE_VMA_OPCODE_LMULS(MACC, VX):
2886 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
2887 case CASE_VMA_OPCODE_LMULS(MACC, VV):
2888 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
2889 // If the tail policy is undisturbed we can't commute.
2890 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2891 if ((MI.getOperand(i: MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2892 return false;
2893
2894 // For these instructions we can only swap operand 1 and operand 3 by
2895 // changing the opcode.
2896 unsigned CommutableOpIdx1 = 1;
2897 unsigned CommutableOpIdx2 = 3;
2898 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2899 CommutableOpIdx2))
2900 return false;
2901 return true;
2902 }
2903 case CASE_VFMA_OPCODE_VV(FMADD):
2904 case CASE_VFMA_OPCODE_VV(FMSUB):
2905 case CASE_VFMA_OPCODE_VV(FNMADD):
2906 case CASE_VFMA_OPCODE_VV(FNMSUB):
2907 case CASE_VMA_OPCODE_LMULS(MADD, VV):
2908 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
2909 // If the tail policy is undisturbed we can't commute.
2910 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2911 if ((MI.getOperand(i: MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2912 return false;
2913
2914 // For these instructions we have more freedom. We can commute with the
2915 // other multiplicand or with the addend/subtrahend/minuend.
2916
2917 // Any fixed operand must be from source 1, 2 or 3.
2918 if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
2919 return false;
2920 if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
2921 return false;
2922
2923    // If both ops are fixed, one must be the tied source.
2924 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2925 SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
2926 return false;
2927
2928 // Look for two different register operands assumed to be commutable
2929 // regardless of the FMA opcode. The FMA opcode is adjusted later if
2930 // needed.
2931 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2932 SrcOpIdx2 == CommuteAnyOperandIndex) {
2933 // At least one of operands to be commuted is not specified and
2934 // this method is free to choose appropriate commutable operands.
2935 unsigned CommutableOpIdx1 = SrcOpIdx1;
2936 if (SrcOpIdx1 == SrcOpIdx2) {
2937 // Both of operands are not fixed. Set one of commutable
2938 // operands to the tied source.
2939 CommutableOpIdx1 = 1;
2940 } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
2941 // Only one of the operands is not fixed.
2942 CommutableOpIdx1 = SrcOpIdx2;
2943 }
2944
2945 // CommutableOpIdx1 is well defined now. Let's choose another commutable
2946 // operand and assign its index to CommutableOpIdx2.
2947 unsigned CommutableOpIdx2;
2948 if (CommutableOpIdx1 != 1) {
2949 // If we haven't already used the tied source, we must use it now.
2950 CommutableOpIdx2 = 1;
2951 } else {
2952 Register Op1Reg = MI.getOperand(i: CommutableOpIdx1).getReg();
2953
2954 // The commuted operands should have different registers.
2955 // Otherwise, the commute transformation does not change anything and
2956 // is useless. We use this as a hint to make our decision.
2957 if (Op1Reg != MI.getOperand(i: 2).getReg())
2958 CommutableOpIdx2 = 2;
2959 else
2960 CommutableOpIdx2 = 3;
2961 }
2962
2963 // Assign the found pair of commutable indices to SrcOpIdx1 and
2964 // SrcOpIdx2 to return those values.
2965 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2966 CommutableOpIdx2))
2967 return false;
2968 }
2969
2970 return true;
2971 }
2972 }
2973
2974 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2975}
2976
2977// clang-format off
2978#define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
2979 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
2980 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
2981 break;
2982
2983#define CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \
2984 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
2985 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
2986 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
2987 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
2988
2989#define CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \
2990 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
2991 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
2992
2993#define CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \
2994 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
2995 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
2996
2997#define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
2998 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
2999 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
3000
3001#define CASE_VMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
3002 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \
3003 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \
3004 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
3005
3006// VFMA depends on SEW.
3007#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \
3008 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \
3009 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \
3010 break;
3011
3012#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \
3013 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \
3014 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \
3015 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \
3016 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)
3017
3018#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \
3019 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \
3020 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
3021
3022#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
3023 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
3024 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
3025 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
3026
3027#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
3028 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
3029 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)
3030
3031#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE, SEW) \
3032 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8, SEW) \
3033 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW)
3034
3035#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
3036 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
3037 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
3038 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
3039
3040MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
3041 bool NewMI,
3042 unsigned OpIdx1,
3043 unsigned OpIdx2) const {
3044 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
3045 if (NewMI)
3046 return *MI.getParent()->getParent()->CloneMachineInstr(Orig: &MI);
3047 return MI;
3048 };
3049
3050 switch (MI.getOpcode()) {
3051 case RISCV::TH_MVEQZ:
3052 case RISCV::TH_MVNEZ: {
3053 auto &WorkingMI = cloneIfNew(MI);
3054 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
3055 : RISCV::TH_MVEQZ));
3056 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
3057 OpIdx2);
3058 }
3059 case RISCV::PseudoCCMOVGPRNoX0:
3060 case RISCV::PseudoCCMOVGPR: {
3061 // CCMOV can be commuted by inverting the condition.
3062 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm());
3063 CC = RISCVCC::getOppositeBranchCondition(CC);
3064 auto &WorkingMI = cloneIfNew(MI);
3065 WorkingMI.getOperand(i: 3).setImm(CC);
3066 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
3067 OpIdx1, OpIdx2);
3068 }
3069 case CASE_VFMA_SPLATS(FMACC):
3070 case CASE_VFMA_SPLATS(FMADD):
3071 case CASE_VFMA_SPLATS(FMSAC):
3072 case CASE_VFMA_SPLATS(FMSUB):
3073 case CASE_VFMA_SPLATS(FNMACC):
3074 case CASE_VFMA_SPLATS(FNMADD):
3075 case CASE_VFMA_SPLATS(FNMSAC):
3076 case CASE_VFMA_SPLATS(FNMSUB):
3077 case CASE_VFMA_OPCODE_VV(FMACC):
3078 case CASE_VFMA_OPCODE_VV(FMSAC):
3079 case CASE_VFMA_OPCODE_VV(FNMACC):
3080 case CASE_VFMA_OPCODE_VV(FNMSAC):
3081 case CASE_VMA_OPCODE_LMULS(MADD, VX):
3082 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
3083 case CASE_VMA_OPCODE_LMULS(MACC, VX):
3084 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
3085 case CASE_VMA_OPCODE_LMULS(MACC, VV):
3086 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
3087    // It only makes sense to toggle these between clobbering the
3088    // addend/subtrahend/minuend and clobbering one of the multiplicands.
3089 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
3090 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
3091 unsigned Opc;
3092 switch (MI.getOpcode()) {
3093 default:
3094 llvm_unreachable("Unexpected opcode");
3095 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
3096 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
3097 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
3098 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
3099 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
3100 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
3101 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
3102 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
3103 CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)
3104 CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB)
3105 CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD)
3106 CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB)
3107 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
3108 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
3109 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
3110 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
3111 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
3112 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
3113 }
3114
3115 auto &WorkingMI = cloneIfNew(MI);
3116 WorkingMI.setDesc(get(Opc));
3117 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
3118 OpIdx1, OpIdx2);
3119 }
3120 case CASE_VFMA_OPCODE_VV(FMADD):
3121 case CASE_VFMA_OPCODE_VV(FMSUB):
3122 case CASE_VFMA_OPCODE_VV(FNMADD):
3123 case CASE_VFMA_OPCODE_VV(FNMSUB):
3124 case CASE_VMA_OPCODE_LMULS(MADD, VV):
3125 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
3126 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
3127    // If one of the operands is the addend, we need to change the opcode.
3128    // Otherwise we're just swapping two of the multiplicands.
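    // For example, in vmadd.vv (vd = (vs1 * vd) + vs2) commuting vd with the
    // addend vs2 requires switching to the vmacc.vv form, whereas commuting
    // vd with the multiplicand vs1 needs no opcode change.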
3129 if (OpIdx1 == 3 || OpIdx2 == 3) {
3130 unsigned Opc;
3131 switch (MI.getOpcode()) {
3132 default:
3133 llvm_unreachable("Unexpected opcode");
3134 CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
3135 CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC)
3136 CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC)
3137 CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC)
3138 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
3139 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
3140 }
3141
3142 auto &WorkingMI = cloneIfNew(MI);
3143 WorkingMI.setDesc(get(Opc));
3144 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
3145 OpIdx1, OpIdx2);
3146 }
3147 // Let the default code handle it.
3148 break;
3149 }
3150 }
3151
3152 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
3153}
3154
3155#undef CASE_VMA_OPCODE_COMMON
3156#undef CASE_VMA_OPCODE_LMULS_M1
3157#undef CASE_VMA_OPCODE_LMULS_MF2
3158#undef CASE_VMA_OPCODE_LMULS_MF4
3159#undef CASE_VMA_OPCODE_LMULS
3160#undef CASE_VFMA_OPCODE_COMMON
3161#undef CASE_VFMA_OPCODE_LMULS_M1
3162#undef CASE_VFMA_OPCODE_LMULS_MF2
3163#undef CASE_VFMA_OPCODE_LMULS_MF4
3164#undef CASE_VFMA_OPCODE_VV
3165#undef CASE_VFMA_SPLATS
3166
3167// clang-format off
3168#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
3169 RISCV::PseudoV##OP##_##LMUL##_TIED
3170
3171#define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \
3172 CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
3173 case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
3174 case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
3175 case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
3176 case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
3177
3178#define CASE_WIDEOP_OPCODE_LMULS(OP) \
3179 CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
3180 case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)
3181
3182#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
3183 case RISCV::PseudoV##OP##_##LMUL##_TIED: \
3184 NewOpc = RISCV::PseudoV##OP##_##LMUL; \
3185 break;
3186
3187#define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
3188 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
3189 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
3190 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
3191 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
3192 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
3193
3194#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
3195 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
3196 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
3197
3198// FP widening ops may be SEW aware. Create SEW-aware cases where needed.
3199#define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \
3200 RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED
3201
3202#define CASE_FP_WIDEOP_OPCODE_LMULS_MF4(OP) \
3203 CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
3204 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
3205 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \
3206 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
3207 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \
3208 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
3209 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \
3210 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16): \
3211  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32)
3212
3213#define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \
3214 case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \
3215 NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \
3216 break;
3217
3218#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
3219 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
3220 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
3221 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \
3222 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
3223 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \
3224 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
3225 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
3226 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
3227  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32)
3228
3229#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
3230 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
3231// clang-format on
3232
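// Convert a tied widening pseudo (e.g. PseudoVWADD_WV_M1_TIED), whose
// destination reuses the wide source register, into the untied three-address
// form. The converted form takes an undef passthru, so the original tail
// elements are not preserved; the conversion is therefore only done when the
// tail policy is agnostic.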
3233MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
3234 LiveVariables *LV,
3235 LiveIntervals *LIS) const {
3236 MachineInstrBuilder MIB;
3237 switch (MI.getOpcode()) {
3238 default:
3239 return nullptr;
3240 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
3241 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
3242 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
3243 MI.getNumExplicitOperands() == 7 &&
3244 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
3245    // If the tail policy is undisturbed, we can't convert.
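    // The converted form takes an undef passthru operand, so the tail
    // elements of the original destination would no longer be preserved.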
3246 if ((MI.getOperand(i: RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())).getImm() &
3247 1) == 0)
3248 return nullptr;
3249 // clang-format off
3250 unsigned NewOpc;
3251 switch (MI.getOpcode()) {
3252 default:
3253 llvm_unreachable("Unexpected opcode");
3254 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
3255 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
3256 }
3257 // clang-format on
3258
3259 MachineBasicBlock &MBB = *MI.getParent();
3260 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
3261 .add(MI.getOperand(i: 0))
3262 .addReg(MI.getOperand(i: 0).getReg(), RegState::Undef)
3263 .add(MI.getOperand(i: 1))
3264 .add(MI.getOperand(i: 2))
3265 .add(MI.getOperand(i: 3))
3266 .add(MI.getOperand(i: 4))
3267 .add(MI.getOperand(i: 5))
3268 .add(MI.getOperand(i: 6));
3269 break;
3270 }
3271 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
3272 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
3273 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
3274 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
3275    // If the tail policy is undisturbed, we can't convert.
3276 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
3277 MI.getNumExplicitOperands() == 6);
3278 if ((MI.getOperand(i: 5).getImm() & 1) == 0)
3279 return nullptr;
3280
3281 // clang-format off
3282 unsigned NewOpc;
3283 switch (MI.getOpcode()) {
3284 default:
3285 llvm_unreachable("Unexpected opcode");
3286 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
3287 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
3288 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
3289 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
3290 }
3291 // clang-format on
3292
3293 MachineBasicBlock &MBB = *MI.getParent();
3294 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
3295 .add(MI.getOperand(i: 0))
3296 .addReg(MI.getOperand(i: 0).getReg(), RegState::Undef)
3297 .add(MI.getOperand(i: 1))
3298 .add(MI.getOperand(i: 2))
3299 .add(MI.getOperand(i: 3))
3300 .add(MI.getOperand(i: 4))
3301 .add(MI.getOperand(i: 5));
3302 break;
3303 }
3304 }
3305 MIB.copyImplicitOps(OtherMI: MI);
3306
3307 if (LV) {
3308 unsigned NumOps = MI.getNumOperands();
3309 for (unsigned I = 1; I < NumOps; ++I) {
3310 MachineOperand &Op = MI.getOperand(i: I);
3311 if (Op.isReg() && Op.isKill())
3312 LV->replaceKillInstruction(Reg: Op.getReg(), OldMI&: MI, NewMI&: *MIB);
3313 }
3314 }
3315
3316 if (LIS) {
3317 SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, NewMI&: *MIB);
3318
3319 if (MI.getOperand(i: 0).isEarlyClobber()) {
3320 // Use operand 1 was tied to early-clobber def operand 0, so its live
3321      // interval could have ended at an early-clobber slot. Now that they are
3322      // no longer tied, we need to update it to the normal register slot.
3323 LiveInterval &LI = LIS->getInterval(Reg: MI.getOperand(i: 1).getReg());
3324 LiveRange::Segment *S = LI.getSegmentContaining(Idx);
3325 if (S->end == Idx.getRegSlot(EC: true))
3326 S->end = Idx.getRegSlot();
3327 }
3328 }
3329
3330 return MIB;
3331}
3332
3333#undef CASE_WIDEOP_OPCODE_COMMON
3334#undef CASE_WIDEOP_OPCODE_LMULS_MF4
3335#undef CASE_WIDEOP_OPCODE_LMULS
3336#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
3337#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4
3338#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
3339#undef CASE_FP_WIDEOP_OPCODE_COMMON
3340#undef CASE_FP_WIDEOP_OPCODE_LMULS_MF4
3341#undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
3342#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4
3343#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
3344
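// Multiply DestReg in place by the constant Amount using shifts and adds
// where possible: a single SLLI for powers of two, Zba's SHXADD (plus a
// shift) for 3, 5 or 9 times a power of two, SLLI+ADD/SUB when Amount is
// 2^k +/- 1, a real MUL when M or Zmmul is available, and a generic
// shift-and-add expansion otherwise.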
3345void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
3346 MachineBasicBlock::iterator II, const DebugLoc &DL,
3347 Register DestReg, uint32_t Amount,
3348 MachineInstr::MIFlag Flag) const {
3349 MachineRegisterInfo &MRI = MF.getRegInfo();
3350 if (llvm::has_single_bit<uint32_t>(Value: Amount)) {
3351 uint32_t ShiftAmount = Log2_32(Value: Amount);
3352 if (ShiftAmount == 0)
3353 return;
3354 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3355 .addReg(DestReg, RegState::Kill)
3356 .addImm(ShiftAmount)
3357 .setMIFlag(Flag);
3358 } else if (STI.hasStdExtZba() &&
3359 ((Amount % 3 == 0 && isPowerOf2_64(Value: Amount / 3)) ||
3360 (Amount % 5 == 0 && isPowerOf2_64(Value: Amount / 5)) ||
3361 (Amount % 9 == 0 && isPowerOf2_64(Value: Amount / 9)))) {
3362 // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
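    // For example, Amount == 40 == 5 * 8 becomes
    //   slli rd, rd, 3     (rd *= 8)
    //   sh2add rd, rd, rd  (rd = (rd << 2) + rd, i.e. rd *= 5)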
3363 unsigned Opc;
3364 uint32_t ShiftAmount;
3365 if (Amount % 9 == 0) {
3366 Opc = RISCV::SH3ADD;
3367 ShiftAmount = Log2_64(Value: Amount / 9);
3368 } else if (Amount % 5 == 0) {
3369 Opc = RISCV::SH2ADD;
3370 ShiftAmount = Log2_64(Value: Amount / 5);
3371 } else if (Amount % 3 == 0) {
3372 Opc = RISCV::SH1ADD;
3373 ShiftAmount = Log2_64(Value: Amount / 3);
3374 } else {
3375 llvm_unreachable("implied by if-clause");
3376 }
3377 if (ShiftAmount)
3378 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3379 .addReg(DestReg, RegState::Kill)
3380 .addImm(ShiftAmount)
3381 .setMIFlag(Flag);
3382 BuildMI(MBB, II, DL, get(Opc), DestReg)
3383 .addReg(DestReg, RegState::Kill)
3384 .addReg(DestReg)
3385 .setMIFlag(Flag);
3386 } else if (llvm::has_single_bit<uint32_t>(Value: Amount - 1)) {
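    // Amount == 2^k + 1: DestReg * Amount == (DestReg << k) + DestReg.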
3387 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3388 uint32_t ShiftAmount = Log2_32(Value: Amount - 1);
3389 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
3390 .addReg(DestReg)
3391 .addImm(ShiftAmount)
3392 .setMIFlag(Flag);
3393 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
3394 .addReg(ScaledRegister, RegState::Kill)
3395 .addReg(DestReg, RegState::Kill)
3396 .setMIFlag(Flag);
3397 } else if (llvm::has_single_bit<uint32_t>(Value: Amount + 1)) {
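    // Amount == 2^k - 1: DestReg * Amount == (DestReg << k) - DestReg.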
3398 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3399 uint32_t ShiftAmount = Log2_32(Value: Amount + 1);
3400 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
3401 .addReg(DestReg)
3402 .addImm(ShiftAmount)
3403 .setMIFlag(Flag);
3404 BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg)
3405 .addReg(ScaledRegister, RegState::Kill)
3406 .addReg(DestReg, RegState::Kill)
3407 .setMIFlag(Flag);
3408 } else if (STI.hasStdExtM() || STI.hasStdExtZmmul()) {
3409 Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3410 movImm(MBB, MBBI: II, DL, DstReg: N, Val: Amount, Flag);
3411 BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
3412 .addReg(DestReg, RegState::Kill)
3413 .addReg(N, RegState::Kill)
3414 .setMIFlag(Flag);
3415 } else {
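    // Generic expansion: walk the set bits of Amount from low to high,
    // shifting DestReg up to each set bit in place and accumulating the
    // earlier partial products in Acc, then add Acc back into DestReg.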
3416 Register Acc;
3417 uint32_t PrevShiftAmount = 0;
3418 for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
3419 if (Amount & (1U << ShiftAmount)) {
3420 if (ShiftAmount)
3421 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3422 .addReg(DestReg, RegState::Kill)
3423 .addImm(ShiftAmount - PrevShiftAmount)
3424 .setMIFlag(Flag);
3425 if (Amount >> (ShiftAmount + 1)) {
3426          // If we don't have an accumulator yet, create it and copy DestReg.
3427 if (!Acc) {
3428 Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3429 BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc)
3430 .addReg(DestReg)
3431 .setMIFlag(Flag);
3432 } else {
3433 BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
3434 .addReg(Acc, RegState::Kill)
3435 .addReg(DestReg)
3436 .setMIFlag(Flag);
3437 }
3438 }
3439 PrevShiftAmount = ShiftAmount;
3440 }
3441 }
3442 assert(Acc && "Expected valid accumulator");
3443 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
3444 .addReg(DestReg, RegState::Kill)
3445 .addReg(Acc, RegState::Kill)
3446 .setMIFlag(Flag);
3447 }
3448}
3449
3450ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
3451RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
3452 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
3453 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
3454 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
3455 return ArrayRef(TargetFlags);
3456}
3457
3458// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
3459bool RISCV::isSEXT_W(const MachineInstr &MI) {
3460 return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
3461 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
3462}
3463
3464// Returns true if this is the zext.w pattern, add.uw rd, rs1, x0.
3465bool RISCV::isZEXT_W(const MachineInstr &MI) {
3466 return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() &&
3467 MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0;
3468}
3469
3470// Returns true if this is the zext.b pattern, andi rd, rs1, 255.
3471bool RISCV::isZEXT_B(const MachineInstr &MI) {
3472 return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() &&
3473 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
3474}
3475
3476static bool isRVVWholeLoadStore(unsigned Opcode) {
3477 switch (Opcode) {
3478 default:
3479 return false;
3480 case RISCV::VS1R_V:
3481 case RISCV::VS2R_V:
3482 case RISCV::VS4R_V:
3483 case RISCV::VS8R_V:
3484 case RISCV::VL1RE8_V:
3485 case RISCV::VL2RE8_V:
3486 case RISCV::VL4RE8_V:
3487 case RISCV::VL8RE8_V:
3488 case RISCV::VL1RE16_V:
3489 case RISCV::VL2RE16_V:
3490 case RISCV::VL4RE16_V:
3491 case RISCV::VL8RE16_V:
3492 case RISCV::VL1RE32_V:
3493 case RISCV::VL2RE32_V:
3494 case RISCV::VL4RE32_V:
3495 case RISCV::VL8RE32_V:
3496 case RISCV::VL1RE64_V:
3497 case RISCV::VL2RE64_V:
3498 case RISCV::VL4RE64_V:
3499 case RISCV::VL8RE64_V:
3500 return true;
3501 }
3502}
3503
3504bool RISCV::isRVVSpill(const MachineInstr &MI) {
3505 // RVV lacks any support for immediate addressing for stack addresses, so be
3506 // conservative.
3507 unsigned Opcode = MI.getOpcode();
3508 if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
3509 !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
3510 return false;
3511 return true;
3512}
3513
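// For segment spill/reload pseudos (Zvlsseg), return the pair (number of
// fields NF, LMUL of each field); return std::nullopt for any other opcode.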
3514std::optional<std::pair<unsigned, unsigned>>
3515RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
3516 switch (Opcode) {
3517 default:
3518 return std::nullopt;
3519 case RISCV::PseudoVSPILL2_M1:
3520 case RISCV::PseudoVRELOAD2_M1:
3521 return std::make_pair(x: 2u, y: 1u);
3522 case RISCV::PseudoVSPILL2_M2:
3523 case RISCV::PseudoVRELOAD2_M2:
3524 return std::make_pair(x: 2u, y: 2u);
3525 case RISCV::PseudoVSPILL2_M4:
3526 case RISCV::PseudoVRELOAD2_M4:
3527 return std::make_pair(x: 2u, y: 4u);
3528 case RISCV::PseudoVSPILL3_M1:
3529 case RISCV::PseudoVRELOAD3_M1:
3530 return std::make_pair(x: 3u, y: 1u);
3531 case RISCV::PseudoVSPILL3_M2:
3532 case RISCV::PseudoVRELOAD3_M2:
3533 return std::make_pair(x: 3u, y: 2u);
3534 case RISCV::PseudoVSPILL4_M1:
3535 case RISCV::PseudoVRELOAD4_M1:
3536 return std::make_pair(x: 4u, y: 1u);
3537 case RISCV::PseudoVSPILL4_M2:
3538 case RISCV::PseudoVRELOAD4_M2:
3539 return std::make_pair(x: 4u, y: 2u);
3540 case RISCV::PseudoVSPILL5_M1:
3541 case RISCV::PseudoVRELOAD5_M1:
3542 return std::make_pair(x: 5u, y: 1u);
3543 case RISCV::PseudoVSPILL6_M1:
3544 case RISCV::PseudoVRELOAD6_M1:
3545 return std::make_pair(x: 6u, y: 1u);
3546 case RISCV::PseudoVSPILL7_M1:
3547 case RISCV::PseudoVRELOAD7_M1:
3548 return std::make_pair(x: 7u, y: 1u);
3549 case RISCV::PseudoVSPILL8_M1:
3550 case RISCV::PseudoVRELOAD8_M1:
3551 return std::make_pair(x: 8u, y: 1u);
3552 }
3553}
3554
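// Fault-only-first loads are identified structurally: they have two explicit
// defs (the loaded data and the updated VL output) and also write the VL
// register. Inline asm can write VL as well, so it is excluded explicitly.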
3555bool RISCV::isFaultFirstLoad(const MachineInstr &MI) {
3556 return MI.getNumExplicitDefs() == 2 &&
3557 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) && !MI.isInlineAsm();
3558}
3559
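// Return true only if both instructions carry a rounding-mode (frm) operand
// and the two immediate values are equal.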
3560bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
3561 int16_t MI1FrmOpIdx =
3562 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
3563 int16_t MI2FrmOpIdx =
3564 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm);
3565 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
3566 return false;
3567 MachineOperand FrmOp1 = MI1.getOperand(i: MI1FrmOpIdx);
3568 MachineOperand FrmOp2 = MI2.getOperand(i: MI2FrmOpIdx);
3569 return FrmOp1.getImm() == FrmOp2.getImm();
3570}
3571
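// For vector instructions with a scalar (GPR/immediate) operand, return how
// many low bits of that scalar can influence the result at SEW == 2^Log2SEW:
// shifts use log2(SEW) bits (log2(2*SEW) for the narrowing forms), while the
// plain arithmetic/logic/compare ops use SEW bits. Returns std::nullopt for
// opcodes that are not handled.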
3572std::optional<unsigned>
3573RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) {
3574 // TODO: Handle Zvbb instructions
3575 switch (Opcode) {
3576 default:
3577 return std::nullopt;
3578
3579 // 11.6. Vector Single-Width Shift Instructions
3580 case RISCV::VSLL_VX:
3581 case RISCV::VSRL_VX:
3582 case RISCV::VSRA_VX:
3583 // 12.4. Vector Single-Width Scaling Shift Instructions
3584 case RISCV::VSSRL_VX:
3585 case RISCV::VSSRA_VX:
3586 // Only the low lg2(SEW) bits of the shift-amount value are used.
3587 return Log2SEW;
3588
3589 // 11.7 Vector Narrowing Integer Right Shift Instructions
3590 case RISCV::VNSRL_WX:
3591 case RISCV::VNSRA_WX:
3592 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
3593 case RISCV::VNCLIPU_WX:
3594 case RISCV::VNCLIP_WX:
3595 // Only the low lg2(2*SEW) bits of the shift-amount value are used.
3596 return Log2SEW + 1;
3597
3598 // 11.1. Vector Single-Width Integer Add and Subtract
3599 case RISCV::VADD_VX:
3600 case RISCV::VSUB_VX:
3601 case RISCV::VRSUB_VX:
3602 // 11.2. Vector Widening Integer Add/Subtract
3603 case RISCV::VWADDU_VX:
3604 case RISCV::VWSUBU_VX:
3605 case RISCV::VWADD_VX:
3606 case RISCV::VWSUB_VX:
3607 case RISCV::VWADDU_WX:
3608 case RISCV::VWSUBU_WX:
3609 case RISCV::VWADD_WX:
3610 case RISCV::VWSUB_WX:
3611 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
3612 case RISCV::VADC_VXM:
3613 case RISCV::VADC_VIM:
3614 case RISCV::VMADC_VXM:
3615 case RISCV::VMADC_VIM:
3616 case RISCV::VMADC_VX:
3617 case RISCV::VSBC_VXM:
3618 case RISCV::VMSBC_VXM:
3619 case RISCV::VMSBC_VX:
3620 // 11.5 Vector Bitwise Logical Instructions
3621 case RISCV::VAND_VX:
3622 case RISCV::VOR_VX:
3623 case RISCV::VXOR_VX:
3624 // 11.8. Vector Integer Compare Instructions
3625 case RISCV::VMSEQ_VX:
3626 case RISCV::VMSNE_VX:
3627 case RISCV::VMSLTU_VX:
3628 case RISCV::VMSLT_VX:
3629 case RISCV::VMSLEU_VX:
3630 case RISCV::VMSLE_VX:
3631 case RISCV::VMSGTU_VX:
3632 case RISCV::VMSGT_VX:
3633 // 11.9. Vector Integer Min/Max Instructions
3634 case RISCV::VMINU_VX:
3635 case RISCV::VMIN_VX:
3636 case RISCV::VMAXU_VX:
3637 case RISCV::VMAX_VX:
3638 // 11.10. Vector Single-Width Integer Multiply Instructions
3639 case RISCV::VMUL_VX:
3640 case RISCV::VMULH_VX:
3641 case RISCV::VMULHU_VX:
3642 case RISCV::VMULHSU_VX:
3643 // 11.11. Vector Integer Divide Instructions
3644 case RISCV::VDIVU_VX:
3645 case RISCV::VDIV_VX:
3646 case RISCV::VREMU_VX:
3647 case RISCV::VREM_VX:
3648 // 11.12. Vector Widening Integer Multiply Instructions
3649 case RISCV::VWMUL_VX:
3650 case RISCV::VWMULU_VX:
3651 case RISCV::VWMULSU_VX:
3652 // 11.13. Vector Single-Width Integer Multiply-Add Instructions
3653 case RISCV::VMACC_VX:
3654 case RISCV::VNMSAC_VX:
3655 case RISCV::VMADD_VX:
3656 case RISCV::VNMSUB_VX:
3657 // 11.14. Vector Widening Integer Multiply-Add Instructions
3658 case RISCV::VWMACCU_VX:
3659 case RISCV::VWMACC_VX:
3660 case RISCV::VWMACCSU_VX:
3661 case RISCV::VWMACCUS_VX:
3662 // 11.15. Vector Integer Merge Instructions
3663 case RISCV::VMERGE_VXM:
3664 // 11.16. Vector Integer Move Instructions
3665 case RISCV::VMV_V_X:
3666 // 12.1. Vector Single-Width Saturating Add and Subtract
3667 case RISCV::VSADDU_VX:
3668 case RISCV::VSADD_VX:
3669 case RISCV::VSSUBU_VX:
3670 case RISCV::VSSUB_VX:
3671 // 12.2. Vector Single-Width Averaging Add and Subtract
3672 case RISCV::VAADDU_VX:
3673 case RISCV::VAADD_VX:
3674 case RISCV::VASUBU_VX:
3675 case RISCV::VASUB_VX:
3676 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
3677 case RISCV::VSMUL_VX:
3678 // 16.1. Integer Scalar Move Instructions
3679 case RISCV::VMV_S_X:
3680 return 1U << Log2SEW;
3681 }
3682}
3683
3684unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
3685 const RISCVVPseudosTable::PseudoInfo *RVV =
3686 RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
3687 if (!RVV)
3688 return 0;
3689 return RVV->BaseInstr;
3690}
3691
