1//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the RISC-V implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCVInstrInfo.h"
14#include "MCTargetDesc/RISCVMatInt.h"
15#include "RISCV.h"
16#include "RISCVMachineFunctionInfo.h"
17#include "RISCVSubtarget.h"
18#include "RISCVTargetMachine.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/Analysis/MemoryLocation.h"
22#include "llvm/Analysis/ValueTracking.h"
23#include "llvm/CodeGen/LiveIntervals.h"
24#include "llvm/CodeGen/LiveVariables.h"
25#include "llvm/CodeGen/MachineCombinerPattern.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/CodeGen/MachineTraceMetrics.h"
30#include "llvm/CodeGen/RegisterScavenging.h"
31#include "llvm/CodeGen/StackMaps.h"
32#include "llvm/IR/DebugInfoMetadata.h"
33#include "llvm/MC/MCInstBuilder.h"
34#include "llvm/MC/TargetRegistry.h"
35#include "llvm/Support/ErrorHandling.h"
36
37using namespace llvm;
38
39#define GEN_CHECK_COMPRESS_INSTR
40#include "RISCVGenCompressInstEmitter.inc"
41
42#define GET_INSTRINFO_CTOR_DTOR
43#define GET_INSTRINFO_NAMED_OPS
44#include "RISCVGenInstrInfo.inc"
45
46static cl::opt<bool> PreferWholeRegisterMove(
47 "riscv-prefer-whole-register-move", cl::init(Val: false), cl::Hidden,
48 cl::desc("Prefer whole register move for vector registers."));
49
50static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
51 "riscv-force-machine-combiner-strategy", cl::Hidden,
52 cl::desc("Force machine combiner to use a specific strategy for machine "
53 "trace metrics evaluation."),
54 cl::init(Val: MachineTraceStrategy::TS_NumStrategies),
55 cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
56 "Local strategy."),
57 clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
58 "MinInstrCount strategy.")));
59
60namespace llvm::RISCVVPseudosTable {
61
62using namespace RISCV;
63
64#define GET_RISCVVPseudosTable_IMPL
65#include "RISCVGenSearchableTables.inc"
66
67} // namespace llvm::RISCVVPseudosTable
68
69namespace llvm::RISCV {
70
71#define GET_RISCVMaskedPseudosTable_IMPL
72#include "RISCVGenSearchableTables.inc"
73
74} // end namespace llvm::RISCV
75
76RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
77 : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
78 STI(STI) {}
79
80MCInst RISCVInstrInfo::getNop() const {
81 if (STI.hasStdExtCOrZca())
82 return MCInstBuilder(RISCV::C_NOP);
83 return MCInstBuilder(RISCV::ADDI)
84 .addReg(RISCV::X0)
85 .addReg(RISCV::X0)
86 .addImm(0);
87}
88
89Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
90 int &FrameIndex) const {
91 unsigned Dummy;
92 return isLoadFromStackSlot(MI, FrameIndex, MemBytes&: Dummy);
93}
94
95Register RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
96 int &FrameIndex,
97 unsigned &MemBytes) const {
98 switch (MI.getOpcode()) {
99 default:
100 return 0;
101 case RISCV::LB:
102 case RISCV::LBU:
103 MemBytes = 1;
104 break;
105 case RISCV::LH:
106 case RISCV::LHU:
107 case RISCV::FLH:
108 MemBytes = 2;
109 break;
110 case RISCV::LW:
111 case RISCV::FLW:
112 case RISCV::LWU:
113 MemBytes = 4;
114 break;
115 case RISCV::LD:
116 case RISCV::FLD:
117 MemBytes = 8;
118 break;
119 }
120
121 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() &&
122 MI.getOperand(i: 2).getImm() == 0) {
123 FrameIndex = MI.getOperand(i: 1).getIndex();
124 return MI.getOperand(i: 0).getReg();
125 }
126
127 return 0;
128}
129
130Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
131 int &FrameIndex) const {
132 unsigned Dummy;
133 return isStoreToStackSlot(MI, FrameIndex, MemBytes&: Dummy);
134}
135
136Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
137 int &FrameIndex,
138 unsigned &MemBytes) const {
139 switch (MI.getOpcode()) {
140 default:
141 return 0;
142 case RISCV::SB:
143 MemBytes = 1;
144 break;
145 case RISCV::SH:
146 case RISCV::FSH:
147 MemBytes = 2;
148 break;
149 case RISCV::SW:
150 case RISCV::FSW:
151 MemBytes = 4;
152 break;
153 case RISCV::SD:
154 case RISCV::FSD:
155 MemBytes = 8;
156 break;
157 }
158
159 if (MI.getOperand(i: 1).isFI() && MI.getOperand(i: 2).isImm() &&
160 MI.getOperand(i: 2).getImm() == 0) {
161 FrameIndex = MI.getOperand(i: 1).getIndex();
162 return MI.getOperand(i: 0).getReg();
163 }
164
165 return 0;
166}
167
168static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
169 unsigned NumRegs) {
170 return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
171}
172
173static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
174 const MachineBasicBlock &MBB,
175 MachineBasicBlock::const_iterator MBBI,
176 MachineBasicBlock::const_iterator &DefMBBI,
177 RISCVII::VLMUL LMul) {
178 if (PreferWholeRegisterMove)
179 return false;
180
181 assert(MBBI->getOpcode() == TargetOpcode::COPY &&
182 "Unexpected COPY instruction.");
183 Register SrcReg = MBBI->getOperand(i: 1).getReg();
184 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
185
186 bool FoundDef = false;
187 bool FirstVSetVLI = false;
188 unsigned FirstSEW = 0;
189 while (MBBI != MBB.begin()) {
190 --MBBI;
191 if (MBBI->isMetaInstruction())
192 continue;
193
194 if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
195 MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
196 MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
197 // There is a vsetvli between COPY and source define instruction.
198 // vy = def_vop ... (producing instruction)
199 // ...
200 // vsetvli
201 // ...
202 // vx = COPY vy
203 if (!FoundDef) {
204 if (!FirstVSetVLI) {
205 FirstVSetVLI = true;
206 unsigned FirstVType = MBBI->getOperand(i: 2).getImm();
207 RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(VType: FirstVType);
208 FirstSEW = RISCVVType::getSEW(VType: FirstVType);
209 // The first encountered vsetvli must have the same lmul as the
210 // register class of COPY.
211 if (FirstLMul != LMul)
212 return false;
213 }
214 // Only permit `vsetvli x0, x0, vtype` between COPY and the source
215 // define instruction.
216 if (MBBI->getOperand(i: 0).getReg() != RISCV::X0)
217 return false;
218 if (MBBI->getOperand(i: 1).isImm())
219 return false;
220 if (MBBI->getOperand(i: 1).getReg() != RISCV::X0)
221 return false;
222 continue;
223 }
224
225 // MBBI is the first vsetvli before the producing instruction.
226 unsigned VType = MBBI->getOperand(i: 2).getImm();
227 // If there is a vsetvli between COPY and the producing instruction.
228 if (FirstVSetVLI) {
229 // If SEW is different, return false.
230 if (RISCVVType::getSEW(VType) != FirstSEW)
231 return false;
232 }
233
234 // If the vsetvli is tail undisturbed, keep the whole register move.
235 if (!RISCVVType::isTailAgnostic(VType))
236 return false;
237
238 // The checking is conservative. We only have register classes for
239 // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
240 // for fractional LMUL operations. However, we could not use the vsetvli
241 // lmul for widening operations. The result of widening operation is
242 // 2 x LMUL.
243 return LMul == RISCVVType::getVLMUL(VType);
244 } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
245 return false;
246 } else if (MBBI->getNumDefs()) {
247 // Check all the instructions which will change VL.
248 // For example, vleff has implicit def VL.
249 if (MBBI->modifiesRegister(RISCV::Reg: VL, /*TRI=*/nullptr))
250 return false;
251
252 // Only converting whole register copies to vmv.v.v when the defining
253 // value appears in the explicit operands.
254 for (const MachineOperand &MO : MBBI->explicit_operands()) {
255 if (!MO.isReg() || !MO.isDef())
256 continue;
257 if (!FoundDef && TRI->regsOverlap(RegA: MO.getReg(), RegB: SrcReg)) {
258 // We only permit the source of COPY has the same LMUL as the defined
259 // operand.
260 // There are cases we need to keep the whole register copy if the LMUL
261 // is different.
262 // For example,
263 // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,m
264 // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
265 // # The COPY may be created by vlmul_trunc intrinsic.
266 // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
267 //
268 // After widening, the valid value will be 4 x e32 elements. If we
269 // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
270 // FIXME: The COPY of subregister of Zvlsseg register will not be able
271 // to convert to vmv.v.[v|i] under the constraint.
272 if (MO.getReg() != SrcReg)
273 return false;
274
275 // In widening reduction instructions with LMUL_1 input vector case,
276 // only checking the LMUL is insufficient due to reduction result is
277 // always LMUL_1.
278 // For example,
279 // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
280 // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
281 // $v26 = COPY killed renamable $v8
282 // After widening, The valid value will be 1 x e16 elements. If we
283 // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
284 uint64_t TSFlags = MBBI->getDesc().TSFlags;
285 if (RISCVII::isRVVWideningReduction(TSFlags))
286 return false;
287
288 // If the producing instruction does not depend on vsetvli, do not
289 // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
290 if (!RISCVII::hasSEWOp(TSFlags) || !RISCVII::hasVLOp(TSFlags))
291 return false;
292
293 // Found the definition.
294 FoundDef = true;
295 DefMBBI = MBBI;
296 break;
297 }
298 }
299 }
300 }
301
302 return false;
303}
304
305void RISCVInstrInfo::copyPhysRegVector(
306 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
307 const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
308 const TargetRegisterClass *RegClass) const {
309 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
310 RISCVII::VLMUL LMul = RISCVRI::getLMul(TSFlags: RegClass->TSFlags);
311 unsigned NF = RISCVRI::getNF(TSFlags: RegClass->TSFlags);
312
313 uint16_t SrcEncoding = TRI->getEncodingValue(RegNo: SrcReg);
314 uint16_t DstEncoding = TRI->getEncodingValue(RegNo: DstReg);
315 auto [LMulVal, Fractional] = RISCVVType::decodeVLMUL(VLMUL: LMul);
316 assert(!Fractional && "It is impossible be fractional lmul here.");
317 unsigned NumRegs = NF * LMulVal;
318 bool ReversedCopy =
319 forwardCopyWillClobberTuple(DstReg: DstEncoding, SrcReg: SrcEncoding, NumRegs);
320 if (ReversedCopy) {
321 // If the src and dest overlap when copying a tuple, we need to copy the
322 // registers in reverse.
323 SrcEncoding += NumRegs - 1;
324 DstEncoding += NumRegs - 1;
325 }
326
327 unsigned I = 0;
328 auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding)
329 -> std::tuple<RISCVII::VLMUL, const TargetRegisterClass &, unsigned,
330 unsigned, unsigned> {
331 if (ReversedCopy) {
332 // For reversed copying, if there are enough aligned registers(8/4/2), we
333 // can do a larger copy(LMUL8/4/2).
334 // Besides, we have already known that DstEncoding is larger than
335 // SrcEncoding in forwardCopyWillClobberTuple, so the difference between
336 // DstEncoding and SrcEncoding should be >= LMUL value we try to use to
337 // avoid clobbering.
338 uint16_t Diff = DstEncoding - SrcEncoding;
339 if (I + 8 <= NumRegs && Diff >= 8 && SrcEncoding % 8 == 7 &&
340 DstEncoding % 8 == 7)
341 return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
342 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
343 if (I + 4 <= NumRegs && Diff >= 4 && SrcEncoding % 4 == 3 &&
344 DstEncoding % 4 == 3)
345 return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
346 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
347 if (I + 2 <= NumRegs && Diff >= 2 && SrcEncoding % 2 == 1 &&
348 DstEncoding % 2 == 1)
349 return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
350 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
351 // Or we should do LMUL1 copying.
352 return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
353 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
354 }
355
356 // For forward copying, if source register encoding and destination register
357 // encoding are aligned to 8/4/2, we can do a LMUL8/4/2 copying.
358 if (I + 8 <= NumRegs && SrcEncoding % 8 == 0 && DstEncoding % 8 == 0)
359 return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V,
360 RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8};
361 if (I + 4 <= NumRegs && SrcEncoding % 4 == 0 && DstEncoding % 4 == 0)
362 return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V,
363 RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4};
364 if (I + 2 <= NumRegs && SrcEncoding % 2 == 0 && DstEncoding % 2 == 0)
365 return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V,
366 RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2};
367 // Or we should do LMUL1 copying.
368 return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
369 RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
370 };
371 auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
372 uint16_t Encoding) {
373 MCRegister Reg = RISCV::V0 + Encoding;
374 if (&RegClass == &RISCV::VRRegClass)
375 return Reg;
376 return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
377 };
378 while (I != NumRegs) {
379 // For non-segment copying, we only do this once as the registers are always
380 // aligned.
381 // For segment copying, we may do this several times. If the registers are
382 // aligned to larger LMUL, we can eliminate some copyings.
383 auto [LMulCopied, RegClass, Opc, VVOpc, VIOpc] =
384 GetCopyInfo(SrcEncoding, DstEncoding);
385 auto [NumCopied, _] = RISCVVType::decodeVLMUL(VLMUL: LMulCopied);
386
387 MachineBasicBlock::const_iterator DefMBBI;
388 if (LMul == LMulCopied &&
389 isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
390 Opc = VVOpc;
391 if (DefMBBI->getOpcode() == VIOpc)
392 Opc = VIOpc;
393 }
394
395 // Emit actual copying.
396 // For reversed copying, the encoding should be decreased.
397 MCRegister ActualSrcReg = FindRegWithEncoding(
398 RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
399 MCRegister ActualDstReg = FindRegWithEncoding(
400 RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
401
402 auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
403 bool UseVMV_V_I = RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_I;
404 bool UseVMV = UseVMV_V_I || RISCV::getRVVMCOpcode(Opc) == RISCV::VMV_V_V;
405 if (UseVMV)
406 MIB.addReg(ActualDstReg, RegState::Undef);
407 if (UseVMV_V_I)
408 MIB = MIB.add(DefMBBI->getOperand(i: 2));
409 else
410 MIB = MIB.addReg(ActualSrcReg, getKillRegState(B: KillSrc));
411 if (UseVMV) {
412 const MCInstrDesc &Desc = DefMBBI->getDesc();
413 MIB.add(DefMBBI->getOperand(i: RISCVII::getVLOpNum(Desc))); // AVL
414 MIB.add(DefMBBI->getOperand(i: RISCVII::getSEWOpNum(Desc))); // SEW
415 MIB.addImm(0); // tu, mu
416 MIB.addReg(RISCV::VL, RegState::Implicit);
417 MIB.addReg(RISCV::VTYPE, RegState::Implicit);
418 }
419
420 // If we are copying reversely, we should decrease the encoding.
421 SrcEncoding += (ReversedCopy ? -NumCopied : NumCopied);
422 DstEncoding += (ReversedCopy ? -NumCopied : NumCopied);
423 I += NumCopied;
424 }
425}
426
427void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
428 MachineBasicBlock::iterator MBBI,
429 const DebugLoc &DL, MCRegister DstReg,
430 MCRegister SrcReg, bool KillSrc) const {
431 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
432
433 if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
434 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
435 .addReg(SrcReg, getKillRegState(KillSrc))
436 .addImm(0);
437 return;
438 }
439
440 if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
441 // Emit an ADDI for both parts of GPRPair.
442 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
443 TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
444 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
445 getKillRegState(KillSrc))
446 .addImm(0);
447 BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
448 TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
449 .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
450 getKillRegState(KillSrc))
451 .addImm(0);
452 return;
453 }
454
455 // Handle copy from csr
456 if (RISCV::VCSRRegClass.contains(SrcReg) &&
457 RISCV::GPRRegClass.contains(DstReg)) {
458 BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
459 .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
460 .addReg(RISCV::X0);
461 return;
462 }
463
464 if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
465 unsigned Opc;
466 if (STI.hasStdExtZfh()) {
467 Opc = RISCV::FSGNJ_H;
468 } else {
469 assert(STI.hasStdExtF() &&
470 (STI.hasStdExtZfhmin() || STI.hasStdExtZfbfmin()) &&
471 "Unexpected extensions");
472 // Zfhmin/Zfbfmin doesn't have FSGNJ_H, replace FSGNJ_H with FSGNJ_S.
473 DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
474 &RISCV::FPR32RegClass);
475 SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
476 &RISCV::FPR32RegClass);
477 Opc = RISCV::FSGNJ_S;
478 }
479 BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
480 .addReg(SrcReg, getKillRegState(B: KillSrc))
481 .addReg(SrcReg, getKillRegState(B: KillSrc));
482 return;
483 }
484
485 if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
486 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_S), DstReg)
487 .addReg(SrcReg, getKillRegState(KillSrc))
488 .addReg(SrcReg, getKillRegState(KillSrc));
489 return;
490 }
491
492 if (RISCV::FPR64RegClass.contains(DstReg, SrcReg)) {
493 BuildMI(MBB, MBBI, DL, get(RISCV::FSGNJ_D), DstReg)
494 .addReg(SrcReg, getKillRegState(KillSrc))
495 .addReg(SrcReg, getKillRegState(KillSrc));
496 return;
497 }
498
499 if (RISCV::FPR32RegClass.contains(DstReg) &&
500 RISCV::GPRRegClass.contains(SrcReg)) {
501 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_W_X), DstReg)
502 .addReg(SrcReg, getKillRegState(KillSrc));
503 return;
504 }
505
506 if (RISCV::GPRRegClass.contains(DstReg) &&
507 RISCV::FPR32RegClass.contains(SrcReg)) {
508 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_W), DstReg)
509 .addReg(SrcReg, getKillRegState(KillSrc));
510 return;
511 }
512
513 if (RISCV::FPR64RegClass.contains(DstReg) &&
514 RISCV::GPRRegClass.contains(SrcReg)) {
515 assert(STI.getXLen() == 64 && "Unexpected GPR size");
516 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_D_X), DstReg)
517 .addReg(SrcReg, getKillRegState(KillSrc));
518 return;
519 }
520
521 if (RISCV::GPRRegClass.contains(DstReg) &&
522 RISCV::FPR64RegClass.contains(SrcReg)) {
523 assert(STI.getXLen() == 64 && "Unexpected GPR size");
524 BuildMI(MBB, MBBI, DL, get(RISCV::FMV_X_D), DstReg)
525 .addReg(SrcReg, getKillRegState(KillSrc));
526 return;
527 }
528
529 // VR->VR copies.
530 static const TargetRegisterClass *RVVRegClasses[] = {
531 &RISCV::VRRegClass, &RISCV::VRM2RegClass, &RISCV::VRM4RegClass,
532 &RISCV::VRM8RegClass, &RISCV::VRN2M1RegClass, &RISCV::VRN2M2RegClass,
533 &RISCV::VRN2M4RegClass, &RISCV::VRN3M1RegClass, &RISCV::VRN3M2RegClass,
534 &RISCV::VRN4M1RegClass, &RISCV::VRN4M2RegClass, &RISCV::VRN5M1RegClass,
535 &RISCV::VRN6M1RegClass, &RISCV::VRN7M1RegClass, &RISCV::VRN8M1RegClass};
536 for (const auto &RegClass : RVVRegClasses) {
537 if (RegClass->contains(DstReg, SrcReg)) {
538 copyPhysRegVector(MBB, MBBI, DL, DstReg, SrcReg, KillSrc, RegClass);
539 return;
540 }
541 }
542
543 llvm_unreachable("Impossible reg-to-reg copy");
544}
545
546void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
547 MachineBasicBlock::iterator I,
548 Register SrcReg, bool IsKill, int FI,
549 const TargetRegisterClass *RC,
550 const TargetRegisterInfo *TRI,
551 Register VReg) const {
552 MachineFunction *MF = MBB.getParent();
553 MachineFrameInfo &MFI = MF->getFrameInfo();
554
555 unsigned Opcode;
556 bool IsScalableVector = true;
557 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
558 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
559 RISCV::SW : RISCV::SD;
560 IsScalableVector = false;
561 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
562 Opcode = RISCV::PseudoRV32ZdinxSD;
563 IsScalableVector = false;
564 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
565 Opcode = RISCV::FSH;
566 IsScalableVector = false;
567 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
568 Opcode = RISCV::FSW;
569 IsScalableVector = false;
570 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
571 Opcode = RISCV::FSD;
572 IsScalableVector = false;
573 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
574 Opcode = RISCV::VS1R_V;
575 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
576 Opcode = RISCV::VS2R_V;
577 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
578 Opcode = RISCV::VS4R_V;
579 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
580 Opcode = RISCV::VS8R_V;
581 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
582 Opcode = RISCV::PseudoVSPILL2_M1;
583 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
584 Opcode = RISCV::PseudoVSPILL2_M2;
585 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
586 Opcode = RISCV::PseudoVSPILL2_M4;
587 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
588 Opcode = RISCV::PseudoVSPILL3_M1;
589 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
590 Opcode = RISCV::PseudoVSPILL3_M2;
591 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
592 Opcode = RISCV::PseudoVSPILL4_M1;
593 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
594 Opcode = RISCV::PseudoVSPILL4_M2;
595 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
596 Opcode = RISCV::PseudoVSPILL5_M1;
597 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
598 Opcode = RISCV::PseudoVSPILL6_M1;
599 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
600 Opcode = RISCV::PseudoVSPILL7_M1;
601 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
602 Opcode = RISCV::PseudoVSPILL8_M1;
603 else
604 llvm_unreachable("Can't store this register to stack slot");
605
606 if (IsScalableVector) {
607 MachineMemOperand *MMO = MF->getMachineMemOperand(
608 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOStore,
609 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
610
611 MFI.setStackID(ObjectIdx: FI, ID: TargetStackID::ScalableVector);
612 BuildMI(MBB, I, DebugLoc(), get(Opcode))
613 .addReg(SrcReg, getKillRegState(B: IsKill))
614 .addFrameIndex(FI)
615 .addMemOperand(MMO);
616 } else {
617 MachineMemOperand *MMO = MF->getMachineMemOperand(
618 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOStore,
619 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
620
621 BuildMI(MBB, I, DebugLoc(), get(Opcode))
622 .addReg(SrcReg, getKillRegState(B: IsKill))
623 .addFrameIndex(FI)
624 .addImm(0)
625 .addMemOperand(MMO);
626 }
627}
628
629void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
630 MachineBasicBlock::iterator I,
631 Register DstReg, int FI,
632 const TargetRegisterClass *RC,
633 const TargetRegisterInfo *TRI,
634 Register VReg) const {
635 MachineFunction *MF = MBB.getParent();
636 MachineFrameInfo &MFI = MF->getFrameInfo();
637
638 unsigned Opcode;
639 bool IsScalableVector = true;
640 if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
641 Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
642 RISCV::LW : RISCV::LD;
643 IsScalableVector = false;
644 } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
645 Opcode = RISCV::PseudoRV32ZdinxLD;
646 IsScalableVector = false;
647 } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
648 Opcode = RISCV::FLH;
649 IsScalableVector = false;
650 } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
651 Opcode = RISCV::FLW;
652 IsScalableVector = false;
653 } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
654 Opcode = RISCV::FLD;
655 IsScalableVector = false;
656 } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
657 Opcode = RISCV::VL1RE8_V;
658 } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
659 Opcode = RISCV::VL2RE8_V;
660 } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
661 Opcode = RISCV::VL4RE8_V;
662 } else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
663 Opcode = RISCV::VL8RE8_V;
664 } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
665 Opcode = RISCV::PseudoVRELOAD2_M1;
666 else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
667 Opcode = RISCV::PseudoVRELOAD2_M2;
668 else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
669 Opcode = RISCV::PseudoVRELOAD2_M4;
670 else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
671 Opcode = RISCV::PseudoVRELOAD3_M1;
672 else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
673 Opcode = RISCV::PseudoVRELOAD3_M2;
674 else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
675 Opcode = RISCV::PseudoVRELOAD4_M1;
676 else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
677 Opcode = RISCV::PseudoVRELOAD4_M2;
678 else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
679 Opcode = RISCV::PseudoVRELOAD5_M1;
680 else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
681 Opcode = RISCV::PseudoVRELOAD6_M1;
682 else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
683 Opcode = RISCV::PseudoVRELOAD7_M1;
684 else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
685 Opcode = RISCV::PseudoVRELOAD8_M1;
686 else
687 llvm_unreachable("Can't load this register from stack slot");
688
689 if (IsScalableVector) {
690 MachineMemOperand *MMO = MF->getMachineMemOperand(
691 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOLoad,
692 Size: LocationSize::beforeOrAfterPointer(), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
693
694 MFI.setStackID(ObjectIdx: FI, ID: TargetStackID::ScalableVector);
695 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
696 .addFrameIndex(FI)
697 .addMemOperand(MMO);
698 } else {
699 MachineMemOperand *MMO = MF->getMachineMemOperand(
700 PtrInfo: MachinePointerInfo::getFixedStack(MF&: *MF, FI), F: MachineMemOperand::MOLoad,
701 Size: MFI.getObjectSize(ObjectIdx: FI), BaseAlignment: MFI.getObjectAlign(ObjectIdx: FI));
702
703 BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
704 .addFrameIndex(FI)
705 .addImm(0)
706 .addMemOperand(MMO);
707 }
708}
709
710MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
711 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
712 MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
713 VirtRegMap *VRM) const {
714 const MachineFrameInfo &MFI = MF.getFrameInfo();
715
716 // The below optimizations narrow the load so they are only valid for little
717 // endian.
718 // TODO: Support big endian by adding an offset into the frame object?
719 if (MF.getDataLayout().isBigEndian())
720 return nullptr;
721
722 // Fold load from stack followed by sext.b/sext.h/sext.w/zext.b/zext.h/zext.w.
723 if (Ops.size() != 1 || Ops[0] != 1)
724 return nullptr;
725
726 unsigned LoadOpc;
727 switch (MI.getOpcode()) {
728 default:
729 if (RISCV::isSEXT_W(MI)) {
730 LoadOpc = RISCV::LW;
731 break;
732 }
733 if (RISCV::isZEXT_W(MI)) {
734 LoadOpc = RISCV::LWU;
735 break;
736 }
737 if (RISCV::isZEXT_B(MI)) {
738 LoadOpc = RISCV::LBU;
739 break;
740 }
741 return nullptr;
742 case RISCV::SEXT_H:
743 LoadOpc = RISCV::LH;
744 break;
745 case RISCV::SEXT_B:
746 LoadOpc = RISCV::LB;
747 break;
748 case RISCV::ZEXT_H_RV32:
749 case RISCV::ZEXT_H_RV64:
750 LoadOpc = RISCV::LHU;
751 break;
752 }
753
754 MachineMemOperand *MMO = MF.getMachineMemOperand(
755 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI: FrameIndex),
756 F: MachineMemOperand::MOLoad, Size: MFI.getObjectSize(ObjectIdx: FrameIndex),
757 BaseAlignment: MFI.getObjectAlign(ObjectIdx: FrameIndex));
758
759 Register DstReg = MI.getOperand(i: 0).getReg();
760 return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(LoadOpc),
761 DstReg)
762 .addFrameIndex(FrameIndex)
763 .addImm(0)
764 .addMemOperand(MMO);
765}
766
767void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
768 MachineBasicBlock::iterator MBBI,
769 const DebugLoc &DL, Register DstReg, uint64_t Val,
770 MachineInstr::MIFlag Flag, bool DstRenamable,
771 bool DstIsDead) const {
772 Register SrcReg = RISCV::X0;
773
774 // For RV32, allow a sign or unsigned 32 bit value.
775 if (!STI.is64Bit() && !isInt<32>(x: Val)) {
776 // If have a uimm32 it will still fit in a register so we can allow it.
777 if (!isUInt<32>(x: Val))
778 report_fatal_error(reason: "Should only materialize 32-bit constants for RV32");
779
780 // Sign extend for generateInstSeq.
781 Val = SignExtend64<32>(x: Val);
782 }
783
784 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, STI);
785 assert(!Seq.empty());
786
787 bool SrcRenamable = false;
788 unsigned Num = 0;
789
790 for (const RISCVMatInt::Inst &Inst : Seq) {
791 bool LastItem = ++Num == Seq.size();
792 unsigned DstRegState = getDeadRegState(DstIsDead && LastItem) |
793 getRenamableRegState(DstRenamable);
794 unsigned SrcRegState = getKillRegState(SrcReg != RISCV::X0) |
795 getRenamableRegState(SrcRenamable);
796 switch (Inst.getOpndKind()) {
797 case RISCVMatInt::Imm:
798 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
799 .addReg(DstReg, RegState::Define | DstRegState)
800 .addImm(Inst.getImm())
801 .setMIFlag(Flag);
802 break;
803 case RISCVMatInt::RegX0:
804 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
805 .addReg(DstReg, RegState::Define | DstRegState)
806 .addReg(SrcReg, SrcRegState)
807 .addReg(RISCV::X0)
808 .setMIFlag(Flag);
809 break;
810 case RISCVMatInt::RegReg:
811 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
812 .addReg(DstReg, RegState::Define | DstRegState)
813 .addReg(SrcReg, SrcRegState)
814 .addReg(SrcReg, SrcRegState)
815 .setMIFlag(Flag);
816 break;
817 case RISCVMatInt::RegImm:
818 BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()))
819 .addReg(DstReg, RegState::Define | DstRegState)
820 .addReg(SrcReg, SrcRegState)
821 .addImm(Inst.getImm())
822 .setMIFlag(Flag);
823 break;
824 }
825
826 // Only the first instruction has X0 as its source.
827 SrcReg = DstReg;
828 SrcRenamable = DstRenamable;
829 }
830}
831
832static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
833 switch (Opc) {
834 default:
835 return RISCVCC::COND_INVALID;
836 case RISCV::BEQ:
837 return RISCVCC::COND_EQ;
838 case RISCV::BNE:
839 return RISCVCC::COND_NE;
840 case RISCV::BLT:
841 return RISCVCC::COND_LT;
842 case RISCV::BGE:
843 return RISCVCC::COND_GE;
844 case RISCV::BLTU:
845 return RISCVCC::COND_LTU;
846 case RISCV::BGEU:
847 return RISCVCC::COND_GEU;
848 }
849}
850
851// The contents of values added to Cond are not examined outside of
852// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
853// push BranchOpcode, Reg1, Reg2.
854static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
855 SmallVectorImpl<MachineOperand> &Cond) {
856 // Block ends with fall-through condbranch.
857 assert(LastInst.getDesc().isConditionalBranch() &&
858 "Unknown conditional branch");
859 Target = LastInst.getOperand(i: 2).getMBB();
860 unsigned CC = getCondFromBranchOpc(Opc: LastInst.getOpcode());
861 Cond.push_back(Elt: MachineOperand::CreateImm(Val: CC));
862 Cond.push_back(Elt: LastInst.getOperand(i: 0));
863 Cond.push_back(Elt: LastInst.getOperand(i: 1));
864}
865
866unsigned RISCVCC::getBrCond(RISCVCC::CondCode CC) {
867 switch (CC) {
868 default:
869 llvm_unreachable("Unknown condition code!");
870 case RISCVCC::COND_EQ:
871 return RISCV::BEQ;
872 case RISCVCC::COND_NE:
873 return RISCV::BNE;
874 case RISCVCC::COND_LT:
875 return RISCV::BLT;
876 case RISCVCC::COND_GE:
877 return RISCV::BGE;
878 case RISCVCC::COND_LTU:
879 return RISCV::BLTU;
880 case RISCVCC::COND_GEU:
881 return RISCV::BGEU;
882 }
883}
884
885const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
886 return get(RISCVCC::getBrCond(CC));
887}
888
889RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
890 switch (CC) {
891 default:
892 llvm_unreachable("Unrecognized conditional branch");
893 case RISCVCC::COND_EQ:
894 return RISCVCC::COND_NE;
895 case RISCVCC::COND_NE:
896 return RISCVCC::COND_EQ;
897 case RISCVCC::COND_LT:
898 return RISCVCC::COND_GE;
899 case RISCVCC::COND_GE:
900 return RISCVCC::COND_LT;
901 case RISCVCC::COND_LTU:
902 return RISCVCC::COND_GEU;
903 case RISCVCC::COND_GEU:
904 return RISCVCC::COND_LTU;
905 }
906}
907
908bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
909 MachineBasicBlock *&TBB,
910 MachineBasicBlock *&FBB,
911 SmallVectorImpl<MachineOperand> &Cond,
912 bool AllowModify) const {
913 TBB = FBB = nullptr;
914 Cond.clear();
915
916 // If the block has no terminators, it just falls into the block after it.
917 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
918 if (I == MBB.end() || !isUnpredicatedTerminator(*I))
919 return false;
920
921 // Count the number of terminators and find the first unconditional or
922 // indirect branch.
923 MachineBasicBlock::iterator FirstUncondOrIndirectBr = MBB.end();
924 int NumTerminators = 0;
925 for (auto J = I.getReverse(); J != MBB.rend() && isUnpredicatedTerminator(*J);
926 J++) {
927 NumTerminators++;
928 if (J->getDesc().isUnconditionalBranch() ||
929 J->getDesc().isIndirectBranch()) {
930 FirstUncondOrIndirectBr = J.getReverse();
931 }
932 }
933
934 // If AllowModify is true, we can erase any terminators after
935 // FirstUncondOrIndirectBR.
936 if (AllowModify && FirstUncondOrIndirectBr != MBB.end()) {
937 while (std::next(x: FirstUncondOrIndirectBr) != MBB.end()) {
938 std::next(x: FirstUncondOrIndirectBr)->eraseFromParent();
939 NumTerminators--;
940 }
941 I = FirstUncondOrIndirectBr;
942 }
943
944 // We can't handle blocks that end in an indirect branch.
945 if (I->getDesc().isIndirectBranch())
946 return true;
947
948 // We can't handle Generic branch opcodes from Global ISel.
949 if (I->isPreISelOpcode())
950 return true;
951
952 // We can't handle blocks with more than 2 terminators.
953 if (NumTerminators > 2)
954 return true;
955
956 // Handle a single unconditional branch.
957 if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
958 TBB = getBranchDestBlock(MI: *I);
959 return false;
960 }
961
962 // Handle a single conditional branch.
963 if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
964 parseCondBranch(LastInst&: *I, Target&: TBB, Cond);
965 return false;
966 }
967
968 // Handle a conditional branch followed by an unconditional branch.
969 if (NumTerminators == 2 && std::prev(x: I)->getDesc().isConditionalBranch() &&
970 I->getDesc().isUnconditionalBranch()) {
971 parseCondBranch(LastInst&: *std::prev(x: I), Target&: TBB, Cond);
972 FBB = getBranchDestBlock(MI: *I);
973 return false;
974 }
975
976 // Otherwise, we can't handle this.
977 return true;
978}
979
980unsigned RISCVInstrInfo::removeBranch(MachineBasicBlock &MBB,
981 int *BytesRemoved) const {
982 if (BytesRemoved)
983 *BytesRemoved = 0;
984 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
985 if (I == MBB.end())
986 return 0;
987
988 if (!I->getDesc().isUnconditionalBranch() &&
989 !I->getDesc().isConditionalBranch())
990 return 0;
991
992 // Remove the branch.
993 if (BytesRemoved)
994 *BytesRemoved += getInstSizeInBytes(MI: *I);
995 I->eraseFromParent();
996
997 I = MBB.end();
998
999 if (I == MBB.begin())
1000 return 1;
1001 --I;
1002 if (!I->getDesc().isConditionalBranch())
1003 return 1;
1004
1005 // Remove the branch.
1006 if (BytesRemoved)
1007 *BytesRemoved += getInstSizeInBytes(MI: *I);
1008 I->eraseFromParent();
1009 return 2;
1010}
1011
1012// Inserts a branch into the end of the specific MachineBasicBlock, returning
1013// the number of instructions inserted.
1014unsigned RISCVInstrInfo::insertBranch(
1015 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
1016 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
1017 if (BytesAdded)
1018 *BytesAdded = 0;
1019
1020 // Shouldn't be a fall through.
1021 assert(TBB && "insertBranch must not be told to insert a fallthrough");
1022 assert((Cond.size() == 3 || Cond.size() == 0) &&
1023 "RISC-V branch conditions have two components!");
1024
1025 // Unconditional branch.
1026 if (Cond.empty()) {
1027 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(TBB);
1028 if (BytesAdded)
1029 *BytesAdded += getInstSizeInBytes(MI);
1030 return 1;
1031 }
1032
1033 // Either a one or two-way conditional branch.
1034 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1035 MachineInstr &CondMI =
1036 *BuildMI(BB: &MBB, MIMD: DL, MCID: getBrCond(CC)).add(MO: Cond[1]).add(MO: Cond[2]).addMBB(MBB: TBB);
1037 if (BytesAdded)
1038 *BytesAdded += getInstSizeInBytes(MI: CondMI);
1039
1040 // One-way conditional branch.
1041 if (!FBB)
1042 return 1;
1043
1044 // Two-way conditional branch.
1045 MachineInstr &MI = *BuildMI(&MBB, DL, get(RISCV::PseudoBR)).addMBB(FBB);
1046 if (BytesAdded)
1047 *BytesAdded += getInstSizeInBytes(MI);
1048 return 2;
1049}
1050
1051void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
1052 MachineBasicBlock &DestBB,
1053 MachineBasicBlock &RestoreBB,
1054 const DebugLoc &DL, int64_t BrOffset,
1055 RegScavenger *RS) const {
1056 assert(RS && "RegScavenger required for long branching");
1057 assert(MBB.empty() &&
1058 "new block should be inserted for expanding unconditional branch");
1059 assert(MBB.pred_size() == 1);
1060 assert(RestoreBB.empty() &&
1061 "restore block should be inserted for restoring clobbered registers");
1062
1063 MachineFunction *MF = MBB.getParent();
1064 MachineRegisterInfo &MRI = MF->getRegInfo();
1065 RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
1066 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1067
1068 if (!isInt<32>(x: BrOffset))
1069 report_fatal_error(
1070 reason: "Branch offsets outside of the signed 32-bit range not supported");
1071
1072 // FIXME: A virtual register must be used initially, as the register
1073 // scavenger won't work with empty blocks (SIInstrInfo::insertIndirectBranch
1074 // uses the same workaround).
1075 Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRJALRRegClass);
1076 auto II = MBB.end();
1077 // We may also update the jump target to RestoreBB later.
1078 MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
1079 .addReg(ScratchReg, RegState::Define | RegState::Dead)
1080 .addMBB(&DestBB, RISCVII::MO_CALL);
1081
1082 RS->enterBasicBlockEnd(MBB);
1083 Register TmpGPR =
1084 RS->scavengeRegisterBackwards(RISCV::GPRRegClass, MI.getIterator(),
1085 /*RestoreAfter=*/false, /*SpAdj=*/0,
1086 /*AllowSpill=*/false);
1087 if (TmpGPR != RISCV::NoRegister)
1088 RS->setRegUsed(Reg: TmpGPR);
1089 else {
1090 // The case when there is no scavenged register needs special handling.
1091
1092 // Pick s11 because it doesn't make a difference.
1093 TmpGPR = RISCV::X27;
1094
1095 int FrameIndex = RVFI->getBranchRelaxationScratchFrameIndex();
1096 if (FrameIndex == -1)
1097 report_fatal_error(reason: "underestimated function size");
1098
1099 storeRegToStackSlot(MBB, MI, TmpGPR, /*IsKill=*/true, FrameIndex,
1100 &RISCV::GPRRegClass, TRI, Register());
1101 TRI->eliminateFrameIndex(MI: std::prev(x: MI.getIterator()),
1102 /*SpAdj=*/SPAdj: 0, /*FIOperandNum=*/1);
1103
1104 MI.getOperand(i: 1).setMBB(&RestoreBB);
1105
1106 loadRegFromStackSlot(RestoreBB, RestoreBB.end(), TmpGPR, FrameIndex,
1107 &RISCV::GPRRegClass, TRI, Register());
1108 TRI->eliminateFrameIndex(MI: RestoreBB.back(),
1109 /*SpAdj=*/SPAdj: 0, /*FIOperandNum=*/1);
1110 }
1111
1112 MRI.replaceRegWith(FromReg: ScratchReg, ToReg: TmpGPR);
1113 MRI.clearVirtRegs();
1114}
1115
1116bool RISCVInstrInfo::reverseBranchCondition(
1117 SmallVectorImpl<MachineOperand> &Cond) const {
1118 assert((Cond.size() == 3) && "Invalid branch condition!");
1119 auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1120 Cond[0].setImm(getOppositeBranchCondition(CC));
1121 return false;
1122}
1123
1124bool RISCVInstrInfo::optimizeCondBranch(MachineInstr &MI) const {
1125 MachineBasicBlock *MBB = MI.getParent();
1126 MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
1127
1128 MachineBasicBlock *TBB, *FBB;
1129 SmallVector<MachineOperand, 3> Cond;
1130 if (analyzeBranch(MBB&: *MBB, TBB, FBB, Cond, /*AllowModify=*/false))
1131 return false;
1132
1133 RISCVCC::CondCode CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
1134 assert(CC != RISCVCC::COND_INVALID);
1135
1136 if (CC == RISCVCC::COND_EQ || CC == RISCVCC::COND_NE)
1137 return false;
1138
1139 // For two constants C0 and C1 from
1140 // ```
1141 // li Y, C0
1142 // li Z, C1
1143 // ```
1144 // 1. if C1 = C0 + 1
1145 // we can turn:
1146 // (a) blt Y, X -> bge X, Z
1147 // (b) bge Y, X -> blt X, Z
1148 //
1149 // 2. if C1 = C0 - 1
1150 // we can turn:
1151 // (a) blt X, Y -> bge Z, X
1152 // (b) bge X, Y -> blt Z, X
1153 //
1154 // To make sure this optimization is really beneficial, we only
1155 // optimize for cases where Y had only one use (i.e. only used by the branch).
1156
1157 // Right now we only care about LI (i.e. ADDI x0, imm)
1158 auto isLoadImm = [](const MachineInstr *MI, int64_t &Imm) -> bool {
1159 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1160 MI->getOperand(1).getReg() == RISCV::X0) {
1161 Imm = MI->getOperand(i: 2).getImm();
1162 return true;
1163 }
1164 return false;
1165 };
1166 // Either a load from immediate instruction or X0.
1167 auto isFromLoadImm = [&](const MachineOperand &Op, int64_t &Imm) -> bool {
1168 if (!Op.isReg())
1169 return false;
1170 Register Reg = Op.getReg();
1171 return Reg.isVirtual() && isLoadImm(MRI.getVRegDef(Reg), Imm);
1172 };
1173
1174 MachineOperand &LHS = MI.getOperand(i: 0);
1175 MachineOperand &RHS = MI.getOperand(i: 1);
1176 // Try to find the register for constant Z; return
1177 // invalid register otherwise.
1178 auto searchConst = [&](int64_t C1) -> Register {
1179 MachineBasicBlock::reverse_iterator II(&MI), E = MBB->rend();
1180 auto DefC1 = std::find_if(first: ++II, last: E, pred: [&](const MachineInstr &I) -> bool {
1181 int64_t Imm;
1182 return isLoadImm(&I, Imm) && Imm == C1 &&
1183 I.getOperand(i: 0).getReg().isVirtual();
1184 });
1185 if (DefC1 != E)
1186 return DefC1->getOperand(i: 0).getReg();
1187
1188 return Register();
1189 };
1190
1191 bool Modify = false;
1192 int64_t C0;
1193 if (isFromLoadImm(LHS, C0) && MRI.hasOneUse(RegNo: LHS.getReg())) {
1194 // Might be case 1.
1195 // Signed integer overflow is UB. (UINT64_MAX is bigger so we don't need
1196 // to worry about unsigned overflow here)
1197 if (C0 < INT64_MAX)
1198 if (Register RegZ = searchConst(C0 + 1)) {
1199 reverseBranchCondition(Cond);
1200 Cond[1] = MachineOperand::CreateReg(Reg: RHS.getReg(), /*isDef=*/false);
1201 Cond[2] = MachineOperand::CreateReg(Reg: RegZ, /*isDef=*/false);
1202 // We might extend the live range of Z, clear its kill flag to
1203 // account for this.
1204 MRI.clearKillFlags(Reg: RegZ);
1205 Modify = true;
1206 }
1207 } else if (isFromLoadImm(RHS, C0) && MRI.hasOneUse(RegNo: RHS.getReg())) {
1208 // Might be case 2.
1209 // For unsigned cases, we don't want C1 to wrap back to UINT64_MAX
1210 // when C0 is zero.
1211 if ((CC == RISCVCC::COND_GE || CC == RISCVCC::COND_LT) || C0)
1212 if (Register RegZ = searchConst(C0 - 1)) {
1213 reverseBranchCondition(Cond);
1214 Cond[1] = MachineOperand::CreateReg(Reg: RegZ, /*isDef=*/false);
1215 Cond[2] = MachineOperand::CreateReg(Reg: LHS.getReg(), /*isDef=*/false);
1216 // We might extend the live range of Z, clear its kill flag to
1217 // account for this.
1218 MRI.clearKillFlags(Reg: RegZ);
1219 Modify = true;
1220 }
1221 }
1222
1223 if (!Modify)
1224 return false;
1225
1226 // Build the new branch and remove the old one.
1227 BuildMI(BB&: *MBB, I&: MI, MIMD: MI.getDebugLoc(),
1228 MCID: getBrCond(CC: static_cast<RISCVCC::CondCode>(Cond[0].getImm())))
1229 .add(MO: Cond[1])
1230 .add(MO: Cond[2])
1231 .addMBB(MBB: TBB);
1232 MI.eraseFromParent();
1233
1234 return true;
1235}
1236
1237MachineBasicBlock *
1238RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
1239 assert(MI.getDesc().isBranch() && "Unexpected opcode!");
1240 // The branch target is always the last operand.
1241 int NumOp = MI.getNumExplicitOperands();
1242 return MI.getOperand(i: NumOp - 1).getMBB();
1243}
1244
1245bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
1246 int64_t BrOffset) const {
1247 unsigned XLen = STI.getXLen();
1248 // Ideally we could determine the supported branch offset from the
1249 // RISCVII::FormMask, but this can't be used for Pseudo instructions like
1250 // PseudoBR.
1251 switch (BranchOp) {
1252 default:
1253 llvm_unreachable("Unexpected opcode!");
1254 case RISCV::BEQ:
1255 case RISCV::BNE:
1256 case RISCV::BLT:
1257 case RISCV::BGE:
1258 case RISCV::BLTU:
1259 case RISCV::BGEU:
1260 return isIntN(N: 13, x: BrOffset);
1261 case RISCV::JAL:
1262 case RISCV::PseudoBR:
1263 return isIntN(N: 21, x: BrOffset);
1264 case RISCV::PseudoJump:
1265 return isIntN(N: 32, x: SignExtend64(X: BrOffset + 0x800, B: XLen));
1266 }
1267}
1268
1269// If the operation has a predicated pseudo instruction, return the pseudo
1270// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
1271// TODO: Support more operations.
1272unsigned getPredicatedOpcode(unsigned Opcode) {
1273 switch (Opcode) {
1274 case RISCV::ADD: return RISCV::PseudoCCADD; break;
1275 case RISCV::SUB: return RISCV::PseudoCCSUB; break;
1276 case RISCV::SLL: return RISCV::PseudoCCSLL; break;
1277 case RISCV::SRL: return RISCV::PseudoCCSRL; break;
1278 case RISCV::SRA: return RISCV::PseudoCCSRA; break;
1279 case RISCV::AND: return RISCV::PseudoCCAND; break;
1280 case RISCV::OR: return RISCV::PseudoCCOR; break;
1281 case RISCV::XOR: return RISCV::PseudoCCXOR; break;
1282
1283 case RISCV::ADDI: return RISCV::PseudoCCADDI; break;
1284 case RISCV::SLLI: return RISCV::PseudoCCSLLI; break;
1285 case RISCV::SRLI: return RISCV::PseudoCCSRLI; break;
1286 case RISCV::SRAI: return RISCV::PseudoCCSRAI; break;
1287 case RISCV::ANDI: return RISCV::PseudoCCANDI; break;
1288 case RISCV::ORI: return RISCV::PseudoCCORI; break;
1289 case RISCV::XORI: return RISCV::PseudoCCXORI; break;
1290
1291 case RISCV::ADDW: return RISCV::PseudoCCADDW; break;
1292 case RISCV::SUBW: return RISCV::PseudoCCSUBW; break;
1293 case RISCV::SLLW: return RISCV::PseudoCCSLLW; break;
1294 case RISCV::SRLW: return RISCV::PseudoCCSRLW; break;
1295 case RISCV::SRAW: return RISCV::PseudoCCSRAW; break;
1296
1297 case RISCV::ADDIW: return RISCV::PseudoCCADDIW; break;
1298 case RISCV::SLLIW: return RISCV::PseudoCCSLLIW; break;
1299 case RISCV::SRLIW: return RISCV::PseudoCCSRLIW; break;
1300 case RISCV::SRAIW: return RISCV::PseudoCCSRAIW; break;
1301
1302 case RISCV::ANDN: return RISCV::PseudoCCANDN; break;
1303 case RISCV::ORN: return RISCV::PseudoCCORN; break;
1304 case RISCV::XNOR: return RISCV::PseudoCCXNOR; break;
1305 }
1306
1307 return RISCV::INSTRUCTION_LIST_END;
1308}
1309
1310/// Identify instructions that can be folded into a CCMOV instruction, and
1311/// return the defining instruction.
1312static MachineInstr *canFoldAsPredicatedOp(Register Reg,
1313 const MachineRegisterInfo &MRI,
1314 const TargetInstrInfo *TII) {
1315 if (!Reg.isVirtual())
1316 return nullptr;
1317 if (!MRI.hasOneNonDBGUse(RegNo: Reg))
1318 return nullptr;
1319 MachineInstr *MI = MRI.getVRegDef(Reg);
1320 if (!MI)
1321 return nullptr;
1322 // Check if MI can be predicated and folded into the CCMOV.
1323 if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
1324 return nullptr;
1325 // Don't predicate li idiom.
1326 if (MI->getOpcode() == RISCV::ADDI && MI->getOperand(1).isReg() &&
1327 MI->getOperand(1).getReg() == RISCV::X0)
1328 return nullptr;
1329 // Check if MI has any other defs or physreg uses.
1330 for (const MachineOperand &MO : llvm::drop_begin(RangeOrContainer: MI->operands())) {
1331 // Reject frame index operands, PEI can't handle the predicated pseudos.
1332 if (MO.isFI() || MO.isCPI() || MO.isJTI())
1333 return nullptr;
1334 if (!MO.isReg())
1335 continue;
1336 // MI can't have any tied operands, that would conflict with predication.
1337 if (MO.isTied())
1338 return nullptr;
1339 if (MO.isDef())
1340 return nullptr;
1341 // Allow constant physregs.
1342 if (MO.getReg().isPhysical() && !MRI.isConstantPhysReg(PhysReg: MO.getReg()))
1343 return nullptr;
1344 }
1345 bool DontMoveAcrossStores = true;
1346 if (!MI->isSafeToMove(/* AliasAnalysis = */ AA: nullptr, SawStore&: DontMoveAcrossStores))
1347 return nullptr;
1348 return MI;
1349}
1350
1351bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
1352 SmallVectorImpl<MachineOperand> &Cond,
1353 unsigned &TrueOp, unsigned &FalseOp,
1354 bool &Optimizable) const {
1355 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1356 "Unknown select instruction");
1357 // CCMOV operands:
1358 // 0: Def.
1359 // 1: LHS of compare.
1360 // 2: RHS of compare.
1361 // 3: Condition code.
1362 // 4: False use.
1363 // 5: True use.
1364 TrueOp = 5;
1365 FalseOp = 4;
1366 Cond.push_back(Elt: MI.getOperand(i: 1));
1367 Cond.push_back(Elt: MI.getOperand(i: 2));
1368 Cond.push_back(Elt: MI.getOperand(i: 3));
1369 // We can only fold when we support short forward branch opt.
1370 Optimizable = STI.hasShortForwardBranchOpt();
1371 return false;
1372}
1373
1374MachineInstr *
1375RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
1376 SmallPtrSetImpl<MachineInstr *> &SeenMIs,
1377 bool PreferFalse) const {
1378 assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
1379 "Unknown select instruction");
1380 if (!STI.hasShortForwardBranchOpt())
1381 return nullptr;
1382
1383 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
1384 MachineInstr *DefMI =
1385 canFoldAsPredicatedOp(MI.getOperand(i: 5).getReg(), MRI, this);
1386 bool Invert = !DefMI;
1387 if (!DefMI)
1388 DefMI = canFoldAsPredicatedOp(MI.getOperand(i: 4).getReg(), MRI, this);
1389 if (!DefMI)
1390 return nullptr;
1391
1392 // Find new register class to use.
1393 MachineOperand FalseReg = MI.getOperand(i: Invert ? 5 : 4);
1394 Register DestReg = MI.getOperand(i: 0).getReg();
1395 const TargetRegisterClass *PreviousClass = MRI.getRegClass(Reg: FalseReg.getReg());
1396 if (!MRI.constrainRegClass(Reg: DestReg, RC: PreviousClass))
1397 return nullptr;
1398
1399 unsigned PredOpc = getPredicatedOpcode(Opcode: DefMI->getOpcode());
1400 assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");
1401
1402 // Create a new predicated version of DefMI.
1403 MachineInstrBuilder NewMI =
1404 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);
1405
1406 // Copy the condition portion.
1407 NewMI.add(MO: MI.getOperand(i: 1));
1408 NewMI.add(MO: MI.getOperand(i: 2));
1409
1410 // Add condition code, inverting if necessary.
1411 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm());
1412 if (Invert)
1413 CC = RISCVCC::getOppositeBranchCondition(CC);
1414 NewMI.addImm(Val: CC);
1415
1416 // Copy the false register.
1417 NewMI.add(MO: FalseReg);
1418
1419 // Copy all the DefMI operands.
1420 const MCInstrDesc &DefDesc = DefMI->getDesc();
1421 for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
1422 NewMI.add(MO: DefMI->getOperand(i));
1423
1424 // Update SeenMIs set: register newly created MI and erase removed DefMI.
1425 SeenMIs.insert(Ptr: NewMI);
1426 SeenMIs.erase(Ptr: DefMI);
1427
1428 // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
1429 // DefMI would be invalid when tranferred inside the loop. Checking for a
1430 // loop is expensive, but at least remove kill flags if they are in different
1431 // BBs.
1432 if (DefMI->getParent() != MI.getParent())
1433 NewMI->clearKillInfo();
1434
1435 // The caller will erase MI, but not DefMI.
1436 DefMI->eraseFromParent();
1437 return NewMI;
1438}
1439
1440unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
1441 if (MI.isMetaInstruction())
1442 return 0;
1443
1444 unsigned Opcode = MI.getOpcode();
1445
1446 if (Opcode == TargetOpcode::INLINEASM ||
1447 Opcode == TargetOpcode::INLINEASM_BR) {
1448 const MachineFunction &MF = *MI.getParent()->getParent();
1449 const auto &TM = static_cast<const RISCVTargetMachine &>(MF.getTarget());
1450 return getInlineAsmLength(MI.getOperand(i: 0).getSymbolName(),
1451 *TM.getMCAsmInfo());
1452 }
1453
1454 if (!MI.memoperands_empty()) {
1455 MachineMemOperand *MMO = *(MI.memoperands_begin());
1456 const MachineFunction &MF = *MI.getParent()->getParent();
1457 const auto &ST = MF.getSubtarget<RISCVSubtarget>();
1458 if (ST.hasStdExtZihintntl() && MMO->isNonTemporal()) {
1459 if (ST.hasStdExtCOrZca() && ST.enableRVCHintInstrs()) {
1460 if (isCompressibleInst(MI, STI))
1461 return 4; // c.ntl.all + c.load/c.store
1462 return 6; // c.ntl.all + load/store
1463 }
1464 return 8; // ntl.all + load/store
1465 }
1466 }
1467
1468 if (Opcode == TargetOpcode::BUNDLE)
1469 return getInstBundleLength(MI);
1470
1471 if (MI.getParent() && MI.getParent()->getParent()) {
1472 if (isCompressibleInst(MI, STI))
1473 return 2;
1474 }
1475
1476 switch (Opcode) {
1477 case TargetOpcode::STACKMAP:
1478 // The upper bound for a stackmap intrinsic is the full length of its shadow
1479 return StackMapOpers(&MI).getNumPatchBytes();
1480 case TargetOpcode::PATCHPOINT:
1481 // The size of the patchpoint intrinsic is the number of bytes requested
1482 return PatchPointOpers(&MI).getNumPatchBytes();
1483 case TargetOpcode::STATEPOINT: {
1484 // The size of the statepoint intrinsic is the number of bytes requested
1485 unsigned NumBytes = StatepointOpers(&MI).getNumPatchBytes();
1486 // No patch bytes means at most a PseudoCall is emitted
1487 return std::max(a: NumBytes, b: 8U);
1488 }
1489 default:
1490 return get(Opcode).getSize();
1491 }
1492}
1493
1494unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
1495 unsigned Size = 0;
1496 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
1497 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
1498 while (++I != E && I->isInsideBundle()) {
1499 assert(!I->isBundle() && "No nested bundle!");
1500 Size += getInstSizeInBytes(MI: *I);
1501 }
1502 return Size;
1503}
1504
1505bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1506 const unsigned Opcode = MI.getOpcode();
1507 switch (Opcode) {
1508 default:
1509 break;
1510 case RISCV::FSGNJ_D:
1511 case RISCV::FSGNJ_S:
1512 case RISCV::FSGNJ_H:
1513 case RISCV::FSGNJ_D_INX:
1514 case RISCV::FSGNJ_D_IN32X:
1515 case RISCV::FSGNJ_S_INX:
1516 case RISCV::FSGNJ_H_INX:
1517 // The canonical floating-point move is fsgnj rd, rs, rs.
1518 return MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isReg() &&
1519 MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg();
1520 case RISCV::ADDI:
1521 case RISCV::ORI:
1522 case RISCV::XORI:
1523 return (MI.getOperand(1).isReg() &&
1524 MI.getOperand(1).getReg() == RISCV::X0) ||
1525 (MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0);
1526 }
1527 return MI.isAsCheapAsAMove();
1528}
1529
1530std::optional<DestSourcePair>
1531RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
1532 if (MI.isMoveReg())
1533 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
1534 switch (MI.getOpcode()) {
1535 default:
1536 break;
1537 case RISCV::ADDI:
1538 // Operand 1 can be a frameindex but callers expect registers
1539 if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isImm() &&
1540 MI.getOperand(i: 2).getImm() == 0)
1541 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
1542 break;
1543 case RISCV::FSGNJ_D:
1544 case RISCV::FSGNJ_S:
1545 case RISCV::FSGNJ_H:
1546 case RISCV::FSGNJ_D_INX:
1547 case RISCV::FSGNJ_D_IN32X:
1548 case RISCV::FSGNJ_S_INX:
1549 case RISCV::FSGNJ_H_INX:
1550 // The canonical floating-point move is fsgnj rd, rs, rs.
1551 if (MI.getOperand(i: 1).isReg() && MI.getOperand(i: 2).isReg() &&
1552 MI.getOperand(i: 1).getReg() == MI.getOperand(i: 2).getReg())
1553 return DestSourcePair{MI.getOperand(i: 0), MI.getOperand(i: 1)};
1554 break;
1555 }
1556 return std::nullopt;
1557}
1558
1559MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
1560 if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
1561 // The option is unused. Choose Local strategy only for in-order cores. When
1562 // scheduling model is unspecified, use MinInstrCount strategy as more
1563 // generic one.
1564 const auto &SchedModel = STI.getSchedModel();
1565 return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
1566 ? MachineTraceStrategy::TS_MinInstrCount
1567 : MachineTraceStrategy::TS_Local;
1568 }
1569 // The strategy was forced by the option.
1570 return ForceMachineCombinerStrategy;
1571}
1572
1573void RISCVInstrInfo::finalizeInsInstrs(
1574 MachineInstr &Root, unsigned &Pattern,
1575 SmallVectorImpl<MachineInstr *> &InsInstrs) const {
1576 int16_t FrmOpIdx =
1577 RISCV::getNamedOperandIdx(Root.getOpcode(), RISCV::OpName::frm);
1578 if (FrmOpIdx < 0) {
1579 assert(all_of(InsInstrs,
1580 [](MachineInstr *MI) {
1581 return RISCV::getNamedOperandIdx(MI->getOpcode(),
1582 RISCV::OpName::frm) < 0;
1583 }) &&
1584 "New instructions require FRM whereas the old one does not have it");
1585 return;
1586 }
1587
1588 const MachineOperand &FRM = Root.getOperand(i: FrmOpIdx);
1589 MachineFunction &MF = *Root.getMF();
1590
1591 for (auto *NewMI : InsInstrs) {
1592    // We've already added the FRM operand.
1593 if (static_cast<unsigned>(RISCV::getNamedOperandIdx(
1594 NewMI->getOpcode(), RISCV::OpName::frm)) != NewMI->getNumOperands())
1595 continue;
1596 MachineInstrBuilder MIB(MF, NewMI);
1597 MIB.add(MO: FRM);
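    // A dynamic rounding mode reads the FRM control register, so model that
    // with an implicit use of FRM.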
1598 if (FRM.getImm() == RISCVFPRndMode::DYN)
1599 MIB.addUse(RISCV::FRM, RegState::Implicit);
1600 }
1601}
1602
1603static bool isFADD(unsigned Opc) {
1604 switch (Opc) {
1605 default:
1606 return false;
1607 case RISCV::FADD_H:
1608 case RISCV::FADD_S:
1609 case RISCV::FADD_D:
1610 return true;
1611 }
1612}
1613
1614static bool isFSUB(unsigned Opc) {
1615 switch (Opc) {
1616 default:
1617 return false;
1618 case RISCV::FSUB_H:
1619 case RISCV::FSUB_S:
1620 case RISCV::FSUB_D:
1621 return true;
1622 }
1623}
1624
1625static bool isFMUL(unsigned Opc) {
1626 switch (Opc) {
1627 default:
1628 return false;
1629 case RISCV::FMUL_H:
1630 case RISCV::FMUL_S:
1631 case RISCV::FMUL_D:
1632 return true;
1633 }
1634}
1635
1636bool RISCVInstrInfo::hasReassociableSibling(const MachineInstr &Inst,
1637 bool &Commuted) const {
1638 if (!TargetInstrInfo::hasReassociableSibling(Inst, Commuted))
1639 return false;
1640
1641 const MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
1642 unsigned OperandIdx = Commuted ? 2 : 1;
1643 const MachineInstr &Sibling =
1644 *MRI.getVRegDef(Reg: Inst.getOperand(i: OperandIdx).getReg());
1645
1646 int16_t InstFrmOpIdx =
1647 RISCV::getNamedOperandIdx(Inst.getOpcode(), RISCV::OpName::frm);
1648 int16_t SiblingFrmOpIdx =
1649 RISCV::getNamedOperandIdx(Sibling.getOpcode(), RISCV::OpName::frm);
1650
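  // Reassociation is only safe when neither instruction has an FRM operand or
  // both use the same rounding mode.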
1651 return (InstFrmOpIdx < 0 && SiblingFrmOpIdx < 0) ||
1652 RISCV::hasEqualFRM(MI1: Inst, MI2: Sibling);
1653}
1654
1655bool RISCVInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
1656 bool Invert) const {
1657 unsigned Opc = Inst.getOpcode();
1658 if (Invert) {
1659 auto InverseOpcode = getInverseOpcode(Opcode: Opc);
1660 if (!InverseOpcode)
1661 return false;
1662 Opc = *InverseOpcode;
1663 }
1664
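  // FP adds and multiplies are only reassociable when both the reassoc and
  // nsz fast-math flags are set on the instruction.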
1665 if (isFADD(Opc) || isFMUL(Opc))
1666 return Inst.getFlag(Flag: MachineInstr::MIFlag::FmReassoc) &&
1667 Inst.getFlag(Flag: MachineInstr::MIFlag::FmNsz);
1668
1669 switch (Opc) {
1670 default:
1671 return false;
1672 case RISCV::ADD:
1673 case RISCV::ADDW:
1674 case RISCV::AND:
1675 case RISCV::OR:
1676 case RISCV::XOR:
1677 // From RISC-V ISA spec, if both the high and low bits of the same product
1678 // are required, then the recommended code sequence is:
1679 //
1680 // MULH[[S]U] rdh, rs1, rs2
1681 // MUL rdl, rs1, rs2
1682 // (source register specifiers must be in same order and rdh cannot be the
1683 // same as rs1 or rs2)
1684 //
1685 // Microarchitectures can then fuse these into a single multiply operation
1686 // instead of performing two separate multiplies.
1687 // MachineCombiner may reassociate MUL operands and lose the fusion
1688 // opportunity.
1689 case RISCV::MUL:
1690 case RISCV::MULW:
1691 case RISCV::MIN:
1692 case RISCV::MINU:
1693 case RISCV::MAX:
1694 case RISCV::MAXU:
1695 case RISCV::FMIN_H:
1696 case RISCV::FMIN_S:
1697 case RISCV::FMIN_D:
1698 case RISCV::FMAX_H:
1699 case RISCV::FMAX_S:
1700 case RISCV::FMAX_D:
1701 return true;
1702 }
1703
1704 return false;
1705}
1706
1707std::optional<unsigned>
1708RISCVInstrInfo::getInverseOpcode(unsigned Opcode) const {
1709 switch (Opcode) {
1710 default:
1711 return std::nullopt;
1712 case RISCV::FADD_H:
1713 return RISCV::FSUB_H;
1714 case RISCV::FADD_S:
1715 return RISCV::FSUB_S;
1716 case RISCV::FADD_D:
1717 return RISCV::FSUB_D;
1718 case RISCV::FSUB_H:
1719 return RISCV::FADD_H;
1720 case RISCV::FSUB_S:
1721 return RISCV::FADD_S;
1722 case RISCV::FSUB_D:
1723 return RISCV::FADD_D;
1724 case RISCV::ADD:
1725 return RISCV::SUB;
1726 case RISCV::SUB:
1727 return RISCV::ADD;
1728 case RISCV::ADDW:
1729 return RISCV::SUBW;
1730 case RISCV::SUBW:
1731 return RISCV::ADDW;
1732 }
1733}
1734
1735static bool canCombineFPFusedMultiply(const MachineInstr &Root,
1736 const MachineOperand &MO,
1737 bool DoRegPressureReduce) {
1738 if (!MO.isReg() || !MO.getReg().isVirtual())
1739 return false;
1740 const MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
1741 MachineInstr *MI = MRI.getVRegDef(Reg: MO.getReg());
1742 if (!MI || !isFMUL(Opc: MI->getOpcode()))
1743 return false;
1744
1745 if (!Root.getFlag(Flag: MachineInstr::MIFlag::FmContract) ||
1746 !MI->getFlag(Flag: MachineInstr::MIFlag::FmContract))
1747 return false;
1748
1749  // Try combining even if the fmul has more than one use, as it eliminates the
1750  // dependency between the fadd (or fsub) and the fmul. However, it can extend
1751  // the live ranges of the fmul operands, so reject the transformation in
1752  // register pressure reduction mode.
1753 if (DoRegPressureReduce && !MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
1754 return false;
1755
1756 // Do not combine instructions from different basic blocks.
1757 if (Root.getParent() != MI->getParent())
1758 return false;
1759 return RISCV::hasEqualFRM(MI1: Root, MI2: *MI);
1760}
1761
1762static bool getFPFusedMultiplyPatterns(MachineInstr &Root,
1763 SmallVectorImpl<unsigned> &Patterns,
1764 bool DoRegPressureReduce) {
1765 unsigned Opc = Root.getOpcode();
1766 bool IsFAdd = isFADD(Opc);
1767 if (!IsFAdd && !isFSUB(Opc))
1768 return false;
1769 bool Added = false;
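  // If the fmul feeds operand 1, the addend is operand 2 (FMADD_AX/FMSUB);
  // if it feeds operand 2, the addend is operand 1 (FMADD_XA/FNMSUB).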
1770 if (canCombineFPFusedMultiply(Root, MO: Root.getOperand(i: 1),
1771 DoRegPressureReduce)) {
1772 Patterns.push_back(Elt: IsFAdd ? RISCVMachineCombinerPattern::FMADD_AX
1773 : RISCVMachineCombinerPattern::FMSUB);
1774 Added = true;
1775 }
1776 if (canCombineFPFusedMultiply(Root, MO: Root.getOperand(i: 2),
1777 DoRegPressureReduce)) {
1778 Patterns.push_back(Elt: IsFAdd ? RISCVMachineCombinerPattern::FMADD_XA
1779 : RISCVMachineCombinerPattern::FNMSUB);
1780 Added = true;
1781 }
1782 return Added;
1783}
1784
1785static bool getFPPatterns(MachineInstr &Root,
1786 SmallVectorImpl<unsigned> &Patterns,
1787 bool DoRegPressureReduce) {
1788 return getFPFusedMultiplyPatterns(Root, Patterns, DoRegPressureReduce);
1789}
1790
1791/// Utility routine that checks if \p MO is defined by a \p CombineOpc
1792/// instruction in the basic block \p MBB.
1793static const MachineInstr *canCombine(const MachineBasicBlock &MBB,
1794 const MachineOperand &MO,
1795 unsigned CombineOpc) {
1796 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
1797 const MachineInstr *MI = nullptr;
1798
1799 if (MO.isReg() && MO.getReg().isVirtual())
1800 MI = MRI.getUniqueVRegDef(Reg: MO.getReg());
1801 // And it needs to be in the trace (otherwise, it won't have a depth).
1802 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
1803 return nullptr;
1804  // It must only be used by the user we combine with.
1805 if (!MRI.hasOneNonDBGUse(RegNo: MI->getOperand(i: 0).getReg()))
1806 return nullptr;
1807
1808 return MI;
1809}
1810
1811/// Utility routine that checks if \p MO is defined by a SLLI in \p MBB that
1812/// can be combined by splitting across two SHXADD instructions. The first
1813/// SHXADD shift amount is given by \p OuterShiftAmt.
1814static bool canCombineShiftIntoShXAdd(const MachineBasicBlock &MBB,
1815 const MachineOperand &MO,
1816 unsigned OuterShiftAmt) {
1817 const MachineInstr *ShiftMI = canCombine(MBB, MO, RISCV::SLLI);
1818 if (!ShiftMI)
1819 return false;
1820
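  // The inner shift amount must be at least the outer shift amount, and the
  // difference must be at most 3 so it can be folded into a second shXadd
  // (or a plain add when the two amounts are equal).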
1821 unsigned InnerShiftAmt = ShiftMI->getOperand(i: 2).getImm();
1822 if (InnerShiftAmt < OuterShiftAmt || (InnerShiftAmt - OuterShiftAmt) > 3)
1823 return false;
1824
1825 return true;
1826}
1827
1828// Returns the shift amount from a SHXADD instruction. Returns 0 if the
1829// instruction is not a SHXADD.
1830static unsigned getSHXADDShiftAmount(unsigned Opc) {
1831 switch (Opc) {
1832 default:
1833 return 0;
1834 case RISCV::SH1ADD:
1835 return 1;
1836 case RISCV::SH2ADD:
1837 return 2;
1838 case RISCV::SH3ADD:
1839 return 3;
1840 }
1841}
1842
1843// Look for opportunities to combine (sh3add Z, (add X, (slli Y, 5))) into
1844// (sh3add (sh2add Y, Z), X).
1845static bool getSHXADDPatterns(const MachineInstr &Root,
1846 SmallVectorImpl<unsigned> &Patterns) {
1847 unsigned ShiftAmt = getSHXADDShiftAmount(Opc: Root.getOpcode());
1848 if (!ShiftAmt)
1849 return false;
1850
1851 const MachineBasicBlock &MBB = *Root.getParent();
1852
1853 const MachineInstr *AddMI = canCombine(MBB, Root.getOperand(2), RISCV::ADD);
1854 if (!AddMI)
1855 return false;
1856
1857 bool Found = false;
1858 if (canCombineShiftIntoShXAdd(MBB, MO: AddMI->getOperand(i: 1), OuterShiftAmt: ShiftAmt)) {
1859 Patterns.push_back(Elt: RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1);
1860 Found = true;
1861 }
1862 if (canCombineShiftIntoShXAdd(MBB, MO: AddMI->getOperand(i: 2), OuterShiftAmt: ShiftAmt)) {
1863 Patterns.push_back(Elt: RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2);
1864 Found = true;
1865 }
1866
1867 return Found;
1868}
1869
1870CombinerObjective RISCVInstrInfo::getCombinerObjective(unsigned Pattern) const {
1871 switch (Pattern) {
1872 case RISCVMachineCombinerPattern::FMADD_AX:
1873 case RISCVMachineCombinerPattern::FMADD_XA:
1874 case RISCVMachineCombinerPattern::FMSUB:
1875 case RISCVMachineCombinerPattern::FNMSUB:
1876 return CombinerObjective::MustReduceDepth;
1877 default:
1878 return TargetInstrInfo::getCombinerObjective(Pattern);
1879 }
1880}
1881
1882bool RISCVInstrInfo::getMachineCombinerPatterns(
1883 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
1884 bool DoRegPressureReduce) const {
1885
1886 if (getFPPatterns(Root, Patterns, DoRegPressureReduce))
1887 return true;
1888
1889 if (getSHXADDPatterns(Root, Patterns))
1890 return true;
1891
1892 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
1893 DoRegPressureReduce);
1894}
1895
1896static unsigned getFPFusedMultiplyOpcode(unsigned RootOpc, unsigned Pattern) {
1897 switch (RootOpc) {
1898 default:
1899 llvm_unreachable("Unexpected opcode");
1900 case RISCV::FADD_H:
1901 return RISCV::FMADD_H;
1902 case RISCV::FADD_S:
1903 return RISCV::FMADD_S;
1904 case RISCV::FADD_D:
1905 return RISCV::FMADD_D;
1906 case RISCV::FSUB_H:
1907 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_H
1908 : RISCV::FNMSUB_H;
1909 case RISCV::FSUB_S:
1910 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_S
1911 : RISCV::FNMSUB_S;
1912 case RISCV::FSUB_D:
1913 return Pattern == RISCVMachineCombinerPattern::FMSUB ? RISCV::FMSUB_D
1914 : RISCV::FNMSUB_D;
1915 }
1916}
1917
1918static unsigned getAddendOperandIdx(unsigned Pattern) {
1919 switch (Pattern) {
1920 default:
1921 llvm_unreachable("Unexpected pattern");
1922 case RISCVMachineCombinerPattern::FMADD_AX:
1923 case RISCVMachineCombinerPattern::FMSUB:
1924 return 2;
1925 case RISCVMachineCombinerPattern::FMADD_XA:
1926 case RISCVMachineCombinerPattern::FNMSUB:
1927 return 1;
1928 }
1929}
1930
1931static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
1932 unsigned Pattern,
1933 SmallVectorImpl<MachineInstr *> &InsInstrs,
1934 SmallVectorImpl<MachineInstr *> &DelInstrs) {
1935 MachineFunction *MF = Root.getMF();
1936 MachineRegisterInfo &MRI = MF->getRegInfo();
1937 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1938
1939 MachineOperand &Mul1 = Prev.getOperand(i: 1);
1940 MachineOperand &Mul2 = Prev.getOperand(i: 2);
1941 MachineOperand &Dst = Root.getOperand(i: 0);
1942 MachineOperand &Addend = Root.getOperand(i: getAddendOperandIdx(Pattern));
1943
1944 Register DstReg = Dst.getReg();
1945 unsigned FusedOpc = getFPFusedMultiplyOpcode(RootOpc: Root.getOpcode(), Pattern);
1946 uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
1947 DebugLoc MergedLoc =
1948 DILocation::getMergedLocation(LocA: Root.getDebugLoc(), LocB: Prev.getDebugLoc());
1949
1950 bool Mul1IsKill = Mul1.isKill();
1951 bool Mul2IsKill = Mul2.isKill();
1952 bool AddendIsKill = Addend.isKill();
1953
1954 // We need to clear kill flags since we may be extending the live range past
1955 // a kill. If the mul had kill flags, we can preserve those since we know
1956 // where the previous range stopped.
1957 MRI.clearKillFlags(Reg: Mul1.getReg());
1958 MRI.clearKillFlags(Reg: Mul2.getReg());
1959
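  // Build the fused multiply-add/sub: the multiplicands from the fmul come
  // first and the addend/subtrahend from Root comes last.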
1960 MachineInstrBuilder MIB =
1961 BuildMI(MF&: *MF, MIMD: MergedLoc, MCID: TII->get(Opcode: FusedOpc), DestReg: DstReg)
1962 .addReg(RegNo: Mul1.getReg(), flags: getKillRegState(B: Mul1IsKill))
1963 .addReg(RegNo: Mul2.getReg(), flags: getKillRegState(B: Mul2IsKill))
1964 .addReg(RegNo: Addend.getReg(), flags: getKillRegState(B: AddendIsKill))
1965 .setMIFlags(IntersectedFlags);
1966
1967 InsInstrs.push_back(Elt: MIB);
1968 if (MRI.hasOneNonDBGUse(RegNo: Prev.getOperand(i: 0).getReg()))
1969 DelInstrs.push_back(Elt: &Prev);
1970 DelInstrs.push_back(Elt: &Root);
1971}
1972
1973// Combine patterns like (sh3add Z, (add X, (slli Y, 5))) to
1974// (sh3add (sh2add Y, Z), X) if the shift amount can be split across two
1975// shXadd instructions. The outer shXadd keeps its original opcode.
1976static void
1977genShXAddAddShift(MachineInstr &Root, unsigned AddOpIdx,
1978 SmallVectorImpl<MachineInstr *> &InsInstrs,
1979 SmallVectorImpl<MachineInstr *> &DelInstrs,
1980 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
1981 MachineFunction *MF = Root.getMF();
1982 MachineRegisterInfo &MRI = MF->getRegInfo();
1983 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1984
1985 unsigned OuterShiftAmt = getSHXADDShiftAmount(Opc: Root.getOpcode());
1986 assert(OuterShiftAmt != 0 && "Unexpected opcode");
1987
1988 MachineInstr *AddMI = MRI.getUniqueVRegDef(Reg: Root.getOperand(i: 2).getReg());
1989 MachineInstr *ShiftMI =
1990 MRI.getUniqueVRegDef(Reg: AddMI->getOperand(i: AddOpIdx).getReg());
1991
1992 unsigned InnerShiftAmt = ShiftMI->getOperand(i: 2).getImm();
1993 assert(InnerShiftAmt >= OuterShiftAmt && "Unexpected shift amount");
1994
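  // The leftover shift (InnerShiftAmt - OuterShiftAmt) becomes the inner op,
  // e.g. for (sh3add Z, (add X, (slli Y, 5))) the inner op is sh2add since
  // 5 - 3 == 2.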
1995 unsigned InnerOpc;
1996 switch (InnerShiftAmt - OuterShiftAmt) {
1997 default:
1998 llvm_unreachable("Unexpected shift amount");
1999 case 0:
2000 InnerOpc = RISCV::ADD;
2001 break;
2002 case 1:
2003 InnerOpc = RISCV::SH1ADD;
2004 break;
2005 case 2:
2006 InnerOpc = RISCV::SH2ADD;
2007 break;
2008 case 3:
2009 InnerOpc = RISCV::SH3ADD;
2010 break;
2011 }
2012
2013 const MachineOperand &X = AddMI->getOperand(i: 3 - AddOpIdx);
2014 const MachineOperand &Y = ShiftMI->getOperand(i: 1);
2015 const MachineOperand &Z = Root.getOperand(i: 1);
2016
2017 Register NewVR = MRI.createVirtualRegister(&RISCV::GPRRegClass);
2018
2019 auto MIB1 = BuildMI(MF&: *MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: InnerOpc), DestReg: NewVR)
2020 .addReg(RegNo: Y.getReg(), flags: getKillRegState(B: Y.isKill()))
2021 .addReg(RegNo: Z.getReg(), flags: getKillRegState(B: Z.isKill()));
2022 auto MIB2 = BuildMI(MF&: *MF, MIMD: MIMetadata(Root), MCID: TII->get(Opcode: Root.getOpcode()),
2023 DestReg: Root.getOperand(i: 0).getReg())
2024 .addReg(RegNo: NewVR, flags: RegState::Kill)
2025 .addReg(RegNo: X.getReg(), flags: getKillRegState(B: X.isKill()));
2026
2027 InstrIdxForVirtReg.insert(KV: std::make_pair(x&: NewVR, y: 0));
2028 InsInstrs.push_back(Elt: MIB1);
2029 InsInstrs.push_back(Elt: MIB2);
2030 DelInstrs.push_back(Elt: ShiftMI);
2031 DelInstrs.push_back(Elt: AddMI);
2032 DelInstrs.push_back(Elt: &Root);
2033}
2034
2035void RISCVInstrInfo::genAlternativeCodeSequence(
2036 MachineInstr &Root, unsigned Pattern,
2037 SmallVectorImpl<MachineInstr *> &InsInstrs,
2038 SmallVectorImpl<MachineInstr *> &DelInstrs,
2039 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
2040 MachineRegisterInfo &MRI = Root.getMF()->getRegInfo();
2041 switch (Pattern) {
2042 default:
2043 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
2044 DelInstrs, InstrIdxForVirtReg);
2045 return;
2046 case RISCVMachineCombinerPattern::FMADD_AX:
2047 case RISCVMachineCombinerPattern::FMSUB: {
2048 MachineInstr &Prev = *MRI.getVRegDef(Reg: Root.getOperand(i: 1).getReg());
2049 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2050 return;
2051 }
2052 case RISCVMachineCombinerPattern::FMADD_XA:
2053 case RISCVMachineCombinerPattern::FNMSUB: {
2054 MachineInstr &Prev = *MRI.getVRegDef(Reg: Root.getOperand(i: 2).getReg());
2055 combineFPFusedMultiply(Root, Prev, Pattern, InsInstrs, DelInstrs);
2056 return;
2057 }
2058 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP1:
2059 genShXAddAddShift(Root, AddOpIdx: 1, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2060 return;
2061 case RISCVMachineCombinerPattern::SHXADD_ADD_SLLI_OP2:
2062 genShXAddAddShift(Root, AddOpIdx: 2, InsInstrs, DelInstrs, InstrIdxForVirtReg);
2063 return;
2064 }
2065}
2066
2067bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
2068 StringRef &ErrInfo) const {
2069 MCInstrDesc const &Desc = MI.getDesc();
2070
2071 for (const auto &[Index, Operand] : enumerate(First: Desc.operands())) {
2072 unsigned OpType = Operand.OperandType;
2073 if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
2074 OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
2075 const MachineOperand &MO = MI.getOperand(i: Index);
2076 if (MO.isImm()) {
2077 int64_t Imm = MO.getImm();
2078 bool Ok;
2079 switch (OpType) {
2080 default:
2081 llvm_unreachable("Unexpected operand type");
2082
2083 // clang-format off
2084#define CASE_OPERAND_UIMM(NUM) \
2085 case RISCVOp::OPERAND_UIMM##NUM: \
2086 Ok = isUInt<NUM>(Imm); \
2087 break;
2088 CASE_OPERAND_UIMM(1)
2089 CASE_OPERAND_UIMM(2)
2090 CASE_OPERAND_UIMM(3)
2091 CASE_OPERAND_UIMM(4)
2092 CASE_OPERAND_UIMM(5)
2093 CASE_OPERAND_UIMM(6)
2094 CASE_OPERAND_UIMM(7)
2095 CASE_OPERAND_UIMM(8)
2096 CASE_OPERAND_UIMM(12)
2097 CASE_OPERAND_UIMM(20)
2098 // clang-format on
2099 case RISCVOp::OPERAND_UIMM2_LSB0:
2100 Ok = isShiftedUInt<1, 1>(x: Imm);
2101 break;
2102 case RISCVOp::OPERAND_UIMM7_LSB00:
2103 Ok = isShiftedUInt<5, 2>(x: Imm);
2104 break;
2105 case RISCVOp::OPERAND_UIMM8_LSB00:
2106 Ok = isShiftedUInt<6, 2>(x: Imm);
2107 break;
2108 case RISCVOp::OPERAND_UIMM8_LSB000:
2109 Ok = isShiftedUInt<5, 3>(x: Imm);
2110 break;
2111 case RISCVOp::OPERAND_UIMM8_GE32:
2112 Ok = isUInt<8>(x: Imm) && Imm >= 32;
2113 break;
2114 case RISCVOp::OPERAND_UIMM9_LSB000:
2115 Ok = isShiftedUInt<6, 3>(x: Imm);
2116 break;
2117 case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
2118 Ok = isShiftedInt<6, 4>(x: Imm) && (Imm != 0);
2119 break;
2120 case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO:
2121 Ok = isShiftedUInt<8, 2>(x: Imm) && (Imm != 0);
2122 break;
2123 case RISCVOp::OPERAND_ZERO:
2124 Ok = Imm == 0;
2125 break;
2126 case RISCVOp::OPERAND_SIMM5:
2127 Ok = isInt<5>(x: Imm);
2128 break;
2129 case RISCVOp::OPERAND_SIMM5_PLUS1:
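        // simm5_plus1 is the simm5 range shifted up by one, i.e. [-15, 16].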
2130 Ok = (isInt<5>(x: Imm) && Imm != -16) || Imm == 16;
2131 break;
2132 case RISCVOp::OPERAND_SIMM6:
2133 Ok = isInt<6>(x: Imm);
2134 break;
2135 case RISCVOp::OPERAND_SIMM6_NONZERO:
2136 Ok = Imm != 0 && isInt<6>(x: Imm);
2137 break;
2138 case RISCVOp::OPERAND_VTYPEI10:
2139 Ok = isUInt<10>(x: Imm);
2140 break;
2141 case RISCVOp::OPERAND_VTYPEI11:
2142 Ok = isUInt<11>(x: Imm);
2143 break;
2144 case RISCVOp::OPERAND_SIMM12:
2145 Ok = isInt<12>(x: Imm);
2146 break;
2147 case RISCVOp::OPERAND_SIMM12_LSB00000:
2148 Ok = isShiftedInt<7, 5>(x: Imm);
2149 break;
2150 case RISCVOp::OPERAND_UIMMLOG2XLEN:
2151 Ok = STI.is64Bit() ? isUInt<6>(x: Imm) : isUInt<5>(x: Imm);
2152 break;
2153 case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
2154 Ok = STI.is64Bit() ? isUInt<6>(x: Imm) : isUInt<5>(x: Imm);
2155 Ok = Ok && Imm != 0;
2156 break;
2157 case RISCVOp::OPERAND_CLUI_IMM:
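        // c.lui takes a non-zero value in [1, 31] or one of the sign-extended
        // encodings in [0xfffe0, 0xfffff].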
2158 Ok = (isUInt<5>(x: Imm) && Imm != 0) ||
2159 (Imm >= 0xfffe0 && Imm <= 0xfffff);
2160 break;
2161 case RISCVOp::OPERAND_RVKRNUM:
2162 Ok = Imm >= 0 && Imm <= 10;
2163 break;
2164 case RISCVOp::OPERAND_RVKRNUM_0_7:
2165 Ok = Imm >= 0 && Imm <= 7;
2166 break;
2167 case RISCVOp::OPERAND_RVKRNUM_1_10:
2168 Ok = Imm >= 1 && Imm <= 10;
2169 break;
2170 case RISCVOp::OPERAND_RVKRNUM_2_14:
2171 Ok = Imm >= 2 && Imm <= 14;
2172 break;
2173 case RISCVOp::OPERAND_SPIMM:
2174 Ok = (Imm & 0xf) == 0;
2175 break;
2176 }
2177 if (!Ok) {
2178 ErrInfo = "Invalid immediate";
2179 return false;
2180 }
2181 }
2182 }
2183 }
2184
2185 const uint64_t TSFlags = Desc.TSFlags;
2186 if (RISCVII::hasVLOp(TSFlags)) {
2187 const MachineOperand &Op = MI.getOperand(i: RISCVII::getVLOpNum(Desc));
2188 if (!Op.isImm() && !Op.isReg()) {
2189 ErrInfo = "Invalid operand type for VL operand";
2190 return false;
2191 }
2192 if (Op.isReg() && Op.getReg() != RISCV::NoRegister) {
2193 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2194 auto *RC = MRI.getRegClass(Reg: Op.getReg());
2195 if (!RISCV::GPRRegClass.hasSubClassEq(RC)) {
2196 ErrInfo = "Invalid register class for VL operand";
2197 return false;
2198 }
2199 }
2200 if (!RISCVII::hasSEWOp(TSFlags)) {
2201 ErrInfo = "VL operand w/o SEW operand?";
2202 return false;
2203 }
2204 }
2205 if (RISCVII::hasSEWOp(TSFlags)) {
2206 unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
2207 if (!MI.getOperand(i: OpIdx).isImm()) {
2208 ErrInfo = "SEW value expected to be an immediate";
2209 return false;
2210 }
2211 uint64_t Log2SEW = MI.getOperand(i: OpIdx).getImm();
2212 if (Log2SEW > 31) {
2213 ErrInfo = "Unexpected SEW value";
2214 return false;
2215 }
2216 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
2217 if (!RISCVVType::isValidSEW(SEW)) {
2218 ErrInfo = "Unexpected SEW value";
2219 return false;
2220 }
2221 }
2222 if (RISCVII::hasVecPolicyOp(TSFlags)) {
2223 unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
2224 if (!MI.getOperand(i: OpIdx).isImm()) {
2225 ErrInfo = "Policy operand expected to be an immediate";
2226 return false;
2227 }
2228 uint64_t Policy = MI.getOperand(i: OpIdx).getImm();
2229 if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {
2230 ErrInfo = "Invalid Policy Value";
2231 return false;
2232 }
2233 if (!RISCVII::hasVLOp(TSFlags)) {
2234 ErrInfo = "policy operand w/o VL operand?";
2235 return false;
2236 }
2237
2238  // VecPolicy operands can only exist on instructions with passthru/merge
2239  // operands. Note that not all instructions with a passthru operand have a
2240  // vec policy operand; some instructions have implicit policies.
2241 unsigned UseOpIdx;
2242 if (!MI.isRegTiedToUseOperand(DefOpIdx: 0, UseOpIdx: &UseOpIdx)) {
2243 ErrInfo = "policy operand w/o tied operand?";
2244 return false;
2245 }
2246 }
2247
2248 return true;
2249}
2250
2251bool RISCVInstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg,
2252 const MachineInstr &AddrI,
2253 ExtAddrMode &AM) const {
2254 switch (MemI.getOpcode()) {
2255 default:
2256 return false;
2257 case RISCV::LB:
2258 case RISCV::LBU:
2259 case RISCV::LH:
2260 case RISCV::LHU:
2261 case RISCV::LW:
2262 case RISCV::LWU:
2263 case RISCV::LD:
2264 case RISCV::FLH:
2265 case RISCV::FLW:
2266 case RISCV::FLD:
2267 case RISCV::SB:
2268 case RISCV::SH:
2269 case RISCV::SW:
2270 case RISCV::SD:
2271 case RISCV::FSH:
2272 case RISCV::FSW:
2273 case RISCV::FSD:
2274 break;
2275 }
2276
2277 if (MemI.getOperand(i: 0).getReg() == Reg)
2278 return false;
2279
2280 if (AddrI.getOpcode() != RISCV::ADDI || !AddrI.getOperand(1).isReg() ||
2281 !AddrI.getOperand(2).isImm())
2282 return false;
2283
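  // Fold the ADDI displacement into the memory offset. On RV32 the address
  // arithmetic wraps at 32 bits, so sign extend the combined offset.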
2284 int64_t OldOffset = MemI.getOperand(i: 2).getImm();
2285 int64_t Disp = AddrI.getOperand(i: 2).getImm();
2286 int64_t NewOffset = OldOffset + Disp;
2287 if (!STI.is64Bit())
2288 NewOffset = SignExtend64<32>(x: NewOffset);
2289
2290 if (!isInt<12>(x: NewOffset))
2291 return false;
2292
2293 AM.BaseReg = AddrI.getOperand(i: 1).getReg();
2294 AM.ScaledReg = 0;
2295 AM.Scale = 0;
2296 AM.Displacement = NewOffset;
2297 AM.Form = ExtAddrMode::Formula::Basic;
2298 return true;
2299}
2300
2301MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI,
2302 const ExtAddrMode &AM) const {
2303
2304 const DebugLoc &DL = MemI.getDebugLoc();
2305 MachineBasicBlock &MBB = *MemI.getParent();
2306
2307 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
2308 "Addressing mode not supported for folding");
2309
2310 return BuildMI(MBB, MemI, DL, get(MemI.getOpcode()))
2311 .addReg(MemI.getOperand(i: 0).getReg(),
2312 MemI.mayLoad() ? RegState::Define : 0)
2313 .addReg(AM.BaseReg)
2314 .addImm(AM.Displacement)
2315 .setMemRefs(MemI.memoperands())
2316 .setMIFlags(MemI.getFlags());
2317}
2318
2319bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
2320 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2321 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2322 const TargetRegisterInfo *TRI) const {
2323 if (!LdSt.mayLoadOrStore())
2324 return false;
2325
2326 // Conservatively, only handle scalar loads/stores for now.
2327 switch (LdSt.getOpcode()) {
2328 case RISCV::LB:
2329 case RISCV::LBU:
2330 case RISCV::SB:
2331 case RISCV::LH:
2332 case RISCV::LHU:
2333 case RISCV::FLH:
2334 case RISCV::SH:
2335 case RISCV::FSH:
2336 case RISCV::LW:
2337 case RISCV::LWU:
2338 case RISCV::FLW:
2339 case RISCV::SW:
2340 case RISCV::FSW:
2341 case RISCV::LD:
2342 case RISCV::FLD:
2343 case RISCV::SD:
2344 case RISCV::FSD:
2345 break;
2346 default:
2347 return false;
2348 }
2349 const MachineOperand *BaseOp;
2350 OffsetIsScalable = false;
2351 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
2352 return false;
2353 BaseOps.push_back(Elt: BaseOp);
2354 return true;
2355}
2356
2357// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
2358// helper?
2359static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
2360 ArrayRef<const MachineOperand *> BaseOps1,
2361 const MachineInstr &MI2,
2362 ArrayRef<const MachineOperand *> BaseOps2) {
2363 // Only examine the first "base" operand of each instruction, on the
2364 // assumption that it represents the real base address of the memory access.
2365 // Other operands are typically offsets or indices from this base address.
2366 if (BaseOps1.front()->isIdenticalTo(Other: *BaseOps2.front()))
2367 return true;
2368
2369 if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
2370 return false;
2371
2372 auto MO1 = *MI1.memoperands_begin();
2373 auto MO2 = *MI2.memoperands_begin();
2374 if (MO1->getAddrSpace() != MO2->getAddrSpace())
2375 return false;
2376
2377 auto Base1 = MO1->getValue();
2378 auto Base2 = MO2->getValue();
2379 if (!Base1 || !Base2)
2380 return false;
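  // Compare the underlying IR objects after stripping casts and GEP offsets.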
2381 Base1 = getUnderlyingObject(V: Base1);
2382 Base2 = getUnderlyingObject(V: Base2);
2383
2384 if (isa<UndefValue>(Val: Base1) || isa<UndefValue>(Val: Base2))
2385 return false;
2386
2387 return Base1 == Base2;
2388}
2389
2390bool RISCVInstrInfo::shouldClusterMemOps(
2391 ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
2392 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
2393 int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
2394 unsigned NumBytes) const {
2395 // If the mem ops (to be clustered) do not have the same base ptr, then they
2396 // should not be clustered
2397 if (!BaseOps1.empty() && !BaseOps2.empty()) {
2398 const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
2399 const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
2400 if (!memOpsHaveSameBasePtr(MI1: FirstLdSt, BaseOps1, MI2: SecondLdSt, BaseOps2))
2401 return false;
2402 } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
2403 // If only one base op is empty, they do not have the same base ptr
2404 return false;
2405 }
2406
2407 unsigned CacheLineSize =
2408 BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
2409 // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
2410 CacheLineSize = CacheLineSize ? CacheLineSize : 64;
2411 // Cluster if the memory operations are on the same or a neighbouring cache
2412 // line, but limit the maximum ClusterSize to avoid creating too much
2413 // additional register pressure.
2414 return ClusterSize <= 4 && std::abs(i: Offset1 - Offset2) < CacheLineSize;
2415}
2416
2417// Set BaseReg (the base register operand), Offset (the byte offset being
2418// accessed) and the access Width of the passed instruction that reads/writes
2419// memory. Returns false if the instruction does not read/write memory or the
2420// BaseReg/Offset/Width can't be determined. This function is not guaranteed
2421// to recognise base operands and offsets in all cases.
2422// TODO: Add an IsScalable bool ref argument (like the equivalent AArch64
2423// function) and set it as appropriate.
2424bool RISCVInstrInfo::getMemOperandWithOffsetWidth(
2425 const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
2426 LocationSize &Width, const TargetRegisterInfo *TRI) const {
2427 if (!LdSt.mayLoadOrStore())
2428 return false;
2429
2430 // Here we assume the standard RISC-V ISA, which uses a base+offset
2431 // addressing mode. You'll need to relax these conditions to support custom
2432 // load/store instructions.
2433 if (LdSt.getNumExplicitOperands() != 3)
2434 return false;
2435 if ((!LdSt.getOperand(i: 1).isReg() && !LdSt.getOperand(i: 1).isFI()) ||
2436 !LdSt.getOperand(i: 2).isImm())
2437 return false;
2438
2439 if (!LdSt.hasOneMemOperand())
2440 return false;
2441
2442 Width = (*LdSt.memoperands_begin())->getSize();
2443 BaseReg = &LdSt.getOperand(i: 1);
2444 Offset = LdSt.getOperand(i: 2).getImm();
2445 return true;
2446}
2447
2448bool RISCVInstrInfo::areMemAccessesTriviallyDisjoint(
2449 const MachineInstr &MIa, const MachineInstr &MIb) const {
2450 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
2451 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
2452
2453 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
2454 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
2455 return false;
2456
2457 // Retrieve the base register, offset from the base register and width. Width
2458 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If
2459 // base registers are identical, and the offset of a lower memory access +
2460 // the width doesn't overlap the offset of a higher memory access,
2461 // then the memory accesses are different.
2462 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
2463 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
2464 int64_t OffsetA = 0, OffsetB = 0;
2465 LocationSize WidthA = 0, WidthB = 0;
2466 if (getMemOperandWithOffsetWidth(LdSt: MIa, BaseReg&: BaseOpA, Offset&: OffsetA, Width&: WidthA, TRI) &&
2467 getMemOperandWithOffsetWidth(LdSt: MIb, BaseReg&: BaseOpB, Offset&: OffsetB, Width&: WidthB, TRI)) {
2468 if (BaseOpA->isIdenticalTo(Other: *BaseOpB)) {
2469 int LowOffset = std::min(a: OffsetA, b: OffsetB);
2470 int HighOffset = std::max(a: OffsetA, b: OffsetB);
2471 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
2472 if (LowWidth.hasValue() &&
2473 LowOffset + (int)LowWidth.getValue() <= HighOffset)
2474 return true;
2475 }
2476 }
2477 return false;
2478}
2479
2480std::pair<unsigned, unsigned>
2481RISCVInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
2482 const unsigned Mask = RISCVII::MO_DIRECT_FLAG_MASK;
2483 return std::make_pair(x: TF & Mask, y: TF & ~Mask);
2484}
2485
2486ArrayRef<std::pair<unsigned, const char *>>
2487RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
2488 using namespace RISCVII;
2489 static const std::pair<unsigned, const char *> TargetFlags[] = {
2490 {MO_CALL, "riscv-call"},
2491 {MO_LO, "riscv-lo"},
2492 {MO_HI, "riscv-hi"},
2493 {MO_PCREL_LO, "riscv-pcrel-lo"},
2494 {MO_PCREL_HI, "riscv-pcrel-hi"},
2495 {MO_GOT_HI, "riscv-got-hi"},
2496 {MO_TPREL_LO, "riscv-tprel-lo"},
2497 {MO_TPREL_HI, "riscv-tprel-hi"},
2498 {MO_TPREL_ADD, "riscv-tprel-add"},
2499 {MO_TLS_GOT_HI, "riscv-tls-got-hi"},
2500 {MO_TLS_GD_HI, "riscv-tls-gd-hi"},
2501 {MO_TLSDESC_HI, "riscv-tlsdesc-hi"},
2502 {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"},
2503 {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"},
2504 {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}};
2505 return ArrayRef(TargetFlags);
2506}
2507bool RISCVInstrInfo::isFunctionSafeToOutlineFrom(
2508 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
2509 const Function &F = MF.getFunction();
2510
2511 // Can F be deduplicated by the linker? If it can, don't outline from it.
2512 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
2513 return false;
2514
2515 // Don't outline from functions with section markings; the program could
2516 // expect that all the code is in the named section.
2517 if (F.hasSection())
2518 return false;
2519
2520 // It's safe to outline from MF.
2521 return true;
2522}
2523
2524bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
2525 unsigned &Flags) const {
2526 // More accurate safety checking is done in getOutliningCandidateInfo.
2527 return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags);
2528}
2529
2530// Enum values indicating how an outlined call should be constructed.
2531enum MachineOutlinerConstructionID {
2532 MachineOutlinerDefault
2533};
2534
2535bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
2536 MachineFunction &MF) const {
2537 return MF.getFunction().hasMinSize();
2538}
2539
2540std::optional<outliner::OutlinedFunction>
2541RISCVInstrInfo::getOutliningCandidateInfo(
2542 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
2543
2544  // First we need to filter out candidates where the X5 register (i.e. t0)
2545  // can't be used to set up the function call.
2546 auto CannotInsertCall = [](outliner::Candidate &C) {
2547 const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2548 return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
2549 };
2550
2551 llvm::erase_if(C&: RepeatedSequenceLocs, P: CannotInsertCall);
2552
2553 // If the sequence doesn't have enough candidates left, then we're done.
2554 if (RepeatedSequenceLocs.size() < 2)
2555 return std::nullopt;
2556
2557 unsigned SequenceSize = 0;
2558
2559 for (auto &MI : RepeatedSequenceLocs[0])
2560 SequenceSize += getInstSizeInBytes(MI);
2561
2562 // call t0, function = 8 bytes.
2563 unsigned CallOverhead = 8;
2564 for (auto &C : RepeatedSequenceLocs)
2565 C.setCallInfo(CID: MachineOutlinerDefault, CO: CallOverhead);
2566
2567 // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
2568 unsigned FrameOverhead = 4;
2569 if (RepeatedSequenceLocs[0]
2570 .getMF()
2571 ->getSubtarget<RISCVSubtarget>()
2572 .hasStdExtCOrZca())
2573 FrameOverhead = 2;
2574
2575 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
2576 FrameOverhead, MachineOutlinerDefault);
2577}
2578
2579outliner::InstrType
2580RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
2581 unsigned Flags) const {
2582 MachineInstr &MI = *MBBI;
2583 MachineBasicBlock *MBB = MI.getParent();
2584 const TargetRegisterInfo *TRI =
2585 MBB->getParent()->getSubtarget().getRegisterInfo();
2586 const auto &F = MI.getMF()->getFunction();
2587
2588 // We can manually strip out CFI instructions later.
2589 if (MI.isCFIInstruction())
2590    // If the current function has exception handling code, we can't outline
2591    // and strip these CFI instructions since that may break the .eh_frame
2592    // section needed for unwinding.
2593 return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
2594 : outliner::InstrType::Invisible;
2595
2596 // We need support for tail calls to outlined functions before return
2597 // statements can be allowed.
2598 if (MI.isReturn())
2599 return outliner::InstrType::Illegal;
2600
2601 // Don't allow modifying the X5 register which we use for return addresses for
2602 // these outlined functions.
2603 if (MI.modifiesRegister(RISCV::X5, TRI) ||
2604 MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
2605 return outliner::InstrType::Illegal;
2606
2607 // Make sure the operands don't reference something unsafe.
2608 for (const auto &MO : MI.operands()) {
2609
2610    // pcrel-hi and pcrel-lo can't be put in separate sections, so filter that
2611    // out if at all possible.
2612 if (MO.getTargetFlags() == RISCVII::MO_PCREL_LO &&
2613 (MI.getMF()->getTarget().getFunctionSections() || F.hasComdat() ||
2614 F.hasSection()))
2615 return outliner::InstrType::Illegal;
2616 }
2617
2618 return outliner::InstrType::Legal;
2619}
2620
2621void RISCVInstrInfo::buildOutlinedFrame(
2622 MachineBasicBlock &MBB, MachineFunction &MF,
2623 const outliner::OutlinedFunction &OF) const {
2624
2625 // Strip out any CFI instructions
2626 bool Changed = true;
2627 while (Changed) {
2628 Changed = false;
2629 auto I = MBB.begin();
2630 auto E = MBB.end();
2631 for (; I != E; ++I) {
2632 if (I->isCFIInstruction()) {
2633 I->removeFromParent();
2634 Changed = true;
2635 break;
2636 }
2637 }
2638 }
2639
2640 MBB.addLiveIn(RISCV::X5);
2641
2642 // Add in a return instruction to the end of the outlined frame.
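  // This is jr t0 (JALR with rd = x0), returning to the address that the
  // outlined call left in X5.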
2643 MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
2644 .addReg(RISCV::X0, RegState::Define)
2645 .addReg(RISCV::X5)
2646 .addImm(0));
2647}
2648
2649MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
2650 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
2651 MachineFunction &MF, outliner::Candidate &C) const {
2652
2653 // Add in a call instruction to the outlined function at the given location.
2654 It = MBB.insert(It,
2655 BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
2656 .addGlobalAddress(M.getNamedValue(MF.getName()), 0,
2657 RISCVII::MO_CALL));
2658 return It;
2659}
2660
2661std::optional<RegImmPair> RISCVInstrInfo::isAddImmediate(const MachineInstr &MI,
2662 Register Reg) const {
2663 // TODO: Handle cases where Reg is a super- or sub-register of the
2664 // destination register.
2665 const MachineOperand &Op0 = MI.getOperand(i: 0);
2666 if (!Op0.isReg() || Reg != Op0.getReg())
2667 return std::nullopt;
2668
2669 // Don't consider ADDIW as a candidate because the caller may not be aware
2670 // of its sign extension behaviour.
2671 if (MI.getOpcode() == RISCV::ADDI && MI.getOperand(1).isReg() &&
2672 MI.getOperand(2).isImm())
2673 return RegImmPair{MI.getOperand(i: 1).getReg(), MI.getOperand(i: 2).getImm()};
2674
2675 return std::nullopt;
2676}
2677
2678// MIR printer helper function to annotate Operands with a comment.
2679std::string RISCVInstrInfo::createMIROperandComment(
2680 const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
2681 const TargetRegisterInfo *TRI) const {
2682 // Print a generic comment for this operand if there is one.
2683 std::string GenericComment =
2684 TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
2685 if (!GenericComment.empty())
2686 return GenericComment;
2687
2688 // If not, we must have an immediate operand.
2689 if (!Op.isImm())
2690 return std::string();
2691
2692 std::string Comment;
2693 raw_string_ostream OS(Comment);
2694
2695 uint64_t TSFlags = MI.getDesc().TSFlags;
2696
2697 // Print the full VType operand of vsetvli/vsetivli instructions, and the SEW
2698 // operand of vector codegen pseudos.
2699 if ((MI.getOpcode() == RISCV::VSETVLI || MI.getOpcode() == RISCV::VSETIVLI ||
2700 MI.getOpcode() == RISCV::PseudoVSETVLI ||
2701 MI.getOpcode() == RISCV::PseudoVSETIVLI ||
2702 MI.getOpcode() == RISCV::PseudoVSETVLIX0) &&
2703 OpIdx == 2) {
2704 unsigned Imm = MI.getOperand(i: OpIdx).getImm();
2705 RISCVVType::printVType(VType: Imm, OS);
2706 } else if (RISCVII::hasSEWOp(TSFlags) &&
2707 OpIdx == RISCVII::getSEWOpNum(Desc: MI.getDesc())) {
2708 unsigned Log2SEW = MI.getOperand(i: OpIdx).getImm();
2709 unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
2710 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
2711 OS << "e" << SEW;
2712 } else if (RISCVII::hasVecPolicyOp(TSFlags) &&
2713 OpIdx == RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())) {
2714 unsigned Policy = MI.getOperand(i: OpIdx).getImm();
2715 assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
2716 "Invalid Policy Value");
2717 OS << (Policy & RISCVII::TAIL_AGNOSTIC ? "ta" : "tu") << ", "
2718 << (Policy & RISCVII::MASK_AGNOSTIC ? "ma" : "mu");
2719 }
2720
2721 OS.flush();
2722 return Comment;
2723}
2724
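// Helper macros that expand to case labels for every LMUL (and, for the
// SEW-aware pseudos, every SEW) variant of an RVV pseudo, so the code below
// can match whole opcode families at once.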
2725// clang-format off
2726#define CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL) \
2727 RISCV::Pseudo##OP##_##LMUL
2728
2729#define CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL) \
2730 RISCV::Pseudo##OP##_##LMUL##_MASK
2731
2732#define CASE_RVV_OPCODE_LMUL(OP, LMUL) \
2733 CASE_RVV_OPCODE_UNMASK_LMUL(OP, LMUL): \
2734 case CASE_RVV_OPCODE_MASK_LMUL(OP, LMUL)
2735
2736#define CASE_RVV_OPCODE_UNMASK_WIDEN(OP) \
2737 CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF8): \
2738 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF4): \
2739 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, MF2): \
2740 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M1): \
2741 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M2): \
2742 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M4)
2743
2744#define CASE_RVV_OPCODE_UNMASK(OP) \
2745 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
2746 case CASE_RVV_OPCODE_UNMASK_LMUL(OP, M8)
2747
2748#define CASE_RVV_OPCODE_MASK_WIDEN(OP) \
2749 CASE_RVV_OPCODE_MASK_LMUL(OP, MF8): \
2750 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF4): \
2751 case CASE_RVV_OPCODE_MASK_LMUL(OP, MF2): \
2752 case CASE_RVV_OPCODE_MASK_LMUL(OP, M1): \
2753 case CASE_RVV_OPCODE_MASK_LMUL(OP, M2): \
2754 case CASE_RVV_OPCODE_MASK_LMUL(OP, M4)
2755
2756#define CASE_RVV_OPCODE_MASK(OP) \
2757 CASE_RVV_OPCODE_MASK_WIDEN(OP): \
2758 case CASE_RVV_OPCODE_MASK_LMUL(OP, M8)
2759
2760#define CASE_RVV_OPCODE_WIDEN(OP) \
2761 CASE_RVV_OPCODE_UNMASK_WIDEN(OP): \
2762 case CASE_RVV_OPCODE_MASK_WIDEN(OP)
2763
2764#define CASE_RVV_OPCODE(OP) \
2765 CASE_RVV_OPCODE_UNMASK(OP): \
2766 case CASE_RVV_OPCODE_MASK(OP)
2767// clang-format on
2768
2769// clang-format off
2770#define CASE_VMA_OPCODE_COMMON(OP, TYPE, LMUL) \
2771 RISCV::PseudoV##OP##_##TYPE##_##LMUL
2772
2773#define CASE_VMA_OPCODE_LMULS_M1(OP, TYPE) \
2774 CASE_VMA_OPCODE_COMMON(OP, TYPE, M1): \
2775 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M2): \
2776 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M4): \
2777 case CASE_VMA_OPCODE_COMMON(OP, TYPE, M8)
2778
2779#define CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE) \
2780 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF2): \
2781 case CASE_VMA_OPCODE_LMULS_M1(OP, TYPE)
2782
2783#define CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE) \
2784 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF4): \
2785 case CASE_VMA_OPCODE_LMULS_MF2(OP, TYPE)
2786
2787#define CASE_VMA_OPCODE_LMULS(OP, TYPE) \
2788 CASE_VMA_OPCODE_COMMON(OP, TYPE, MF8): \
2789 case CASE_VMA_OPCODE_LMULS_MF4(OP, TYPE)
2790
2791// VFMA instructions are SEW specific.
2792#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL, SEW) \
2793 RISCV::PseudoV##OP##_##TYPE##_##LMUL##_##SEW
2794
2795#define CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW) \
2796 CASE_VFMA_OPCODE_COMMON(OP, TYPE, M1, SEW): \
2797 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M2, SEW): \
2798 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M4, SEW): \
2799 case CASE_VFMA_OPCODE_COMMON(OP, TYPE, M8, SEW)
2800
2801#define CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW) \
2802 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF2, SEW): \
2803 case CASE_VFMA_OPCODE_LMULS_M1(OP, TYPE, SEW)
2804
2805#define CASE_VFMA_OPCODE_LMULS_MF4(OP, TYPE, SEW) \
2806 CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF4, SEW): \
2807 case CASE_VFMA_OPCODE_LMULS_MF2(OP, TYPE, SEW)
2808
2809#define CASE_VFMA_OPCODE_VV(OP) \
2810 CASE_VFMA_OPCODE_LMULS_MF4(OP, VV, E16): \
2811 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VV, E32): \
2812 case CASE_VFMA_OPCODE_LMULS_M1(OP, VV, E64)
2813
2814#define CASE_VFMA_SPLATS(OP) \
2815 CASE_VFMA_OPCODE_LMULS_MF4(OP, VFPR16, E16): \
2816 case CASE_VFMA_OPCODE_LMULS_MF2(OP, VFPR32, E32): \
2817 case CASE_VFMA_OPCODE_LMULS_M1(OP, VFPR64, E64)
2818// clang-format on
2819
2820bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
2821 unsigned &SrcOpIdx1,
2822 unsigned &SrcOpIdx2) const {
2823 const MCInstrDesc &Desc = MI.getDesc();
2824 if (!Desc.isCommutable())
2825 return false;
2826
2827 switch (MI.getOpcode()) {
2828 case RISCV::TH_MVEQZ:
2829 case RISCV::TH_MVNEZ:
2830 // We can't commute operands if operand 2 (i.e., rs1 in
2831 // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is
2832 // not valid as the in/out-operand 1).
2833 if (MI.getOperand(2).getReg() == RISCV::X0)
2834 return false;
2835 // Operands 1 and 2 are commutable, if we switch the opcode.
2836 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
2837 case RISCV::TH_MULA:
2838 case RISCV::TH_MULAW:
2839 case RISCV::TH_MULAH:
2840 case RISCV::TH_MULS:
2841 case RISCV::TH_MULSW:
2842 case RISCV::TH_MULSH:
2843 // Operands 2 and 3 are commutable.
2844 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
2845 case RISCV::PseudoCCMOVGPRNoX0:
2846 case RISCV::PseudoCCMOVGPR:
2847 // Operands 4 and 5 are commutable.
2848 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
2849 case CASE_RVV_OPCODE(VADD_VV):
2850 case CASE_RVV_OPCODE(VAND_VV):
2851 case CASE_RVV_OPCODE(VOR_VV):
2852 case CASE_RVV_OPCODE(VXOR_VV):
2853 case CASE_RVV_OPCODE_MASK(VMSEQ_VV):
2854 case CASE_RVV_OPCODE_MASK(VMSNE_VV):
2855 case CASE_RVV_OPCODE(VMIN_VV):
2856 case CASE_RVV_OPCODE(VMINU_VV):
2857 case CASE_RVV_OPCODE(VMAX_VV):
2858 case CASE_RVV_OPCODE(VMAXU_VV):
2859 case CASE_RVV_OPCODE(VMUL_VV):
2860 case CASE_RVV_OPCODE(VMULH_VV):
2861 case CASE_RVV_OPCODE(VMULHU_VV):
2862 case CASE_RVV_OPCODE_WIDEN(VWADD_VV):
2863 case CASE_RVV_OPCODE_WIDEN(VWADDU_VV):
2864 case CASE_RVV_OPCODE_WIDEN(VWMUL_VV):
2865 case CASE_RVV_OPCODE_WIDEN(VWMULU_VV):
2866 case CASE_RVV_OPCODE_WIDEN(VWMACC_VV):
2867 case CASE_RVV_OPCODE_WIDEN(VWMACCU_VV):
2868 case CASE_RVV_OPCODE_UNMASK(VADC_VVM):
2869 // Operands 2 and 3 are commutable.
2870 return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
2871 case CASE_VFMA_SPLATS(FMADD):
2872 case CASE_VFMA_SPLATS(FMSUB):
2873 case CASE_VFMA_SPLATS(FMACC):
2874 case CASE_VFMA_SPLATS(FMSAC):
2875 case CASE_VFMA_SPLATS(FNMADD):
2876 case CASE_VFMA_SPLATS(FNMSUB):
2877 case CASE_VFMA_SPLATS(FNMACC):
2878 case CASE_VFMA_SPLATS(FNMSAC):
2879 case CASE_VFMA_OPCODE_VV(FMACC):
2880 case CASE_VFMA_OPCODE_VV(FMSAC):
2881 case CASE_VFMA_OPCODE_VV(FNMACC):
2882 case CASE_VFMA_OPCODE_VV(FNMSAC):
2883 case CASE_VMA_OPCODE_LMULS(MADD, VX):
2884 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
2885 case CASE_VMA_OPCODE_LMULS(MACC, VX):
2886 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
2887 case CASE_VMA_OPCODE_LMULS(MACC, VV):
2888 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
2889 // If the tail policy is undisturbed we can't commute.
2890 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2891 if ((MI.getOperand(i: MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2892 return false;
2893
2894 // For these instructions we can only swap operand 1 and operand 3 by
2895 // changing the opcode.
2896 unsigned CommutableOpIdx1 = 1;
2897 unsigned CommutableOpIdx2 = 3;
2898 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2899 CommutableOpIdx2))
2900 return false;
2901 return true;
2902 }
2903 case CASE_VFMA_OPCODE_VV(FMADD):
2904 case CASE_VFMA_OPCODE_VV(FMSUB):
2905 case CASE_VFMA_OPCODE_VV(FNMADD):
2906 case CASE_VFMA_OPCODE_VV(FNMSUB):
2907 case CASE_VMA_OPCODE_LMULS(MADD, VV):
2908 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
2909 // If the tail policy is undisturbed we can't commute.
2910 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
2911 if ((MI.getOperand(i: MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
2912 return false;
2913
2914 // For these instructions we have more freedom. We can commute with the
2915 // other multiplicand or with the addend/subtrahend/minuend.
2916
2917 // Any fixed operand must be from source 1, 2 or 3.
2918 if (SrcOpIdx1 != CommuteAnyOperandIndex && SrcOpIdx1 > 3)
2919 return false;
2920 if (SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx2 > 3)
2921 return false;
2922
2923    // If both ops are fixed, one must be the tied source.
2924 if (SrcOpIdx1 != CommuteAnyOperandIndex &&
2925 SrcOpIdx2 != CommuteAnyOperandIndex && SrcOpIdx1 != 1 && SrcOpIdx2 != 1)
2926 return false;
2927
2928 // Look for two different register operands assumed to be commutable
2929 // regardless of the FMA opcode. The FMA opcode is adjusted later if
2930 // needed.
2931 if (SrcOpIdx1 == CommuteAnyOperandIndex ||
2932 SrcOpIdx2 == CommuteAnyOperandIndex) {
2933 // At least one of operands to be commuted is not specified and
2934 // this method is free to choose appropriate commutable operands.
2935 unsigned CommutableOpIdx1 = SrcOpIdx1;
2936 if (SrcOpIdx1 == SrcOpIdx2) {
2937 // Both of operands are not fixed. Set one of commutable
2938 // operands to the tied source.
2939 CommutableOpIdx1 = 1;
2940 } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
2941 // Only one of the operands is not fixed.
2942 CommutableOpIdx1 = SrcOpIdx2;
2943 }
2944
2945 // CommutableOpIdx1 is well defined now. Let's choose another commutable
2946 // operand and assign its index to CommutableOpIdx2.
2947 unsigned CommutableOpIdx2;
2948 if (CommutableOpIdx1 != 1) {
2949 // If we haven't already used the tied source, we must use it now.
2950 CommutableOpIdx2 = 1;
2951 } else {
2952 Register Op1Reg = MI.getOperand(i: CommutableOpIdx1).getReg();
2953
2954 // The commuted operands should have different registers.
2955 // Otherwise, the commute transformation does not change anything and
2956 // is useless. We use this as a hint to make our decision.
2957 if (Op1Reg != MI.getOperand(i: 2).getReg())
2958 CommutableOpIdx2 = 2;
2959 else
2960 CommutableOpIdx2 = 3;
2961 }
2962
2963 // Assign the found pair of commutable indices to SrcOpIdx1 and
2964 // SrcOpIdx2 to return those values.
2965 if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
2966 CommutableOpIdx2))
2967 return false;
2968 }
2969
2970 return true;
2971 }
2972 }
2973
2974 return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
2975}
2976
2977// clang-format off
2978#define CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
2979 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
2980 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
2981 break;
2982
2983#define CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE) \
2984 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1) \
2985 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2) \
2986 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4) \
2987 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8)
2988
2989#define CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE) \
2990 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2) \
2991 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE)
2992
2993#define CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE) \
2994 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4) \
2995 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE)
2996
2997#define CASE_VMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
2998 CASE_VMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8) \
2999 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE)
3000
3001#define CASE_VMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
3002 CASE_VMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16) \
3003 CASE_VMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32) \
3004 CASE_VMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64)
3005
3006// VFMA depends on SEW.
3007#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL, SEW) \
3008 case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_##SEW: \
3009 Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_##SEW; \
3010 break;
3011
3012#define CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW) \
3013 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M1, SEW) \
3014 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M2, SEW) \
3015 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M4, SEW) \
3016 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, M8, SEW)
3017
3018#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW) \
3019 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF2, SEW) \
3020 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, TYPE, SEW)
3021
3022#define CASE_VFMA_CHANGE_OPCODE_VV(OLDOP, NEWOP) \
3023 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VV, E16) \
3024 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VV, E32) \
3025 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VV, E64)
3026
3027#define CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW) \
3028 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF4, SEW) \
3029 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, TYPE, SEW)
3030
3031#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE, SEW) \
3032 CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, MF8, SEW) \
3033 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, TYPE, SEW)
3034
3035#define CASE_VFMA_CHANGE_OPCODE_SPLATS(OLDOP, NEWOP) \
3036 CASE_VFMA_CHANGE_OPCODE_LMULS_MF4(OLDOP, NEWOP, VFPR16, E16) \
3037 CASE_VFMA_CHANGE_OPCODE_LMULS_MF2(OLDOP, NEWOP, VFPR32, E32) \
3038 CASE_VFMA_CHANGE_OPCODE_LMULS_M1(OLDOP, NEWOP, VFPR64, E64)
3039
3040MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
3041 bool NewMI,
3042 unsigned OpIdx1,
3043 unsigned OpIdx2) const {
3044 auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
3045 if (NewMI)
3046 return *MI.getParent()->getParent()->CloneMachineInstr(Orig: &MI);
3047 return MI;
3048 };
3049
3050 switch (MI.getOpcode()) {
3051 case RISCV::TH_MVEQZ:
3052 case RISCV::TH_MVNEZ: {
3053 auto &WorkingMI = cloneIfNew(MI);
3054 WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
3055 : RISCV::TH_MVEQZ));
3056 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
3057 OpIdx2);
3058 }
3059 case RISCV::PseudoCCMOVGPRNoX0:
3060 case RISCV::PseudoCCMOVGPR: {
3061 // CCMOV can be commuted by inverting the condition.
3062 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(i: 3).getImm());
3063 CC = RISCVCC::getOppositeBranchCondition(CC);
3064 auto &WorkingMI = cloneIfNew(MI);
3065 WorkingMI.getOperand(i: 3).setImm(CC);
3066 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI*/ false,
3067 OpIdx1, OpIdx2);
3068 }
3069 case CASE_VFMA_SPLATS(FMACC):
3070 case CASE_VFMA_SPLATS(FMADD):
3071 case CASE_VFMA_SPLATS(FMSAC):
3072 case CASE_VFMA_SPLATS(FMSUB):
3073 case CASE_VFMA_SPLATS(FNMACC):
3074 case CASE_VFMA_SPLATS(FNMADD):
3075 case CASE_VFMA_SPLATS(FNMSAC):
3076 case CASE_VFMA_SPLATS(FNMSUB):
3077 case CASE_VFMA_OPCODE_VV(FMACC):
3078 case CASE_VFMA_OPCODE_VV(FMSAC):
3079 case CASE_VFMA_OPCODE_VV(FNMACC):
3080 case CASE_VFMA_OPCODE_VV(FNMSAC):
3081 case CASE_VMA_OPCODE_LMULS(MADD, VX):
3082 case CASE_VMA_OPCODE_LMULS(NMSUB, VX):
3083 case CASE_VMA_OPCODE_LMULS(MACC, VX):
3084 case CASE_VMA_OPCODE_LMULS(NMSAC, VX):
3085 case CASE_VMA_OPCODE_LMULS(MACC, VV):
3086 case CASE_VMA_OPCODE_LMULS(NMSAC, VV): {
3087    // It only makes sense to toggle these between clobbering the
3088    // addend/subtrahend/minuend and clobbering one of the multiplicands.
3089 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
3090 assert((OpIdx1 == 3 || OpIdx2 == 3) && "Unexpected opcode index");
3091 unsigned Opc;
3092 switch (MI.getOpcode()) {
3093 default:
3094 llvm_unreachable("Unexpected opcode");
3095 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMACC, FMADD)
3096 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMADD, FMACC)
3097 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSAC, FMSUB)
3098 CASE_VFMA_CHANGE_OPCODE_SPLATS(FMSUB, FMSAC)
3099 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMACC, FNMADD)
3100 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMADD, FNMACC)
3101 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSAC, FNMSUB)
3102 CASE_VFMA_CHANGE_OPCODE_SPLATS(FNMSUB, FNMSAC)
3103 CASE_VFMA_CHANGE_OPCODE_VV(FMACC, FMADD)
3104 CASE_VFMA_CHANGE_OPCODE_VV(FMSAC, FMSUB)
3105 CASE_VFMA_CHANGE_OPCODE_VV(FNMACC, FNMADD)
3106 CASE_VFMA_CHANGE_OPCODE_VV(FNMSAC, FNMSUB)
3107 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VX)
3108 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VX)
3109 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VX)
3110 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VX)
3111 CASE_VMA_CHANGE_OPCODE_LMULS(MACC, MADD, VV)
3112 CASE_VMA_CHANGE_OPCODE_LMULS(NMSAC, NMSUB, VV)
3113 }
3114
3115 auto &WorkingMI = cloneIfNew(MI);
3116 WorkingMI.setDesc(get(Opc));
3117 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
3118 OpIdx1, OpIdx2);
3119 }
3120 case CASE_VFMA_OPCODE_VV(FMADD):
3121 case CASE_VFMA_OPCODE_VV(FMSUB):
3122 case CASE_VFMA_OPCODE_VV(FNMADD):
3123 case CASE_VFMA_OPCODE_VV(FNMSUB):
3124 case CASE_VMA_OPCODE_LMULS(MADD, VV):
3125 case CASE_VMA_OPCODE_LMULS(NMSUB, VV): {
3126 assert((OpIdx1 == 1 || OpIdx2 == 1) && "Unexpected opcode index");
3127    // If one of the operands is the addend, we need to change the opcode.
3128    // Otherwise we're just swapping two of the multiplicands.
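    // For example, in vmadd.vv (vd = (vs1 * vd) + vs2) commuting vd with the
    // addend vs2 requires switching to the vmacc.vv form, whereas commuting
    // vd with the multiplicand vs1 needs no opcode change.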
3129 if (OpIdx1 == 3 || OpIdx2 == 3) {
3130 unsigned Opc;
3131 switch (MI.getOpcode()) {
3132 default:
3133 llvm_unreachable("Unexpected opcode");
3134 CASE_VFMA_CHANGE_OPCODE_VV(FMADD, FMACC)
3135 CASE_VFMA_CHANGE_OPCODE_VV(FMSUB, FMSAC)
3136 CASE_VFMA_CHANGE_OPCODE_VV(FNMADD, FNMACC)
3137 CASE_VFMA_CHANGE_OPCODE_VV(FNMSUB, FNMSAC)
3138 CASE_VMA_CHANGE_OPCODE_LMULS(MADD, MACC, VV)
3139 CASE_VMA_CHANGE_OPCODE_LMULS(NMSUB, NMSAC, VV)
3140 }
3141
3142 auto &WorkingMI = cloneIfNew(MI);
3143 WorkingMI.setDesc(get(Opc));
3144 return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
3145 OpIdx1, OpIdx2);
3146 }
3147 // Let the default code handle it.
3148 break;
3149 }
3150 }
3151
3152 return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
3153}
3154
3155#undef CASE_VMA_OPCODE_COMMON
3156#undef CASE_VMA_OPCODE_LMULS_M1
3157#undef CASE_VMA_OPCODE_LMULS_MF2
3158#undef CASE_VMA_OPCODE_LMULS_MF4
3159#undef CASE_VMA_OPCODE_LMULS
3160#undef CASE_VFMA_OPCODE_COMMON
3161#undef CASE_VFMA_OPCODE_LMULS_M1
3162#undef CASE_VFMA_OPCODE_LMULS_MF2
3163#undef CASE_VFMA_OPCODE_LMULS_MF4
3164#undef CASE_VFMA_OPCODE_VV
3165#undef CASE_VFMA_SPLATS
3166
3167// clang-format off
3168#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
3169 RISCV::PseudoV##OP##_##LMUL##_TIED
3170
3171#define CASE_WIDEOP_OPCODE_LMULS_MF4(OP) \
3172 CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
3173 case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
3174 case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
3175 case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
3176 case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
3177
3178#define CASE_WIDEOP_OPCODE_LMULS(OP) \
3179 CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
3180 case CASE_WIDEOP_OPCODE_LMULS_MF4(OP)
3181
3182#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
3183 case RISCV::PseudoV##OP##_##LMUL##_TIED: \
3184 NewOpc = RISCV::PseudoV##OP##_##LMUL; \
3185 break;
3186
3187#define CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
3188 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
3189 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
3190 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
3191 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
3192 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
3193
3194#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
3195 CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
3196 CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
3197
3198// FP widening ops may be SEW aware. Create SEW-aware cases where needed.
3199#define CASE_FP_WIDEOP_OPCODE_COMMON(OP, LMUL, SEW) \
3200 RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED
3201
3202#define CASE_FP_WIDEOP_OPCODE_LMULS_MF4(OP) \
3203 CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF4, E16): \
3204 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E16): \
3205 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, MF2, E32): \
3206 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E16): \
3207 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M1, E32): \
3208 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E16): \
3209 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M2, E32): \
3210 case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E16): \
3211  case CASE_FP_WIDEOP_OPCODE_COMMON(OP, M4, E32)
3212
3213#define CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL, SEW) \
3214 case RISCV::PseudoV##OP##_##LMUL##_##SEW##_TIED: \
3215 NewOpc = RISCV::PseudoV##OP##_##LMUL##_##SEW; \
3216 break;
3217
3218#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP) \
3219 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4, E16) \
3220 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E16) \
3221 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2, E32) \
3222 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E16) \
3223 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1, E32) \
3224 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E16) \
3225 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2, E32) \
3226 CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E16) \
3227  CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4, E32)
3228
3229#define CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
3230 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(OP)
3231// clang-format on
3232
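// Convert a tied widening pseudo (e.g. PseudoVWADD_WV_M1_TIED), whose
// destination reuses the wide source register, into the untied three-address
// form. The converted form takes an undef passthru, so the original tail
// elements are not preserved; the conversion is therefore only done when the
// tail policy is agnostic.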
3233MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
3234 LiveVariables *LV,
3235 LiveIntervals *LIS) const {
3236 MachineInstrBuilder MIB;
3237 switch (MI.getOpcode()) {
3238 default:
3239 return nullptr;
3240 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
3241 case CASE_FP_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
3242 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
3243 MI.getNumExplicitOperands() == 7 &&
3244 "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
3245    // If the tail policy is undisturbed, we can't convert.
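    // The converted form takes an undef passthru operand, so the tail
    // elements of the original destination would no longer be preserved.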
3246 if ((MI.getOperand(i: RISCVII::getVecPolicyOpNum(Desc: MI.getDesc())).getImm() &
3247 1) == 0)
3248 return nullptr;
3249 // clang-format off
3250 unsigned NewOpc;
3251 switch (MI.getOpcode()) {
3252 default:
3253 llvm_unreachable("Unexpected opcode");
3254 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
3255 CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
3256 }
3257 // clang-format on
3258
3259 MachineBasicBlock &MBB = *MI.getParent();
3260 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
3261 .add(MI.getOperand(i: 0))
3262 .addReg(MI.getOperand(i: 0).getReg(), RegState::Undef)
3263 .add(MI.getOperand(i: 1))
3264 .add(MI.getOperand(i: 2))
3265 .add(MI.getOperand(i: 3))
3266 .add(MI.getOperand(i: 4))
3267 .add(MI.getOperand(i: 5))
3268 .add(MI.getOperand(i: 6));
3269 break;
3270 }
3271 case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
3272 case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
3273 case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
3274 case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
3275    // If the tail policy is undisturbed, we can't convert.
3276 assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
3277 MI.getNumExplicitOperands() == 6);
3278 if ((MI.getOperand(i: 5).getImm() & 1) == 0)
3279 return nullptr;
3280
3281 // clang-format off
3282 unsigned NewOpc;
3283 switch (MI.getOpcode()) {
3284 default:
3285 llvm_unreachable("Unexpected opcode");
3286 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
3287 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
3288 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
3289 CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
3290 }
3291 // clang-format on
3292
3293 MachineBasicBlock &MBB = *MI.getParent();
3294 MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
3295 .add(MI.getOperand(i: 0))
3296 .addReg(MI.getOperand(i: 0).getReg(), RegState::Undef)
3297 .add(MI.getOperand(i: 1))
3298 .add(MI.getOperand(i: 2))
3299 .add(MI.getOperand(i: 3))
3300 .add(MI.getOperand(i: 4))
3301 .add(MI.getOperand(i: 5));
3302 break;
3303 }
3304 }
3305 MIB.copyImplicitOps(OtherMI: MI);
3306
3307 if (LV) {
3308 unsigned NumOps = MI.getNumOperands();
3309 for (unsigned I = 1; I < NumOps; ++I) {
3310 MachineOperand &Op = MI.getOperand(i: I);
3311 if (Op.isReg() && Op.isKill())
3312 LV->replaceKillInstruction(Reg: Op.getReg(), OldMI&: MI, NewMI&: *MIB);
3313 }
3314 }
3315
3316 if (LIS) {
3317 SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, NewMI&: *MIB);
3318
3319 if (MI.getOperand(i: 0).isEarlyClobber()) {
3320 // Use operand 1 was tied to early-clobber def operand 0, so its live
3321      // interval could have ended at an early-clobber slot. Now that they are
3322      // no longer tied, we need to update it to the normal register slot.
3323 LiveInterval &LI = LIS->getInterval(Reg: MI.getOperand(i: 1).getReg());
3324 LiveRange::Segment *S = LI.getSegmentContaining(Idx);
3325 if (S->end == Idx.getRegSlot(EC: true))
3326 S->end = Idx.getRegSlot();
3327 }
3328 }
3329
3330 return MIB;
3331}
3332
3333#undef CASE_WIDEOP_OPCODE_COMMON
3334#undef CASE_WIDEOP_OPCODE_LMULS_MF4
3335#undef CASE_WIDEOP_OPCODE_LMULS
3336#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
3337#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4
3338#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
3339#undef CASE_FP_WIDEOP_OPCODE_COMMON
3340#undef CASE_FP_WIDEOP_OPCODE_LMULS_MF4
3341#undef CASE_FP_WIDEOP_CHANGE_OPCODE_COMMON
3342#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS_MF4
3343#undef CASE_FP_WIDEOP_CHANGE_OPCODE_LMULS
3344
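// Multiply DestReg in place by the constant Amount using shifts and adds
// where possible: a single SLLI for powers of two, Zba's SHXADD (plus a
// shift) for 3, 5 or 9 times a power of two, SLLI+ADD/SUB when Amount is
// 2^k +/- 1, a real MUL when M or Zmmul is available, and a generic
// shift-and-add expansion otherwise.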
3345void RISCVInstrInfo::mulImm(MachineFunction &MF, MachineBasicBlock &MBB,
3346 MachineBasicBlock::iterator II, const DebugLoc &DL,
3347 Register DestReg, uint32_t Amount,
3348 MachineInstr::MIFlag Flag) const {
3349 MachineRegisterInfo &MRI = MF.getRegInfo();
3350 if (llvm::has_single_bit<uint32_t>(Value: Amount)) {
3351 uint32_t ShiftAmount = Log2_32(Value: Amount);
3352 if (ShiftAmount == 0)
3353 return;
3354 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3355 .addReg(DestReg, RegState::Kill)
3356 .addImm(ShiftAmount)
3357 .setMIFlag(Flag);
3358 } else if (STI.hasStdExtZba() &&
3359 ((Amount % 3 == 0 && isPowerOf2_64(Value: Amount / 3)) ||
3360 (Amount % 5 == 0 && isPowerOf2_64(Value: Amount / 5)) ||
3361 (Amount % 9 == 0 && isPowerOf2_64(Value: Amount / 9)))) {
3362 // We can use Zba SHXADD+SLLI instructions for multiply in some cases.
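    // For example, Amount == 40 == 5 * 8 becomes
    //   slli rd, rd, 3     (rd *= 8)
    //   sh2add rd, rd, rd  (rd = (rd << 2) + rd, i.e. rd *= 5)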
3363 unsigned Opc;
3364 uint32_t ShiftAmount;
3365 if (Amount % 9 == 0) {
3366 Opc = RISCV::SH3ADD;
3367 ShiftAmount = Log2_64(Value: Amount / 9);
3368 } else if (Amount % 5 == 0) {
3369 Opc = RISCV::SH2ADD;
3370 ShiftAmount = Log2_64(Value: Amount / 5);
3371 } else if (Amount % 3 == 0) {
3372 Opc = RISCV::SH1ADD;
3373 ShiftAmount = Log2_64(Value: Amount / 3);
3374 } else {
3375 llvm_unreachable("implied by if-clause");
3376 }
3377 if (ShiftAmount)
3378 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3379 .addReg(DestReg, RegState::Kill)
3380 .addImm(ShiftAmount)
3381 .setMIFlag(Flag);
3382 BuildMI(MBB, II, DL, get(Opc), DestReg)
3383 .addReg(DestReg, RegState::Kill)
3384 .addReg(DestReg)
3385 .setMIFlag(Flag);
3386 } else if (llvm::has_single_bit<uint32_t>(Value: Amount - 1)) {
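    // Amount == 2^k + 1: DestReg * Amount == (DestReg << k) + DestReg.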
3387 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3388 uint32_t ShiftAmount = Log2_32(Value: Amount - 1);
3389 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
3390 .addReg(DestReg)
3391 .addImm(ShiftAmount)
3392 .setMIFlag(Flag);
3393 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
3394 .addReg(ScaledRegister, RegState::Kill)
3395 .addReg(DestReg, RegState::Kill)
3396 .setMIFlag(Flag);
3397 } else if (llvm::has_single_bit<uint32_t>(Value: Amount + 1)) {
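    // Amount == 2^k - 1: DestReg * Amount == (DestReg << k) - DestReg.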
3398 Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3399 uint32_t ShiftAmount = Log2_32(Value: Amount + 1);
3400 BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
3401 .addReg(DestReg)
3402 .addImm(ShiftAmount)
3403 .setMIFlag(Flag);
3404 BuildMI(MBB, II, DL, get(RISCV::SUB), DestReg)
3405 .addReg(ScaledRegister, RegState::Kill)
3406 .addReg(DestReg, RegState::Kill)
3407 .setMIFlag(Flag);
3408 } else if (STI.hasStdExtM() || STI.hasStdExtZmmul()) {
3409 Register N = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3410 movImm(MBB, MBBI: II, DL, DstReg: N, Val: Amount, Flag);
3411 BuildMI(MBB, II, DL, get(RISCV::MUL), DestReg)
3412 .addReg(DestReg, RegState::Kill)
3413 .addReg(N, RegState::Kill)
3414 .setMIFlag(Flag);
3415 } else {
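    // Generic expansion: walk the set bits of Amount from low to high,
    // shifting DestReg up to each set bit in place and accumulating the
    // earlier partial products in Acc, then add Acc back into DestReg.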
3416 Register Acc;
3417 uint32_t PrevShiftAmount = 0;
3418 for (uint32_t ShiftAmount = 0; Amount >> ShiftAmount; ShiftAmount++) {
3419 if (Amount & (1U << ShiftAmount)) {
3420 if (ShiftAmount)
3421 BuildMI(MBB, II, DL, get(RISCV::SLLI), DestReg)
3422 .addReg(DestReg, RegState::Kill)
3423 .addImm(ShiftAmount - PrevShiftAmount)
3424 .setMIFlag(Flag);
3425 if (Amount >> (ShiftAmount + 1)) {
3426          // If we don't have an accumulator yet, create it and copy DestReg.
3427 if (!Acc) {
3428 Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3429 BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc)
3430 .addReg(DestReg)
3431 .setMIFlag(Flag);
3432 } else {
3433 BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
3434 .addReg(Acc, RegState::Kill)
3435 .addReg(DestReg)
3436 .setMIFlag(Flag);
3437 }
3438 }
3439 PrevShiftAmount = ShiftAmount;
3440 }
3441 }
3442 assert(Acc && "Expected valid accumulator");
3443 BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
3444 .addReg(DestReg, RegState::Kill)
3445 .addReg(Acc, RegState::Kill)
3446 .setMIFlag(Flag);
3447 }
3448}
3449
3450ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
3451RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
3452 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
3453 {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
3454 {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
3455 return ArrayRef(TargetFlags);
3456}
3457
3458// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
3459bool RISCV::isSEXT_W(const MachineInstr &MI) {
3460 return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
3461 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
3462}
3463
3464// Returns true if this is the zext.w pattern, add.uw rd, rs1, x0.
3465bool RISCV::isZEXT_W(const MachineInstr &MI) {
3466 return MI.getOpcode() == RISCV::ADD_UW && MI.getOperand(1).isReg() &&
3467 MI.getOperand(2).isReg() && MI.getOperand(2).getReg() == RISCV::X0;
3468}
3469
3470// Returns true if this is the zext.b pattern, andi rd, rs1, 255.
3471bool RISCV::isZEXT_B(const MachineInstr &MI) {
3472 return MI.getOpcode() == RISCV::ANDI && MI.getOperand(1).isReg() &&
3473 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 255;
3474}
3475
3476static bool isRVVWholeLoadStore(unsigned Opcode) {
3477 switch (Opcode) {
3478 default:
3479 return false;
3480 case RISCV::VS1R_V:
3481 case RISCV::VS2R_V:
3482 case RISCV::VS4R_V:
3483 case RISCV::VS8R_V:
3484 case RISCV::VL1RE8_V:
3485 case RISCV::VL2RE8_V:
3486 case RISCV::VL4RE8_V:
3487 case RISCV::VL8RE8_V:
3488 case RISCV::VL1RE16_V:
3489 case RISCV::VL2RE16_V:
3490 case RISCV::VL4RE16_V:
3491 case RISCV::VL8RE16_V:
3492 case RISCV::VL1RE32_V:
3493 case RISCV::VL2RE32_V:
3494 case RISCV::VL4RE32_V:
3495 case RISCV::VL8RE32_V:
3496 case RISCV::VL1RE64_V:
3497 case RISCV::VL2RE64_V:
3498 case RISCV::VL4RE64_V:
3499 case RISCV::VL8RE64_V:
3500 return true;
3501 }
3502}
3503
3504bool RISCV::isRVVSpill(const MachineInstr &MI) {
3505 // RVV lacks any support for immediate addressing for stack addresses, so be
3506 // conservative.
3507 unsigned Opcode = MI.getOpcode();
3508 if (!RISCVVPseudosTable::getPseudoInfo(Opcode) &&
3509 !isRVVWholeLoadStore(Opcode) && !isRVVSpillForZvlsseg(Opcode))
3510 return false;
3511 return true;
3512}
3513
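// For segment spill/reload pseudos (Zvlsseg), return the pair (number of
// fields NF, LMUL of each field); return std::nullopt for any other opcode.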
3514std::optional<std::pair<unsigned, unsigned>>
3515RISCV::isRVVSpillForZvlsseg(unsigned Opcode) {
3516 switch (Opcode) {
3517 default:
3518 return std::nullopt;
3519 case RISCV::PseudoVSPILL2_M1:
3520 case RISCV::PseudoVRELOAD2_M1:
3521 return std::make_pair(x: 2u, y: 1u);
3522 case RISCV::PseudoVSPILL2_M2:
3523 case RISCV::PseudoVRELOAD2_M2:
3524 return std::make_pair(x: 2u, y: 2u);
3525 case RISCV::PseudoVSPILL2_M4:
3526 case RISCV::PseudoVRELOAD2_M4:
3527 return std::make_pair(x: 2u, y: 4u);
3528 case RISCV::PseudoVSPILL3_M1:
3529 case RISCV::PseudoVRELOAD3_M1:
3530 return std::make_pair(x: 3u, y: 1u);
3531 case RISCV::PseudoVSPILL3_M2:
3532 case RISCV::PseudoVRELOAD3_M2:
3533 return std::make_pair(x: 3u, y: 2u);
3534 case RISCV::PseudoVSPILL4_M1:
3535 case RISCV::PseudoVRELOAD4_M1:
3536 return std::make_pair(x: 4u, y: 1u);
3537 case RISCV::PseudoVSPILL4_M2:
3538 case RISCV::PseudoVRELOAD4_M2:
3539 return std::make_pair(x: 4u, y: 2u);
3540 case RISCV::PseudoVSPILL5_M1:
3541 case RISCV::PseudoVRELOAD5_M1:
3542 return std::make_pair(x: 5u, y: 1u);
3543 case RISCV::PseudoVSPILL6_M1:
3544 case RISCV::PseudoVRELOAD6_M1:
3545 return std::make_pair(x: 6u, y: 1u);
3546 case RISCV::PseudoVSPILL7_M1:
3547 case RISCV::PseudoVRELOAD7_M1:
3548 return std::make_pair(x: 7u, y: 1u);
3549 case RISCV::PseudoVSPILL8_M1:
3550 case RISCV::PseudoVRELOAD8_M1:
3551 return std::make_pair(x: 8u, y: 1u);
3552 }
3553}
3554
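// Fault-only-first loads are identified structurally: they have two explicit
// defs (the loaded data and the updated VL output) and also write the VL
// register. Inline asm can write VL as well, so it is excluded explicitly.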
3555bool RISCV::isFaultFirstLoad(const MachineInstr &MI) {
3556 return MI.getNumExplicitDefs() == 2 &&
3557 MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) && !MI.isInlineAsm();
3558}
3559
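// Return true only if both instructions carry a rounding-mode (frm) operand
// and the two immediate values are equal.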
3560bool RISCV::hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2) {
3561 int16_t MI1FrmOpIdx =
3562 RISCV::getNamedOperandIdx(MI1.getOpcode(), RISCV::OpName::frm);
3563 int16_t MI2FrmOpIdx =
3564 RISCV::getNamedOperandIdx(MI2.getOpcode(), RISCV::OpName::frm);
3565 if (MI1FrmOpIdx < 0 || MI2FrmOpIdx < 0)
3566 return false;
3567 MachineOperand FrmOp1 = MI1.getOperand(i: MI1FrmOpIdx);
3568 MachineOperand FrmOp2 = MI2.getOperand(i: MI2FrmOpIdx);
3569 return FrmOp1.getImm() == FrmOp2.getImm();
3570}
3571
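// For vector instructions with a scalar (GPR/immediate) operand, return how
// many low bits of that scalar can influence the result at SEW == 2^Log2SEW:
// shifts use log2(SEW) bits (log2(2*SEW) for the narrowing forms), while the
// plain arithmetic/logic/compare ops use SEW bits. Returns std::nullopt for
// opcodes that are not handled.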
3572std::optional<unsigned>
3573RISCV::getVectorLowDemandedScalarBits(uint16_t Opcode, unsigned Log2SEW) {
3574 // TODO: Handle Zvbb instructions
3575 switch (Opcode) {
3576 default:
3577 return std::nullopt;
3578
3579 // 11.6. Vector Single-Width Shift Instructions
3580 case RISCV::VSLL_VX:
3581 case RISCV::VSRL_VX:
3582 case RISCV::VSRA_VX:
3583 // 12.4. Vector Single-Width Scaling Shift Instructions
3584 case RISCV::VSSRL_VX:
3585 case RISCV::VSSRA_VX:
3586 // Only the low lg2(SEW) bits of the shift-amount value are used.
3587 return Log2SEW;
3588
3589 // 11.7 Vector Narrowing Integer Right Shift Instructions
3590 case RISCV::VNSRL_WX:
3591 case RISCV::VNSRA_WX:
3592 // 12.5. Vector Narrowing Fixed-Point Clip Instructions
3593 case RISCV::VNCLIPU_WX:
3594 case RISCV::VNCLIP_WX:
3595 // Only the low lg2(2*SEW) bits of the shift-amount value are used.
3596 return Log2SEW + 1;
3597
3598 // 11.1. Vector Single-Width Integer Add and Subtract
3599 case RISCV::VADD_VX:
3600 case RISCV::VSUB_VX:
3601 case RISCV::VRSUB_VX:
3602 // 11.2. Vector Widening Integer Add/Subtract
3603 case RISCV::VWADDU_VX:
3604 case RISCV::VWSUBU_VX:
3605 case RISCV::VWADD_VX:
3606 case RISCV::VWSUB_VX:
3607 case RISCV::VWADDU_WX:
3608 case RISCV::VWSUBU_WX:
3609 case RISCV::VWADD_WX:
3610 case RISCV::VWSUB_WX:
3611 // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
3612 case RISCV::VADC_VXM:
3613 case RISCV::VADC_VIM:
3614 case RISCV::VMADC_VXM:
3615 case RISCV::VMADC_VIM:
3616 case RISCV::VMADC_VX:
3617 case RISCV::VSBC_VXM:
3618 case RISCV::VMSBC_VXM:
3619 case RISCV::VMSBC_VX:
3620 // 11.5 Vector Bitwise Logical Instructions
3621 case RISCV::VAND_VX:
3622 case RISCV::VOR_VX:
3623 case RISCV::VXOR_VX:
3624 // 11.8. Vector Integer Compare Instructions
3625 case RISCV::VMSEQ_VX:
3626 case RISCV::VMSNE_VX:
3627 case RISCV::VMSLTU_VX:
3628 case RISCV::VMSLT_VX:
3629 case RISCV::VMSLEU_VX:
3630 case RISCV::VMSLE_VX:
3631 case RISCV::VMSGTU_VX:
3632 case RISCV::VMSGT_VX:
3633 // 11.9. Vector Integer Min/Max Instructions
3634 case RISCV::VMINU_VX:
3635 case RISCV::VMIN_VX:
3636 case RISCV::VMAXU_VX:
3637 case RISCV::VMAX_VX:
3638 // 11.10. Vector Single-Width Integer Multiply Instructions
3639 case RISCV::VMUL_VX:
3640 case RISCV::VMULH_VX:
3641 case RISCV::VMULHU_VX:
3642 case RISCV::VMULHSU_VX:
3643 // 11.11. Vector Integer Divide Instructions
3644 case RISCV::VDIVU_VX:
3645 case RISCV::VDIV_VX:
3646 case RISCV::VREMU_VX:
3647 case RISCV::VREM_VX:
3648 // 11.12. Vector Widening Integer Multiply Instructions
3649 case RISCV::VWMUL_VX:
3650 case RISCV::VWMULU_VX:
3651 case RISCV::VWMULSU_VX:
3652 // 11.13. Vector Single-Width Integer Multiply-Add Instructions
3653 case RISCV::VMACC_VX:
3654 case RISCV::VNMSAC_VX:
3655 case RISCV::VMADD_VX:
3656 case RISCV::VNMSUB_VX:
3657 // 11.14. Vector Widening Integer Multiply-Add Instructions
3658 case RISCV::VWMACCU_VX:
3659 case RISCV::VWMACC_VX:
3660 case RISCV::VWMACCSU_VX:
3661 case RISCV::VWMACCUS_VX:
3662 // 11.15. Vector Integer Merge Instructions
3663 case RISCV::VMERGE_VXM:
3664 // 11.16. Vector Integer Move Instructions
3665 case RISCV::VMV_V_X:
3666 // 12.1. Vector Single-Width Saturating Add and Subtract
3667 case RISCV::VSADDU_VX:
3668 case RISCV::VSADD_VX:
3669 case RISCV::VSSUBU_VX:
3670 case RISCV::VSSUB_VX:
3671 // 12.2. Vector Single-Width Averaging Add and Subtract
3672 case RISCV::VAADDU_VX:
3673 case RISCV::VAADD_VX:
3674 case RISCV::VASUBU_VX:
3675 case RISCV::VASUB_VX:
3676 // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
3677 case RISCV::VSMUL_VX:
3678 // 16.1. Integer Scalar Move Instructions
3679 case RISCV::VMV_S_X:
3680 return 1U << Log2SEW;
3681 }
3682}
3683
3684unsigned RISCV::getRVVMCOpcode(unsigned RVVPseudoOpcode) {
3685 const RISCVVPseudosTable::PseudoInfo *RVV =
3686 RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
3687 if (!RVV)
3688 return 0;
3689 return RVV->BaseInstr;
3690}
3691
