//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
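//
// As an illustrative example (operand details depend on the surrounding
// dataflow), phase 3 may lower:
//
//   %v = PseudoVADD_VV_M1 %passthru, %a, %b, %avl, 5 /*e32*/, 0 /*tu,mu*/
//
// into roughly:
//
//   vsetvli zero, a0, e32, m1, tu, mu
//   vadd.vv v8, v9, v10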
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
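/// For example, 'vsetvli x0, x0, e32, m2, ta, ma' changes SEW, LMUL, and the
/// policy bits but leaves the current VL value in place (callers are
/// responsible for ensuring the new VTYPE keeps that VL legal).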
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

static bool isNonZeroLoadImmediate(MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}
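// I.e., the above matches the materialization of a non-zero constant, such as
// `addi a0, x0, 5` (the expansion of `li a0, 5`), but not `addi a0, x0, 0`
// or an ADDI whose source register isn't x0.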

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
static bool hasUndefinedMergeOp(const MachineInstr &MI,
                                const MachineRegisterInfo &MRI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
  // operands are solely IMPLICIT_DEFs, then the pass through lanes are
  // undefined.
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  if (UseMO.getReg() == RISCV::NoRegister)
    return true;

  if (UseMO.isUndef())
    return true;
  if (UseMO.getReg().isPhysical())
    return false;

  if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
    if (UseMI->isImplicitDef())
      return true;

    if (UseMI->isRegSequence()) {
      for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
        MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
        if (!SourceMI || !SourceMI->isImplicitDef())
          return false;
      }
      return true;
    }
  }
  return false;
}
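// A rough MIR-level sketch of the REG_SEQUENCE case handled above (register
// names invented for illustration):
//
//   %0:vr = IMPLICIT_DEF
//   %1:vr = IMPLICIT_DEF
//   %2:vrm2 = REG_SEQUENCE %0, %subreg.sub_vrm1_0, %1, %subreg.sub_vrm1_1
//   %3:vrm2 = PseudoVADD_VV_M2 %2 (tied), ...
//
// The tied passthrough %2 is built entirely from IMPLICIT_DEFs, so the
// inactive lanes of %3 are treated as undefined.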

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3, // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1, // SEW can be changed as long as it's greater
           // than or equal to the original value, but must be less
           // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used.
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used.
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded.
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded.
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    }
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}
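// A worked example for the above: if an instruction demands only
// Used.SEWLMULRatio, then moving from e32/m1 to e64/m2 is compatible (both
// have a SEW/LMUL ratio of 32), while moving from e32/m1 to e64/m4 is not
// (ratio 32 vs. 16).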

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
                           const MachineRegisterInfo *MRI,
                           const RISCVSubtarget *ST) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too.
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits. Warning: It's tempting to try
    // doing this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI, *MRI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    assert(Reg.isVirtual() || Reg == RISCV::X0 || Reg == RISCV::NoRegister);
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLReg(Info.getAVLReg());
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (getAVLReg() == RISCV::X0)
        return true;
      if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
          MI && isNonZeroLoadImmediate(*MI))
        return true;
      return false;
    }
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const MachineRegisterInfo &MRI) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

class RISCVCoalesceVSETVLI : public MachineFunctionPass {
public:
  static char ID;
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addRequired<LiveIntervals>();
    AU.addPreserved<LiveIntervals>();
    AU.addRequired<SlotIndexes>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }

private:
  bool coalesceVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

char RISCVCoalesceVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
                RISCV_COALESCE_VSETVLI_NAME, false, false)

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
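// For instance, the above gives VLMAX = (128 * 2) / 32 = 8 for VLEN=128,
// SEW=32, LMUL=2, and VLMAX = (128 / 2) / 32 = 2 for LMUL=1/2, matching
// VLMAX = LMUL * VLEN / SEW from the vector spec.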

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const RISCVSubtarget &ST,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI, *MRI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
        if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLReg(RISCV::X0);
      } else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli. We avoid general
  // register AVLs to avoid extending live ranges without being sure we can
  // kill the original source reg entirely.
  if (InstrInfo.hasAVLReg() && InstrInfo.getAVLReg().isVirtual()) {
    MachineInstr *DefMI = MRI->getVRegDef(InstrInfo.getAVLReg());
    if (DefMI && isVectorConfigInstr(*DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
          (DefInstrInfo.hasAVLImm() || DefInstrInfo.getAVLReg() == RISCV::X0)) {
        InstrInfo.setAVL(DefInstrInfo);
      }
    }
  }

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
        Info.getAVLReg().isVirtual()) {
      if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
        if (isVectorConfigInstr(*DefMI)) {
          VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
          if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
            BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(RISCV::X0, RegState::Kill)
                .addImm(Info.encodeVTYPE())
                .addReg(RISCV::VL, RegState::Implicit);
            return;
          }
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 1 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(1)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, MRI, ST);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated the same
  // semantically as vmv.s.x. This is particularly useful since we don't have
  // an immediate form of vmv.s.x, and thus frequently use vmv.v.i in its
  // place. Since a splat is non-constant time in LMUL, we do need to be
  // careful to not increase the number of active vector registers (unlike for
  // vmv.s.x.)
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
      Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
      hasUndefinedMergeOp(MI, *MRI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
    else
      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = true;
  }

  return Info;
}
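// Example for the above (illustrative): if the previous state is e64/m2
// (ratio 32) and the incoming instruction wants e32 but demands neither LMUL
// nor the SEW/LMUL ratio, getSameRatioLMUL picks m1 so the new e32 state
// keeps ratio 32, and VLMAX (and thus VL) is unchanged.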

// Given an incoming state reaching MI, minimally modifies that state so that
// it is compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  DemandedFields Demanded = getDemanded(MI, MRI, ST);
  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match or allows use of the cheaper avl preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending the live range of
  // an avl register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, *MRI) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to the vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun the block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}
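// The pattern the above catches looks roughly like this (MIR sketch, names
// invented for illustration):
//
//   bb.1:
//     %a:gpr = PseudoVSETVLI %x, <vtype>, implicit-def $vl, implicit-def $vtype
//     ...
//   bb.2:
//     %b:gpr = PseudoVSETVLI %y, <vtype>, implicit-def $vl, implicit-def $vtype
//     ...
//   bb.3:
//     %avl:gpr = PHI %a, %bb.1, %b, %bb.2
//     ... vector op with AVL %avl and the same <vtype> ...
//
// Each PHI input is the VL output of a vsetvli matching its predecessor's
// exit state, so the vector op in bb.3 needs no new vsetvli.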

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had since we know the GPR result of the implicit state change
        // wouldn't be used and VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();
          MachineInstr *VLOpDef = MRI->getVRegDef(Reg);

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);

          // If the AVL was an immediate > 31, then it would have been emitted
          // as an ADDI. However, the ADDI might not have been used in the
          // vsetvli, or a vsetvli might not have been emitted, so it may be
          // dead now.
          if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
              MRI->use_nodbg_empty(Reg))
            VLOpDef->eraseFromParent();
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvl in a
/// single block loop when it could execute once in the preheader instead.
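/// For example (illustrative), in a single block loop such as:
///
///   loop:
///     vsetivli zero, 4, e32, m1, ta, ma
///     ... vector work ...
///     bnez a0, loop
///
/// where the loop's entry state is unknown only because of the preheader,
/// the vsetivli can instead be executed once at the end of the preheader.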
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point we're going
  // to insert the vsetvli at.
  if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) {
    MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg());
    if (!AVLDefMI)
      return;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (AVLDefMI->getParent() != UnavailablePred)
      return;
    for (auto &TermMI : UnavailablePred->terminators())
      if (&TermMI == AVLDefMI)
        return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here; one legality,
  // one profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We can
  //    not make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above.
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt), AvailableInfo,
                OldExit);
}

static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
  A.SEW = std::max(A.SEW, B.SEW);
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used,
                                 const MachineRegisterInfo &MRI) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       MRI))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates
    // PrevMI. For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}
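// A typical case the above enables (illustrative): between
//
//   vsetvli a1, a0, e32, m1, ta, ma
//   vsetvli x0, x0, e16, mf2, ta, ma
//
// the two VTYPEs share a SEW/LMUL ratio (and thus VLMAX, so VL is unchanged);
// if no intervening instruction demands the e32/m1 fields, the first vsetvli
// can adopt the second's VTYPE and the VL-preserving second one is deleted by
// coalesceVSETVLIs below.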

bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr *> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI, MRI, ST));
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
          MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
        NextMI = nullptr;
      continue;
    }

    Register RegDef = MI.getOperand(0).getReg();
    assert(RegDef == RISCV::X0 || RegDef.isVirtual());
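    // If the GPR destination of this vsetvli is still used, the VL value it
    // produces is observed, so conservatively treat VL as demanded.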
    if (RegDef != RISCV::X0 && !MRI->use_nodbg_empty(RegDef))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      }

      if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
        if (!isVLPreservingConfig(*NextMI)) {
          Register DefReg = NextMI->getOperand(0).getReg();

          MI.getOperand(0).setReg(DefReg);
          MI.getOperand(0).setIsDead(false);

          // The def of DefReg moved to MI, so extend the LiveInterval up to
          // it.
          if (DefReg.isVirtual()) {
            LiveInterval &DefLI = LIS->getInterval(DefReg);
            SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
            VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
            LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
            DefLI.addSegment(S);
            DefVNI->def = MISlot;
            // Mark DefLI as spillable if it was previously unspillable.
            DefLI.setWeight(0);

            // DefReg may have had no uses, in which case we need to shrink
            // the LiveInterval up to MI.
            LIS->shrinkToUses(&DefLI);
          }

          Register OldVLReg;
          if (MI.getOperand(1).isReg())
            OldVLReg = MI.getOperand(1).getReg();
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              /*isDef=*/false);

          // Clear NextMI's AVL early so we're not counting it as a use.
          if (NextMI->getOperand(1).isReg())
            NextMI->getOperand(1).setReg(RISCV::NoRegister);

          if (OldVLReg && OldVLReg.isVirtual()) {
            // NextMI no longer uses OldVLReg so shrink its LiveInterval.
            LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

            MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
            if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
                MRI->use_nodbg_empty(OldVLReg)) {
              VLOpDef->eraseFromParent();
              LIS->removeInterval(OldVLReg);
            }
          }
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, MRI, ST);
  }

  NumCoalescedVSETVL += ToDelete.size();
  for (auto *MI : ToDelete) {
    LIS->RemoveMachineInstrFromMaps(*MI);
    MI->eraseFromParent();
  }

  return !ToDelete.empty();
}
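
// An illustrative before/after for the bottom-up walk above (the values are
// made up for the sketch, not taken from a real test):
//   vsetvli zero, a0, e32, m1, ta, ma   ; nothing below observes this state
//   vsetvli zero, a0, e64, m2, ta, ma
//   vadd.vv v8, v8, v9
// When the walk reaches the first vsetvli, Used holds only what the second
// vsetvli demands, so the first one is queued on ToDelete and the block
// keeps a single VL/VTYPE transition.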

void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}
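
// Sketch of the rewrite performed above, in rough pseudo-MIR (operand lists
// abbreviated; the exact pseudo opcode is illustrative):
//   %dest:vr, %vl:gpr = PseudoVLE32FF_V ...
// becomes
//   %dest:vr, $x0 = PseudoVLE32FF_V ...
//   %vl:gpr = PseudoReadVL
// so the VL value produced by the fault-only-first load is materialized by
// an explicit read of the VL CSR instead of a GPR def on the load itself.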

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be
  // revisited during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Insert PseudoReadVL after VLEFF/VLSEGFF instructions and rewrite users
  // of their vl output to read from it instead.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}

// Now that all vsetvlis are explicit, go through and do block-local DSE and
// peephole transforms based on demanded fields. Note that this *must* be
// done outside the main dataflow so long as we allow any cross-block
// analysis within the dataflow: we can't have both demanded-fields-based
// mutation and non-local analysis in the dataflow at the same time without
// introducing inconsistencies.
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;
  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF)
    Changed |= coalesceVSETVLIs(MBB);

  return Changed;
}

FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
  return new RISCVCoalesceVSETVLI();
}