//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
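// As a rough sketch (simplified MIR; operand details abridged), a vector
// pseudo such as
//
//   %v = PseudoVADD_VV_M1 ..., %avl, 5 /*log2(e32)*/
//
// gets a configuration instruction inserted before it by phase 3:
//
//   dead $x0 = PseudoVSETVLI %avl, 208 /*e32, m1, ta, ma*/,
//       implicit-def $vl, implicit-def $vtype
//   %v = PseudoVADD_VV_M1 ..., $noreg, 5, implicit $vl, implicit $vtype
//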
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
#define RISCV_COALESCE_VSETVLI_NAME "RISC-V Coalesce VSETVLI pass"

STATISTIC(NumInsertedVSETVL, "Number of VSETVL inst inserted");
STATISTIC(NumCoalescedVSETVL, "Number of VSETVL inst coalesced");

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static bool isFloatScalarMoveOrScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VFMV_S_F:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isScalarExtractInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_X_S:
  case RISCV::VFMV_F_S:
    return true;
  }
}

static bool isScalarInsertInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

static bool isScalarSplatInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_V_I:
  case RISCV::VMV_V_X:
  case RISCV::VFMV_V_F:
    return true;
  }
}

static bool isVSlideInstr(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEUP_VI:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

static bool isNonZeroLoadImmediate(MachineInstr &MI) {
  return MI.getOpcode() == RISCV::ADDI &&
         MI.getOperand(1).isReg() && MI.getOperand(2).isImm() &&
         MI.getOperand(1).getReg() == RISCV::X0 &&
         MI.getOperand(2).getImm() != 0;
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Return true if the inactive elements in the result are entirely undefined.
/// Note that this is different from "agnostic" as defined by the vector
/// specification. Agnostic requires each lane to either be undisturbed, or
/// take the value -1; no other value is allowed.
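/// For example (illustrative): with VL=2 and VLMAX=4 under a tail-agnostic
/// policy, lanes 2 and 3 of the result must each be either their old value or
/// all-ones. An operation that writes arbitrary bits to those lanes therefore
/// needs an *undefined* passthrough, not merely an agnostic policy.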
static bool hasUndefinedMergeOp(const MachineInstr &MI,
                                const MachineRegisterInfo &MRI) {

  unsigned UseOpIdx;
  if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
    // If there is no passthrough operand, then the pass through
    // lanes are undefined.
    return true;

  // If the tied operand is NoReg, an IMPLICIT_DEF, or a REG_SEQUENCE whose
  // operands are solely IMPLICIT_DEFs, then the pass through lanes are
  // undefined.
  const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
  if (UseMO.getReg() == RISCV::NoRegister)
    return true;

  if (UseMO.isUndef())
    return true;
  if (UseMO.getReg().isPhysical())
    return false;

  if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
    if (UseMI->isImplicitDef())
      return true;

    if (UseMI->isRegSequence()) {
      for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
        MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
        if (!SourceMI || !SourceMI->isImplicitDef())
          return false;
      }
      return true;
    }
  }
  return false;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  // What properties of SEW we need to preserve.
  enum : uint8_t {
    SEWEqual = 3,              // The exact value of SEW needs to be preserved.
    SEWGreaterThanOrEqual = 2, // SEW can be changed as long as it's greater
                               // than or equal to the original value.
    SEWGreaterThanOrEqualAndLessThan64 =
        1,      // SEW can be changed as long as it's greater
                // than or equal to the original value, but must be less
                // than 64.
    SEWNone = 0 // We don't need to preserve SEW at all.
  } SEW = SEWNone;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used.
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used.
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded.
  void demandVTYPE() {
    SEW = SEWEqual;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded.
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=";
    switch (SEW) {
    case SEWEqual:
      OS << "SEWEqual";
      break;
    case SEWGreaterThanOrEqual:
      OS << "SEWGreaterThanOrEqual";
      break;
    case SEWGreaterThanOrEqualAndLessThan64:
      OS << "SEWGreaterThanOrEqualAndLessThan64";
      break;
    case SEWNone:
      OS << "SEWNone";
      break;
    };
    OS << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if moving from CurVType to NewVType is
/// indistinguishable from the perspective of an instruction (or set
/// of instructions) which use only the Used subfields and properties.
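/// For instance (illustrative), an instruction that demands only the
/// SEW/LMUL ratio treats e8/mf8 and e64/m1 as compatible: both have a ratio
/// of 64, so they yield the same VLMAX even though SEW and LMUL each differ.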
static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
                                const DemandedFields &Used) {
  switch (Used.SEW) {
  case DemandedFields::SEWNone:
    break;
  case DemandedFields::SEWEqual:
    if (RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqual:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
      return false;
    break;
  case DemandedFields::SEWGreaterThanOrEqualAndLessThan64:
    if (RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType) ||
        RISCVVType::getSEW(NewVType) >= 64)
      return false;
    break;
  }

  if (Used.LMUL &&
      RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
                                              RISCVVType::getVLMUL(CurVType));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
                                              RISCVVType::getVLMUL(NewVType));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
                             RISCVVType::isTailAgnostic(NewVType))
    return false;
  if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
                             RISCVVType::isMaskAgnostic(NewVType))
    return false;
  return true;
}

/// Return the fields and properties demanded by the provided instruction.
DemandedFields getDemanded(const MachineInstr &MI,
                           const MachineRegisterInfo *MRI,
                           const RISCVSubtarget *ST) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VL, /*TRI=*/nullptr))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.readsRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too.
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = DemandedFields::SEWNone;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarInsertInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
    // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
    // need to preserve any other bits and are thus compatible with any larger
    // etype, and can disregard policy bits. Warning: It's tempting to try doing
    // this for any tail agnostic operation, but we can't as TA requires
    // tail lanes to either be the original value or -1. We are writing
    // unknown bits to the lanes here.
    if (hasUndefinedMergeOp(MI, *MRI)) {
      if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
        Res.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
      else
        Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
      Res.TailPolicy = false;
    }
  }

  // vmv.x.s and vfmv.f.s are unconditional and ignore everything except SEW.
  if (isScalarExtractInstr(MI)) {
    assert(!RISCVII::hasVLOp(TSFlags));
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  return Res;
}

/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    assert(Reg.isVirtual() || Reg == RISCV::X0 || Reg == RISCV::NoRegister);
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  void setAVL(VSETVLIInfo Info) {
    assert(Info.isValid());
    if (Info.isUnknown())
      setUnknown();
    else if (Info.hasAVLReg())
      setAVLReg(Info.getAVLReg());
    else {
      assert(Info.hasAVLImm());
      setAVLImm(Info.getAVLImm());
    }
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }
  bool getTailAgnostic() const { return TailAgnostic; }
  bool getMaskAgnostic() const { return MaskAgnostic; }

  bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg()) {
      if (getAVLReg() == RISCV::X0)
        return true;
      if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
          MI && isNonZeroLoadImmediate(*MI))
        return true;
      return false;
    }
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
                         const MachineRegisterInfo &MRI) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
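  // For example (illustrative): with VLEN=128, e32/m1 and e64/m2 both give
  // VLMAX=4 (same ratio of 32), so an AVL of, say, 3 maps to VL=3 under
  // either configuration.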
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements represented by
  // Require are compatible with the previous vsetvli instruction represented
  // by this. MI is the instruction whose requirements we're considering.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
                    const MachineRegisterInfo &MRI) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    if (Used.VLAny && !(hasSameAVL(Require) && hasSameVLMAX(Require)))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
      return false;

    return hasCompatibleVTYPE(Used, Require);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) const;
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) const;
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                             VSETVLIInfo &Info) const;
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

class RISCVCoalesceVSETVLI : public MachineFunctionPass {
public:
  static char ID;
  const RISCVSubtarget *ST;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;

  RISCVCoalesceVSETVLI() : MachineFunctionPass(ID) {}
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();

    AU.addRequired<LiveIntervals>();
    AU.addPreserved<LiveIntervals>();
    AU.addRequired<SlotIndexes>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveDebugVariables>();
    AU.addPreserved<LiveStacks>();

    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_COALESCE_VSETVLI_NAME; }

private:
  bool coalesceVSETVLIs(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

char RISCVCoalesceVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVCoalesceVSETVLI, "riscv-coalesce-vsetvli",
                RISCV_COALESCE_VSETVLI_NAME, false, false)

// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

static unsigned computeVLMAX(unsigned VLEN, unsigned SEW,
                             RISCVII::VLMUL VLMul) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMul);
  if (Fractional)
    VLEN = VLEN / LMul;
  else
    VLEN = VLEN * LMul;
  return VLEN / SEW;
}
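
// Worked example (illustrative): with VLEN=128, SEW=16, LMUL=mf2, Fractional
// is true and LMul is 2, so VLMAX = (128 / 2) / 16 = 4. With LMUL=m4 instead,
// VLMAX = (128 * 4) / 16 = 32.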

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const RISCVSubtarget &ST,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic = true;
  bool MaskAgnostic = true;
  if (!hasUndefinedMergeOp(MI, *MRI)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel) {
        // If we know the exact VLEN, see if we can use the constant encoding
        // for the VLMAX instead. This reduces register pressure slightly.
        const unsigned VLMAX = computeVLMAX(ST.getRealMaxVLen(), SEW, VLMul);
        if (ST.getRealMinVLen() == ST.getRealMaxVLen() && VLMAX <= 31)
          InstrInfo.setAVLImm(VLMAX);
        else
          InstrInfo.setAVLReg(RISCV::X0);
      } else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    assert(isScalarExtractInstr(MI));
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  // If AVL is defined by a vsetvli with the same VLMAX, we can replace the
  // AVL operand with the AVL of the defining vsetvli. We avoid general
  // register AVLs to avoid extending live ranges without being sure we can
  // kill the original source reg entirely.
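  // Illustrative case: if the AVL operand %avl was produced by
  // `%avl = PseudoVSETIVLI 4, <vtype>` with a matching VLMAX, the
  // instruction can use the immediate 4 as its AVL directly.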
  if (InstrInfo.hasAVLReg() && InstrInfo.getAVLReg().isVirtual()) {
    MachineInstr *DefMI = MRI->getVRegDef(InstrInfo.getAVLReg());
    if (DefMI && isVectorConfigInstr(*DefMI)) {
      VSETVLIInfo DefInstrInfo = getInfoForVSETVLI(*DefMI);
      if (DefInstrInfo.hasSameVLMAX(InstrInfo) &&
          (DefInstrInfo.hasAVLImm() || DefInstrInfo.getAVLReg() == RISCV::X0)) {
        InstrInfo.setAVL(DefInstrInfo);
      }
    }
  }

  return InstrInfo;
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {

  ++NumInsertedVSETVL;
  if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
    // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
    // VLMAX.
    if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }

    // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
    // it has the same VLMAX we want and the last VL/VTYPE we observed is the
    // same, we can use the X0, X0 form.
    if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
        Info.getAVLReg().isVirtual()) {
      if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
        if (isVectorConfigInstr(*DefMI)) {
          VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
          if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
            BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
                .addReg(RISCV::X0, RegState::Define | RegState::Dead)
                .addReg(RISCV::X0, RegState::Kill)
                .addImm(Info.encodeVTYPE())
                .addReg(RISCV::VL, RegState::Implicit);
            return;
          }
        }
      }
    }
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 1 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(1)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
  auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
  return Fractional || LMul == 1;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, *ST, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() ||
      CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI, MRI, ST);

  // A slidedown/slideup with an *undefined* merge op can freely clobber
  // elements not copied from the source vector (e.g. masked off, tail, or
  // slideup's prefix). Notes:
  // * We can't modify SEW here since the slide amount is in units of SEW.
  // * VL=1 is special only because we have existing support for zero vs
  //   non-zero VL. We could generalize this if we had a VL > C predicate.
  // * The LMUL1 restriction is for machines whose latency may depend on VL.
  // * As above, this is only legal for tail "undefined" not "agnostic".
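  // For instance (illustrative): `vslidedown.vi v8, v9, 2` with an undefined
  // passthrough and AVL=1 defines only element 0, so any prior configuration
  // with a non-zero VL and a compatible SEW can be reused without a toggle.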
  if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
      isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
    Used.VLAny = false;
    Used.VLZeroness = true;
    Used.LMUL = false;
    Used.TailPolicy = false;
  }

  // A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated
  // semantically the same as vmv.s.x. This is particularly useful since we
  // don't have an immediate form of vmv.s.x, and thus frequently use vmv.v.i
  // in its place. Since a splat is non-constant time in LMUL, we do need to be
  // careful to not increase the number of active vector registers (unlike for
  // vmv.s.x.)
  if (isScalarSplatInstr(MI) && Require.hasAVLImm() &&
      Require.getAVLImm() == 1 && isLMUL1OrSmaller(CurInfo.getVLMUL()) &&
      hasUndefinedMergeOp(MI, *MRI)) {
    Used.LMUL = false;
    Used.SEWLMULRatio = false;
    Used.VLAny = false;
    if (isFloatScalarMoveOrScalarSplatInstr(MI) && !ST->hasVInstructionsF64())
      Used.SEW = DemandedFields::SEWGreaterThanOrEqualAndLessThan64;
    else
      Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
    Used.TailPolicy = false;
  }

  if (CurInfo.isCompatible(Used, Require, *MRI))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
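// For example (illustrative): if the previous state is e32/m1 (ratio 32) and
// the new instruction needs e16 but demands neither LMUL nor the ratio, we
// pick mf2 so the ratio stays 32 and VL is unchanged.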
static VSETVLIInfo adjustIncoming(VSETVLIInfo PrevInfo, VSETVLIInfo NewInfo,
                                  DemandedFields &Demanded) {
  VSETVLIInfo Info = NewInfo;

  if (!Demanded.LMUL && !Demanded.SEWLMULRatio && PrevInfo.isValid() &&
      !PrevInfo.isUnknown()) {
    if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
            PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
      Info.setVLMul(*NewVLMul);
    Demanded.LMUL = true;
  }

  return Info;
}

// Given an incoming state reaching MI, minimally modifies that state so that it
// is compatible with MI. The resulting state is guaranteed to be semantically
// legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
                                        const MachineInstr &MI) const {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, *ST, MRI);
  assert(NewInfo.isValid() && !NewInfo.isUnknown());
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  if (!Info.isValid() || Info.isUnknown())
    Info = NewInfo;

  DemandedFields Demanded = getDemanded(MI, MRI, ST);
  const VSETVLIInfo IncomingInfo = adjustIncoming(PrevInfo, NewInfo, Demanded);

  // If MI only demands that VL has the same zeroness, we only need to set the
  // AVL if the zeroness differs. This removes a vsetvli entirely if the types
  // match, or allows use of the cheaper AVL-preserving variant if VLMAX doesn't
  // change. If VLMAX might change, we couldn't use the 'vsetvli x0, x0, vtype'
  // variant, so we avoid the transform to prevent extending the live range of
  // an AVL register operand.
  // TODO: We can probably relax this for immediates.
  bool EquallyZero = IncomingInfo.hasEquallyZeroAVL(PrevInfo, *MRI) &&
                     IncomingInfo.hasSameVLMAX(PrevInfo);
  if (Demanded.VLAny || (Demanded.VLZeroness && !EquallyZero))
    Info.setAVL(IncomingInfo);

  Info.setVTYPE(
      ((Demanded.LMUL || Demanded.SEWLMULRatio) ? IncomingInfo : Info)
          .getVLMUL(),
      ((Demanded.SEW || Demanded.SEWLMULRatio) ? IncomingInfo : Info).getSEW(),
      // Prefer tail/mask agnostic since it can be relaxed to undisturbed later
      // if needed.
      (Demanded.TailPolicy ? IncomingInfo : Info).getTailAgnostic() ||
          IncomingInfo.getTailAgnostic(),
      (Demanded.MaskPolicy ? IncomingInfo : Info).getMaskAgnostic() ||
          IncomingInfo.getMaskAgnostic());

  // If we only knew the sew/lmul ratio previously, replace the VTYPE but keep
  // the AVL.
  if (Info.hasSEWLMULRatioOnly()) {
    VSETVLIInfo RatiolessInfo = IncomingInfo;
    RatiolessInfo.setAVL(Info);
    Info = RatiolessInfo;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info,
                                       const MachineInstr &MI) const {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to the vl output of the fault-first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() ||
      MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
      MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB,
                                               VSETVLIInfo &Info) const {
  bool HadVectorOp = false;

  Info = BlockInfo[MBB.getNumber()].Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(Info, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(Info, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun the block.
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based on
  // the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  VSETVLIInfo TmpStatus;
  computeVLVTYPEChanges(MBB, TmpStatus);

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
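// Typical shape (illustrative): a loop header where
//   %avl = PHI [ %vl1, %bb1 ], [ %vl2, %bb2 ]
// and each %vlN is the GPR result of the final VSET(I)VLI in its predecessor,
// with each predecessor exiting in the VTYPE the instruction requires.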
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning it has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had, since we know the GPR result of the implicit state change
        // wouldn't be used and the VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed, as while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          Register Reg = VLOp.getReg();
          MachineInstr *VLOpDef = MRI->getVRegDef(Reg);

          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);

          // If the AVL was an immediate > 31, then it would have been emitted
          // as an ADDI. However, the ADDI might not have been used in the
          // vsetvli, or a vsetvli might not have been emitted, so it may be
          // dead now.
          if (VLOpDef && TII->isAddImmediate(*VLOpDef, Reg) &&
              MRI->use_nodbg_empty(Reg))
            VLOpDef->eraseFromParent();
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() ||
        MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
        MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}
1424 | |
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed-length vsetvli in a
/// single-block loop when it could execute once in the preheader instead.
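///
/// For example (a hypothetical single-block loop; sketch only):
///
///   preheader:
///     j loop
///   loop:
///     vadd.vv v8, v8, v9   ; needs e32, m1, AVL 4
///     bnez a0, loop
///
/// Without PRE, phase 3 would insert "vsetivli zero, 4, e32, m1, ta, ma" at
/// the top of the loop body. Hoisting that state change to the end of the
/// preheader lets the back edge carry the already-correct state, so the loop
/// body no longer needs a vsetvli at all.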
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
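  // Find the single predecessor (if any) whose exit state is unknown; every
  // other predecessor must agree on one available state for PRE to apply.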
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // If we don't know the exact VTYPE, we can't copy the vsetvli to the exit of
  // the unavailable pred.
  if (AvailableInfo.hasSEWLMULRatioOnly())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If the AVL value is a register (other than our VLMAX sentinel),
  // we need to prove the value is available at the point where we're
  // going to insert the vsetvli.
  if (AvailableInfo.hasAVLReg() && RISCV::X0 != AvailableInfo.getAVLReg()) {
    MachineInstr *AVLDefMI = MRI->getVRegDef(AvailableInfo.getAVLReg());
    if (!AVLDefMI)
      return;
    // This is an inline dominance check which covers the case of
    // UnavailablePred being the preheader of a loop.
    if (AVLDefMI->getParent() != UnavailablePred)
      return;
    for (auto &TermMI : UnavailablePred->terminators())
      if (&TermMI == AVLDefMI)
        return;
  }

  // Model the effect of changing the input state of the block MBB to
  // AvailableInfo. We're looking for two issues here: one of legality,
  // one of profitability.
  // 1) If the block doesn't use some of the fields from VL or VTYPE, we
  //    may hit the end of the block with a different end state. We cannot
  //    make this change without reflowing later blocks as well.
  // 2) If we don't actually remove a transition, inserting a vsetvli
  //    into the predecessor block would be correct, but unprofitable.
  VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
  VSETVLIInfo CurInfo = AvailableInfo;
  int TransitionsRemoved = 0;
  for (const MachineInstr &MI : MBB) {
    const VSETVLIInfo LastInfo = CurInfo;
    const VSETVLIInfo LastOldInfo = OldInfo;
    transferBefore(CurInfo, MI);
    transferBefore(OldInfo, MI);
    if (CurInfo == LastInfo)
      TransitionsRemoved++;
    if (LastOldInfo == OldInfo)
      TransitionsRemoved--;
    transferAfter(CurInfo, MI);
    transferAfter(OldInfo, MI);
    if (CurInfo == OldInfo)
      // Convergence. All transitions after this must match by construction.
      break;
  }
  if (CurInfo != OldInfo || TransitionsRemoved <= 0)
    // Issues 1 and 2 above
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldExit);
}

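// Merge the demands in B into A. The boolean fields union naturally; SEW is
// an ordered enum, arranged so that taking the maximum is intended to keep a
// demand at least as strong as either input.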
static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
  A.SEW = std::max(A.SEW, B.SEW);
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
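//
// For example (a hypothetical pair, written in assembler syntax for clarity):
//   PrevMI: vsetvli x0, a0, e32, m1, ta, ma
//   MI:     vsetvli x0, a0, e64, m2, ta, ma
// If only the SEW/LMUL ratio is demanded between them, the two VTYPEs are
// compatible (both have ratio 32), so PrevMI can be rewritten to MI's
// configuration.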
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used,
                                 const MachineRegisterInfo &MRI) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    if (Used.VLZeroness) {
      if (isVLPreservingConfig(PrevMI))
        return false;
      if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
                                                       MRI))
        return false;
    }

    auto &AVL = MI.getOperand(1);
    auto &PrevAVL = PrevMI.getOperand(1);

    // If the AVL is a register, we need to make sure MI's AVL dominates
    // PrevMI. For now just check that PrevMI uses the same virtual register.
    if (AVL.isReg() && AVL.getReg() != RISCV::X0 &&
        (!MRI.hasOneDef(AVL.getReg()) || !PrevAVL.isReg() ||
         PrevAVL.getReg() != AVL.getReg()))
      return false;
  }

  assert(PrevMI.getOperand(2).isImm() && MI.getOperand(2).isImm());
  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

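// Walk the block bottom-up, tracking in Used which fields of VL/VTYPE are
// demanded between each vsetvli and the next one below it (NextMI). For
// example (a hypothetical sequence; sketch only):
//   vsetvli a2, a0, e32, m1, ta, ma    ; MI
//   vadd.vv v8, v8, v9                 ; demands VL and VTYPE from MI
//   vsetvli x0, x0, e32, m1, ta, ma    ; NextMI, VL-preserving
// Here NextMI preserves VL and sets the same VTYPE that MI already
// established, so it can be coalesced into MI and deleted.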
bool RISCVCoalesceVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI, MRI, ST));
      if (MI.isCall() || MI.isInlineAsm() ||
          MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
          MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
        NextMI = nullptr;
      continue;
    }

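    // If the GPR result of this vsetvli is live, the exact VL it produces
    // must be preserved.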
    Register RegDef = MI.getOperand(0).getReg();
    assert(RegDef == RISCV::X0 || RegDef.isVirtual());
    if (RegDef != RISCV::X0 && !MRI->use_nodbg_empty(RegDef))
      Used.demandVL();

    if (NextMI) {
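      // If nothing between MI and NextMI (including NextMI itself) observes
      // any field of the state MI defines, MI is dead: NextMI establishes
      // the next state that matters.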
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      }

      if (canMutatePriorConfig(MI, *NextMI, Used, *MRI)) {
        if (!isVLPreservingConfig(*NextMI)) {
          Register DefReg = NextMI->getOperand(0).getReg();

          MI.getOperand(0).setReg(DefReg);
          MI.getOperand(0).setIsDead(false);

          // The def of DefReg moved to MI, so extend the LiveInterval up to
          // it.
          if (DefReg.isVirtual()) {
            LiveInterval &DefLI = LIS->getInterval(DefReg);
            SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
            VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
            LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
            DefLI.addSegment(S);
            DefVNI->def = MISlot;
            // Mark DefLI as spillable if it was previously unspillable
            DefLI.setWeight(0);

            // DefReg may have had no uses, in which case we need to shrink
            // the LiveInterval up to MI.
            LIS->shrinkToUses(&DefLI);
          }

          Register OldVLReg;
          if (MI.getOperand(1).isReg())
            OldVLReg = MI.getOperand(1).getReg();
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
                                              false);

          // Clear NextMI's AVL early so we're not counting it as a use.
          if (NextMI->getOperand(1).isReg())
            NextMI->getOperand(1).setReg(RISCV::NoRegister);

          if (OldVLReg && OldVLReg.isVirtual()) {
            // NextMI no longer uses OldVLReg so shrink its LiveInterval.
            LIS->shrinkToUses(&LIS->getInterval(OldVLReg));

            MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
            if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
                MRI->use_nodbg_empty(OldVLReg)) {
              VLOpDef->eraseFromParent();
              LIS->removeInterval(OldVLReg);
            }
          }
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI, MRI, ST);
  }

  NumCoalescedVSETVL += ToDelete.size();
  for (auto *MI : ToDelete) {
    LIS->RemoveMachineInstrFromMaps(*MI);
    MI->eraseFromParent();
  }

  return !ToDelete.empty();
}

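// For each fault-only-first load (VLEFF/VLSEGFF), materialize the trimmed VL
// it produced with a PseudoReadVL (which reads the VL CSR) if the vl result
// has any users, then retarget the load's vl output to X0.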
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    VSETVLIInfo TmpStatus;
    HaveVectorOp |= computeVLVTYPEChanges(MBB, TmpStatus);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = TmpStatus;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
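  // Iterate to a fixed point: when a block's exit state changes, its
  // successors are re-queued for another visit.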
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Insert a PseudoReadVL after each VLEFF/VLSEGFF to provide its vl output,
  // since the loads themselves no longer define it.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}

// Now that all vsetvlis are explicit, go through and do block-local
// DSE and demanded-fields-based peephole transforms. Note that
// this *must* be done outside the main dataflow so long as we allow
// any cross-block analysis within the dataflow. We can't have both
// demanded-fields-based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
bool RISCVCoalesceVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  ST = &MF.getSubtarget<RISCVSubtarget>();
  if (!ST->hasVInstructions())
    return false;
  TII = ST->getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();

  bool Changed = false;
  for (MachineBasicBlock &MBB : MF)
    Changed |= coalesceVSETVLIs(MBB);

  return Changed;
}

FunctionPass *llvm::createRISCVCoalesceVSETVLIPass() {
  return new RISCVCoalesceVSETVLI();
}