1//===- GCNVOPDUtils.cpp - GCN VOPD Utils ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
/// \file This file contains the AMDGPU DAG scheduling
/// mutation to pair VOPD instructions back to back. It also contains
/// subroutines useful in the creation of VOPD instructions.
12//
13//===----------------------------------------------------------------------===//
14
15#include "GCNVOPDUtils.h"
16#include "AMDGPUSubtarget.h"
17#include "GCNSubtarget.h"
18#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19#include "SIInstrInfo.h"
20#include "Utils/AMDGPUBaseInfo.h"
21#include "llvm/ADT/STLExtras.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/CodeGen/MachineBasicBlock.h"
24#include "llvm/CodeGen/MachineInstr.h"
25#include "llvm/CodeGen/MachineOperand.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/MacroFusion.h"
28#include "llvm/CodeGen/ScheduleDAG.h"
29#include "llvm/CodeGen/ScheduleDAGMutation.h"
30#include "llvm/CodeGen/TargetInstrInfo.h"
31#include "llvm/MC/MCInst.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "gcn-vopd-utils"
36
37bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
38 const MachineInstr &FirstMI,
39 const MachineInstr &SecondMI) {
40 namespace VOPD = AMDGPU::VOPD;
41
42 const MachineFunction *MF = FirstMI.getMF();
43 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
44 const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(Val: ST.getRegisterInfo());
45 const MachineRegisterInfo &MRI = MF->getRegInfo();
46 // Literals also count against scalar bus limit
47 SmallVector<const MachineOperand *> UniqueLiterals;
48 auto addLiteral = [&](const MachineOperand &Op) {
49 for (auto &Literal : UniqueLiterals) {
50 if (Literal->isIdenticalTo(Other: Op))
51 return;
52 }
53 UniqueLiterals.push_back(Elt: &Op);
54 };
55 SmallVector<Register> UniqueScalarRegs;
56 assert([&]() -> bool {
57 for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
58 MII != FirstMI.getParent()->instr_end(); ++MII) {
59 if (&*MII == &SecondMI)
60 return true;
61 }
62 return false;
63 }() && "Expected FirstMI to precede SecondMI");
64 // Cannot pair dependent instructions
65 for (const auto &Use : SecondMI.uses())
66 if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
67 return false;
68
69 auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
70 const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;
71 const MachineOperand &Operand = MI.getOperand(i: OperandIdx);
72 if (Operand.isReg() && TRI->isVectorRegister(MRI, Reg: Operand.getReg()))
73 return Operand.getReg();
74 return Register();
75 };
76
77 auto InstInfo =
78 AMDGPU::getVOPDInstInfo(OpX: FirstMI.getDesc(), OpY: SecondMI.getDesc());
79
80 for (auto CompIdx : VOPD::COMPONENTS) {
81 const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;
82
83 const MachineOperand &Src0 = MI.getOperand(i: VOPD::Component::SRC0);
84 if (Src0.isReg()) {
85 if (!TRI->isVectorRegister(MRI, Reg: Src0.getReg())) {
86 if (!is_contained(Range&: UniqueScalarRegs, Element: Src0.getReg()))
87 UniqueScalarRegs.push_back(Elt: Src0.getReg());
88 }
89 } else {
90 if (!TII.isInlineConstant(MI, OpIdx: VOPD::Component::SRC0))
91 addLiteral(Src0);
92 }
93
94 if (InstInfo[CompIdx].hasMandatoryLiteral()) {
95 auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();
96 addLiteral(MI.getOperand(i: CompOprIdx));
97 }
98 if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::Reg: VCC))
99 UniqueScalarRegs.push_back(AMDGPU::Elt: VCC_LO);
100 }
101
102 if (UniqueLiterals.size() > 1)
103 return false;
104 if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
105 return false;
106
107 // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
108 bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&
109 FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&
110 SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;
111
112 if (InstInfo.hasInvalidOperand(GetRegIdx: getVRegIdx, SkipSrc))
113 return false;
114
115 LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
116 << "\n\tY: " << SecondMI << "\n");
117 return true;
118}
119
120/// Check if the instr pair, FirstMI and SecondMI, should be scheduled
121/// together. Given SecondMI, when FirstMI is unspecified, then check if
122/// SecondMI may be part of a fused pair at all.
123static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
124 const TargetSubtargetInfo &TSI,
125 const MachineInstr *FirstMI,
126 const MachineInstr &SecondMI) {
127 const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
128 unsigned Opc2 = SecondMI.getOpcode();
129 auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc: Opc2);
130
131 // One instruction case
132 if (!FirstMI)
133 return SecondCanBeVOPD.Y;
134
135 unsigned Opc = FirstMI->getOpcode();
136 auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
137
138 if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
139 (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
140 return false;
141
142 return checkVOPDRegConstraints(TII: STII, FirstMI: *FirstMI, SecondMI);
143}
144
145namespace {
146/// Adapts design from MacroFusion
147/// Puts valid candidate instructions back-to-back so they can easily
148/// be turned into VOPD instructions
149/// Greedily pairs instruction candidates. O(n^2) algorithm.
150struct VOPDPairingMutation : ScheduleDAGMutation {
151 MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer
152
153 VOPDPairingMutation(
154 MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer
155 : shouldScheduleAdjacent(shouldScheduleAdjacent) {}
156
157 void apply(ScheduleDAGInstrs *DAG) override {
158 const TargetInstrInfo &TII = *DAG->TII;
159 const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
160 if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
161 LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
162 return;
163 }
164
165 std::vector<SUnit>::iterator ISUI, JSUI;
166 for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
167 const MachineInstr *IMI = ISUI->getInstr();
168 if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
169 continue;
170 if (!hasLessThanNumFused(SU: *ISUI, FuseLimit: 2))
171 continue;
172
173 for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
174 if (JSUI->isBoundaryNode())
175 continue;
176 const MachineInstr *JMI = JSUI->getInstr();
177 if (!hasLessThanNumFused(SU: *JSUI, FuseLimit: 2) ||
178 !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
179 continue;
180 if (fuseInstructionPair(DAG&: *DAG, FirstSU&: *ISUI, SecondSU&: *JSUI))
181 break;
182 }
183 }
184 LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
185 }
186};
187} // namespace
188
189std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
190 return std::make_unique<VOPDPairingMutation>(args&: shouldScheduleVOPDAdjacent);
191}
192

source code of llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp