1//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
/// The R600EmitClauseMarker pass emits CFAlu instructions in a conservative
/// manner. This pass merges consecutive CFAlus where applicable.
/// It needs to be run after IfCvt for best results.
13//===----------------------------------------------------------------------===//
14
15#include "MCTargetDesc/R600MCTargetDesc.h"
16#include "R600.h"
17#include "R600Subtarget.h"
18#include "llvm/CodeGen/MachineFunctionPass.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE "r600mergeclause"
23
24namespace {
25
26static bool isCFAlu(const MachineInstr &MI) {
27 switch (MI.getOpcode()) {
28 case R600::CF_ALU:
29 case R600::CF_ALU_PUSH_BEFORE:
30 return true;
31 default:
32 return false;
33 }
34}
35
36class R600ClauseMergePass : public MachineFunctionPass {
37
38private:
39 const R600InstrInfo *TII;
40
41 unsigned getCFAluSize(const MachineInstr &MI) const;
42 bool isCFAluEnabled(const MachineInstr &MI) const;
43
44 /// IfCvt pass can generate "disabled" ALU clause marker that need to be
45 /// removed and their content affected to the previous alu clause.
46 /// This function parse instructions after CFAlu until it find a disabled
47 /// CFAlu and merge the content, or an enabled CFAlu.
48 void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
49
50 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
51 /// it is the case.
52 bool mergeIfPossible(MachineInstr &RootCFAlu,
53 const MachineInstr &LatrCFAlu) const;
54
55public:
56 static char ID;
57
58 R600ClauseMergePass() : MachineFunctionPass(ID) { }
59
60 bool runOnMachineFunction(MachineFunction &MF) override;
61
62 StringRef getPassName() const override;
63};
64
65} // end anonymous namespace
66
67INITIALIZE_PASS_BEGIN(R600ClauseMergePass, DEBUG_TYPE,
68 "R600 Clause Merge", false, false)
69INITIALIZE_PASS_END(R600ClauseMergePass, DEBUG_TYPE,
70 "R600 Clause Merge", false, false)
71
72char R600ClauseMergePass::ID = 0;
73
74char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID;
75
76unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
77 assert(isCFAlu(MI));
78 return MI
79 .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::COUNT))
80 .getImm();
81}
82
83bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
84 assert(isCFAlu(MI));
85 return MI
86 .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::Enabled))
87 .getImm();
88}
89
90void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
91 MachineInstr &CFAlu) const {
92 int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
93 MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
94 I++;
95 do {
96 while (I != E && !isCFAlu(MI: *I))
97 I++;
98 if (I == E)
99 return;
100 MachineInstr &MI = *I++;
101 if (isCFAluEnabled(MI))
102 break;
103 CFAlu.getOperand(i: CntIdx).setImm(getCFAluSize(MI: CFAlu) + getCFAluSize(MI));
104 MI.eraseFromParent();
105 } while (I != E);
106}
107
108bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
109 const MachineInstr &LatrCFAlu) const {
110 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
111 int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
112 unsigned RootInstCount = getCFAluSize(MI: RootCFAlu),
113 LaterInstCount = getCFAluSize(MI: LatrCFAlu);
114 unsigned CumuledInsts = RootInstCount + LaterInstCount;
115 if (CumuledInsts >= TII->getMaxAlusPerClause()) {
116 LLVM_DEBUG(dbgs() << "Excess inst counts\n");
117 return false;
118 }
119 if (RootCFAlu.getOpcode() == R600::CF_ALU_PUSH_BEFORE)
120 return false;
121 // Is KCache Bank 0 compatible ?
122 int Mode0Idx =
123 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE0);
124 int KBank0Idx =
125 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK0);
126 int KBank0LineIdx =
127 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR0);
128 if (LatrCFAlu.getOperand(i: Mode0Idx).getImm() &&
129 RootCFAlu.getOperand(i: Mode0Idx).getImm() &&
130 (LatrCFAlu.getOperand(i: KBank0Idx).getImm() !=
131 RootCFAlu.getOperand(i: KBank0Idx).getImm() ||
132 LatrCFAlu.getOperand(i: KBank0LineIdx).getImm() !=
133 RootCFAlu.getOperand(i: KBank0LineIdx).getImm())) {
134 LLVM_DEBUG(dbgs() << "Wrong KC0\n");
135 return false;
136 }
137 // Is KCache Bank 1 compatible ?
138 int Mode1Idx =
139 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE1);
140 int KBank1Idx =
141 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK1);
142 int KBank1LineIdx =
143 TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR1);
144 if (LatrCFAlu.getOperand(i: Mode1Idx).getImm() &&
145 RootCFAlu.getOperand(i: Mode1Idx).getImm() &&
146 (LatrCFAlu.getOperand(i: KBank1Idx).getImm() !=
147 RootCFAlu.getOperand(i: KBank1Idx).getImm() ||
148 LatrCFAlu.getOperand(i: KBank1LineIdx).getImm() !=
149 RootCFAlu.getOperand(i: KBank1LineIdx).getImm())) {
150 LLVM_DEBUG(dbgs() << "Wrong KC0\n");
151 return false;
152 }
153 if (LatrCFAlu.getOperand(i: Mode0Idx).getImm()) {
154 RootCFAlu.getOperand(i: Mode0Idx).setImm(
155 LatrCFAlu.getOperand(i: Mode0Idx).getImm());
156 RootCFAlu.getOperand(i: KBank0Idx).setImm(
157 LatrCFAlu.getOperand(i: KBank0Idx).getImm());
158 RootCFAlu.getOperand(i: KBank0LineIdx)
159 .setImm(LatrCFAlu.getOperand(i: KBank0LineIdx).getImm());
160 }
161 if (LatrCFAlu.getOperand(i: Mode1Idx).getImm()) {
162 RootCFAlu.getOperand(i: Mode1Idx).setImm(
163 LatrCFAlu.getOperand(i: Mode1Idx).getImm());
164 RootCFAlu.getOperand(i: KBank1Idx).setImm(
165 LatrCFAlu.getOperand(i: KBank1Idx).getImm());
166 RootCFAlu.getOperand(i: KBank1LineIdx)
167 .setImm(LatrCFAlu.getOperand(i: KBank1LineIdx).getImm());
168 }
169 RootCFAlu.getOperand(i: CntIdx).setImm(CumuledInsts);
170 RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
171 return true;
172}
173
174bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
175 if (skipFunction(F: MF.getFunction()))
176 return false;
177
178 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
179 TII = ST.getInstrInfo();
180
181 for (MachineBasicBlock &MBB : MF) {
182 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
183 MachineBasicBlock::iterator LatestCFAlu = E;
184 while (I != E) {
185 MachineInstr &MI = *I++;
186 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
187 TII->mustBeLastInClause(Opcode: MI.getOpcode()))
188 LatestCFAlu = E;
189 if (!isCFAlu(MI))
190 continue;
191 cleanPotentialDisabledCFAlu(CFAlu&: MI);
192
193 if (LatestCFAlu != E && mergeIfPossible(RootCFAlu&: *LatestCFAlu, LatrCFAlu: MI)) {
194 MI.eraseFromParent();
195 } else {
196 assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
197 LatestCFAlu = MI;
198 }
199 }
200 }
201 return false;
202}
203
204StringRef R600ClauseMergePass::getPassName() const {
205 return "R600 Merge Clause Markers Pass";
206}
207
208llvm::FunctionPass *llvm::createR600ClauseMergePass() {
209 return new R600ClauseMergePass();
210}
211

source code of llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp