1 | //===- R600MergeVectorRegisters.cpp ---------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregs, using swizzle abilities.
12 | /// |
13 | /// For instance let's consider the following pseudo code : |
14 | /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3 |
15 | /// ... |
16 | /// %7 = REG_SEQ %1, sub0, %3, sub1, undef, sub2, %4, sub3 |
17 | /// (swizzable Inst) %7, SwizzleMask : sub0, sub1, sub2, sub3 |
18 | /// |
19 | /// is turned into : |
20 | /// %5 = REG_SEQ %1, sub0, %2, sub1, %3, sub2, undef, sub3 |
21 | /// ... |
22 | /// %7 = INSERT_SUBREG %4, sub3 |
23 | /// (swizzable Inst) %7, SwizzleMask : sub0, sub2, sub1, sub3 |
24 | /// |
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce the MOV count.
27 | //===----------------------------------------------------------------------===// |
28 | |
29 | #include "MCTargetDesc/R600MCTargetDesc.h" |
30 | #include "R600.h" |
31 | #include "R600Defines.h" |
32 | #include "R600Subtarget.h" |
33 | #include "llvm/CodeGen/MachineDominators.h" |
34 | #include "llvm/CodeGen/MachineLoopInfo.h" |
35 | |
36 | using namespace llvm; |
37 | |
38 | #define DEBUG_TYPE "vec-merger" |
39 | |
40 | static bool isImplicitlyDef(MachineRegisterInfo &MRI, Register Reg) { |
41 | if (Reg.isPhysical()) |
42 | return false; |
43 | const MachineInstr *MI = MRI.getUniqueVRegDef(Reg); |
44 | return MI && MI->isImplicitDef(); |
45 | } |
46 | |
47 | namespace { |
48 | |
49 | class RegSeqInfo { |
50 | public: |
51 | MachineInstr *Instr; |
52 | DenseMap<Register, unsigned> RegToChan; |
53 | std::vector<Register> UndefReg; |
54 | |
55 | RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { |
56 | assert(MI->getOpcode() == R600::REG_SEQUENCE); |
57 | for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { |
58 | MachineOperand &MO = Instr->getOperand(i); |
59 | unsigned Chan = Instr->getOperand(i: i + 1).getImm(); |
60 | if (isImplicitlyDef(MRI, Reg: MO.getReg())) |
61 | UndefReg.push_back(x: Chan); |
62 | else |
63 | RegToChan[MO.getReg()] = Chan; |
64 | } |
65 | } |
66 | |
67 | RegSeqInfo() = default; |
68 | |
69 | bool operator==(const RegSeqInfo &RSI) const { |
70 | return RSI.Instr == Instr; |
71 | } |
72 | }; |
73 | |
74 | class R600VectorRegMerger : public MachineFunctionPass { |
75 | private: |
76 | using InstructionSetMap = DenseMap<unsigned, std::vector<MachineInstr *>>; |
77 | |
78 | MachineRegisterInfo *MRI; |
79 | const R600InstrInfo *TII = nullptr; |
80 | DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; |
81 | InstructionSetMap PreviousRegSeqByReg; |
82 | InstructionSetMap PreviousRegSeqByUndefCount; |
83 | |
84 | bool canSwizzle(const MachineInstr &MI) const; |
85 | bool areAllUsesSwizzeable(Register Reg) const; |
86 | void SwizzleInput(MachineInstr &, |
87 | const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const; |
88 | bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge, |
89 | std::vector<std::pair<unsigned, unsigned>> &Remap) const; |
90 | bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, |
91 | std::vector<std::pair<unsigned, unsigned>> &RemapChan); |
92 | bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, |
93 | std::vector<std::pair<unsigned, unsigned>> &RemapChan); |
94 | MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec, |
95 | const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const; |
96 | void RemoveMI(MachineInstr *); |
97 | void trackRSI(const RegSeqInfo &RSI); |
98 | |
99 | public: |
100 | static char ID; |
101 | |
102 | R600VectorRegMerger() : MachineFunctionPass(ID) {} |
103 | |
104 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
105 | AU.setPreservesCFG(); |
106 | AU.addRequired<MachineDominatorTree>(); |
107 | AU.addPreserved<MachineDominatorTree>(); |
108 | AU.addRequired<MachineLoopInfo>(); |
109 | AU.addPreserved<MachineLoopInfo>(); |
110 | MachineFunctionPass::getAnalysisUsage(AU); |
111 | } |
112 | |
113 | MachineFunctionProperties getRequiredProperties() const override { |
114 | return MachineFunctionProperties() |
115 | .set(MachineFunctionProperties::Property::IsSSA); |
116 | } |
117 | |
118 | StringRef getPassName() const override { |
119 | return "R600 Vector Registers Merge Pass" ; |
120 | } |
121 | |
122 | bool runOnMachineFunction(MachineFunction &Fn) override; |
123 | }; |
124 | |
125 | } // end anonymous namespace |
126 | |
127 | INITIALIZE_PASS_BEGIN(R600VectorRegMerger, DEBUG_TYPE, |
128 | "R600 Vector Reg Merger" , false, false) |
129 | INITIALIZE_PASS_END(R600VectorRegMerger, DEBUG_TYPE, |
130 | "R600 Vector Reg Merger" , false, false) |
131 | |
132 | char R600VectorRegMerger::ID = 0; |
133 | |
134 | char &llvm::R600VectorRegMergerID = R600VectorRegMerger::ID; |
135 | |
136 | bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) |
137 | const { |
138 | if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) |
139 | return true; |
140 | switch (MI.getOpcode()) { |
141 | case R600::R600_ExportSwz: |
142 | case R600::EG_ExportSwz: |
143 | return true; |
144 | default: |
145 | return false; |
146 | } |
147 | } |
148 | |
149 | bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, |
150 | RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap) |
151 | const { |
152 | unsigned CurrentUndexIdx = 0; |
153 | for (auto &It : ToMerge->RegToChan) { |
154 | DenseMap<Register, unsigned>::const_iterator PosInUntouched = |
155 | Untouched->RegToChan.find(Val: It.first); |
156 | if (PosInUntouched != Untouched->RegToChan.end()) { |
157 | Remap.push_back( |
158 | x: std::pair<unsigned, unsigned>(It.second, (*PosInUntouched).second)); |
159 | continue; |
160 | } |
161 | if (CurrentUndexIdx >= Untouched->UndefReg.size()) |
162 | return false; |
163 | Remap.push_back(x: std::pair<unsigned, unsigned>( |
164 | It.second, Untouched->UndefReg[CurrentUndexIdx++])); |
165 | } |
166 | |
167 | return true; |
168 | } |
169 | |
170 | static |
171 | unsigned getReassignedChan( |
172 | const std::vector<std::pair<unsigned, unsigned>> &RemapChan, |
173 | unsigned Chan) { |
174 | for (const auto &J : RemapChan) { |
175 | if (J.first == Chan) |
176 | return J.second; |
177 | } |
178 | llvm_unreachable("Chan wasn't reassigned" ); |
179 | } |
180 | |
181 | MachineInstr *R600VectorRegMerger::RebuildVector( |
182 | RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, |
183 | const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const { |
184 | Register Reg = RSI->Instr->getOperand(i: 0).getReg(); |
185 | MachineBasicBlock::iterator Pos = RSI->Instr; |
186 | MachineBasicBlock &MBB = *Pos->getParent(); |
187 | DebugLoc DL = Pos->getDebugLoc(); |
188 | |
189 | Register SrcVec = BaseRSI->Instr->getOperand(i: 0).getReg(); |
190 | DenseMap<Register, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; |
191 | std::vector<Register> UpdatedUndef = BaseRSI->UndefReg; |
192 | for (const auto &It : RSI->RegToChan) { |
193 | Register DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass); |
194 | unsigned SubReg = It.first; |
195 | unsigned Swizzle = It.second; |
196 | unsigned Chan = getReassignedChan(RemapChan, Chan: Swizzle); |
197 | |
198 | MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG), |
199 | DstReg) |
200 | .addReg(SrcVec) |
201 | .addReg(SubReg) |
202 | .addImm(Chan); |
203 | UpdatedRegToChan[SubReg] = Chan; |
204 | std::vector<Register>::iterator ChanPos = llvm::find(Range&: UpdatedUndef, Val: Chan); |
205 | if (ChanPos != UpdatedUndef.end()) |
206 | UpdatedUndef.erase(position: ChanPos); |
207 | assert(!is_contained(UpdatedUndef, Chan) && |
208 | "UpdatedUndef shouldn't contain Chan more than once!" ); |
209 | LLVM_DEBUG(dbgs() << " ->" ; Tmp->dump();); |
210 | (void)Tmp; |
211 | SrcVec = DstReg; |
212 | } |
213 | MachineInstr *NewMI = |
214 | BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec); |
215 | LLVM_DEBUG(dbgs() << " ->" ; NewMI->dump();); |
216 | |
217 | LLVM_DEBUG(dbgs() << " Updating Swizzle:\n" ); |
218 | for (MachineRegisterInfo::use_instr_iterator It = MRI->use_instr_begin(RegNo: Reg), |
219 | E = MRI->use_instr_end(); It != E; ++It) { |
220 | LLVM_DEBUG(dbgs() << " " ; (*It).dump(); dbgs() << " ->" ); |
221 | SwizzleInput(*It, RemapChan); |
222 | LLVM_DEBUG((*It).dump()); |
223 | } |
224 | RSI->Instr->eraseFromParent(); |
225 | |
226 | // Update RSI |
227 | RSI->Instr = NewMI; |
228 | RSI->RegToChan = UpdatedRegToChan; |
229 | RSI->UndefReg = UpdatedUndef; |
230 | |
231 | return NewMI; |
232 | } |
233 | |
234 | void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { |
235 | for (auto &It : PreviousRegSeqByReg) { |
236 | std::vector<MachineInstr *> &MIs = It.second; |
237 | MIs.erase(first: llvm::find(Range&: MIs, Val: MI), last: MIs.end()); |
238 | } |
239 | for (auto &It : PreviousRegSeqByUndefCount) { |
240 | std::vector<MachineInstr *> &MIs = It.second; |
241 | MIs.erase(first: llvm::find(Range&: MIs, Val: MI), last: MIs.end()); |
242 | } |
243 | } |
244 | |
245 | void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, |
246 | const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const { |
247 | unsigned Offset; |
248 | if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) |
249 | Offset = 2; |
250 | else |
251 | Offset = 3; |
252 | for (unsigned i = 0; i < 4; i++) { |
253 | unsigned Swizzle = MI.getOperand(i: i + Offset).getImm() + 1; |
254 | for (const auto &J : RemapChan) { |
255 | if (J.first == Swizzle) { |
256 | MI.getOperand(i: i + Offset).setImm(J.second - 1); |
257 | break; |
258 | } |
259 | } |
260 | } |
261 | } |
262 | |
263 | bool R600VectorRegMerger::areAllUsesSwizzeable(Register Reg) const { |
264 | return llvm::all_of(Range: MRI->use_instructions(Reg), |
265 | P: [&](const MachineInstr &MI) { return canSwizzle(MI); }); |
266 | } |
267 | |
268 | bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, |
269 | RegSeqInfo &CompatibleRSI, |
270 | std::vector<std::pair<unsigned, unsigned>> &RemapChan) { |
271 | for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), |
272 | MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { |
273 | if (!MOp->isReg()) |
274 | continue; |
275 | if (PreviousRegSeqByReg[MOp->getReg()].empty()) |
276 | continue; |
277 | for (MachineInstr *MI : PreviousRegSeqByReg[MOp->getReg()]) { |
278 | CompatibleRSI = PreviousRegSeq[MI]; |
279 | if (RSI == CompatibleRSI) |
280 | continue; |
281 | if (tryMergeVector(Untouched: &CompatibleRSI, ToMerge: &RSI, Remap&: RemapChan)) |
282 | return true; |
283 | } |
284 | } |
285 | return false; |
286 | } |
287 | |
288 | bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, |
289 | RegSeqInfo &CompatibleRSI, |
290 | std::vector<std::pair<unsigned, unsigned>> &RemapChan) { |
291 | unsigned NeededUndefs = 4 - RSI.UndefReg.size(); |
292 | if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) |
293 | return false; |
294 | std::vector<MachineInstr *> &MIs = |
295 | PreviousRegSeqByUndefCount[NeededUndefs]; |
296 | CompatibleRSI = PreviousRegSeq[MIs.back()]; |
297 | tryMergeVector(Untouched: &CompatibleRSI, ToMerge: &RSI, Remap&: RemapChan); |
298 | return true; |
299 | } |
300 | |
301 | void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { |
302 | for (DenseMap<Register, unsigned>::const_iterator |
303 | It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { |
304 | PreviousRegSeqByReg[(*It).first].push_back(x: RSI.Instr); |
305 | } |
306 | PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(x: RSI.Instr); |
307 | PreviousRegSeq[RSI.Instr] = RSI; |
308 | } |
309 | |
310 | bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { |
311 | if (skipFunction(F: Fn.getFunction())) |
312 | return false; |
313 | |
314 | const R600Subtarget &ST = Fn.getSubtarget<R600Subtarget>(); |
315 | TII = ST.getInstrInfo(); |
316 | MRI = &Fn.getRegInfo(); |
317 | |
318 | for (MachineBasicBlock &MB : Fn) { |
319 | PreviousRegSeq.clear(); |
320 | PreviousRegSeqByReg.clear(); |
321 | PreviousRegSeqByUndefCount.clear(); |
322 | |
323 | for (MachineBasicBlock::iterator MII = MB.begin(), MIIE = MB.end(); |
324 | MII != MIIE; ++MII) { |
325 | MachineInstr &MI = *MII; |
326 | if (MI.getOpcode() != R600::REG_SEQUENCE) { |
327 | if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { |
328 | Register Reg = MI.getOperand(i: 1).getReg(); |
329 | for (MachineRegisterInfo::def_instr_iterator |
330 | It = MRI->def_instr_begin(RegNo: Reg), E = MRI->def_instr_end(); |
331 | It != E; ++It) { |
332 | RemoveMI(MI: &(*It)); |
333 | } |
334 | } |
335 | continue; |
336 | } |
337 | |
338 | RegSeqInfo RSI(*MRI, &MI); |
339 | |
340 | // All uses of MI are swizzeable ? |
341 | Register Reg = MI.getOperand(i: 0).getReg(); |
342 | if (!areAllUsesSwizzeable(Reg)) |
343 | continue; |
344 | |
345 | LLVM_DEBUG({ |
346 | dbgs() << "Trying to optimize " ; |
347 | MI.dump(); |
348 | }); |
349 | |
350 | RegSeqInfo CandidateRSI; |
351 | std::vector<std::pair<unsigned, unsigned>> RemapChan; |
352 | LLVM_DEBUG(dbgs() << "Using common slots...\n" ;); |
353 | if (tryMergeUsingCommonSlot(RSI, CompatibleRSI&: CandidateRSI, RemapChan)) { |
354 | // Remove CandidateRSI mapping |
355 | RemoveMI(MI: CandidateRSI.Instr); |
356 | MII = RebuildVector(RSI: &RSI, BaseRSI: &CandidateRSI, RemapChan); |
357 | trackRSI(RSI); |
358 | continue; |
359 | } |
360 | LLVM_DEBUG(dbgs() << "Using free slots...\n" ;); |
361 | RemapChan.clear(); |
362 | if (tryMergeUsingFreeSlot(RSI, CompatibleRSI&: CandidateRSI, RemapChan)) { |
363 | RemoveMI(MI: CandidateRSI.Instr); |
364 | MII = RebuildVector(RSI: &RSI, BaseRSI: &CandidateRSI, RemapChan); |
365 | trackRSI(RSI); |
366 | continue; |
367 | } |
368 | //Failed to merge |
369 | trackRSI(RSI); |
370 | } |
371 | } |
372 | return false; |
373 | } |
374 | |
375 | llvm::FunctionPass *llvm::createR600VectorRegMerger() { |
376 | return new R600VectorRegMerger(); |
377 | } |
378 | |