1 | //===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This utility class duplicates basic blocks ending in unconditional branches |
10 | // into the tails of their predecessors. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/CodeGen/TailDuplicator.h" |
15 | #include "llvm/ADT/DenseMap.h" |
16 | #include "llvm/ADT/DenseSet.h" |
17 | #include "llvm/ADT/STLExtras.h" |
18 | #include "llvm/ADT/SetVector.h" |
19 | #include "llvm/ADT/SmallPtrSet.h" |
20 | #include "llvm/ADT/SmallVector.h" |
21 | #include "llvm/ADT/Statistic.h" |
22 | #include "llvm/CodeGen/MachineBasicBlock.h" |
23 | #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" |
24 | #include "llvm/CodeGen/MachineFunction.h" |
25 | #include "llvm/CodeGen/MachineInstr.h" |
26 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
27 | #include "llvm/CodeGen/MachineOperand.h" |
28 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
29 | #include "llvm/CodeGen/MachineSSAUpdater.h" |
30 | #include "llvm/CodeGen/MachineSizeOpts.h" |
31 | #include "llvm/CodeGen/TargetInstrInfo.h" |
32 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
33 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
34 | #include "llvm/IR/DebugLoc.h" |
35 | #include "llvm/IR/Function.h" |
36 | #include "llvm/Support/CommandLine.h" |
37 | #include "llvm/Support/Debug.h" |
38 | #include "llvm/Support/ErrorHandling.h" |
39 | #include "llvm/Support/raw_ostream.h" |
40 | #include "llvm/Target/TargetMachine.h" |
41 | #include <algorithm> |
42 | #include <cassert> |
43 | #include <iterator> |
44 | #include <utility> |
45 | |
46 | using namespace llvm; |
47 | |
48 | #define DEBUG_TYPE "tailduplication" |
49 | |
50 | STATISTIC(NumTails, "Number of tails duplicated" ); |
51 | STATISTIC(NumTailDups, "Number of tail duplicated blocks" ); |
52 | STATISTIC(NumTailDupAdded, |
53 | "Number of instructions added due to tail duplication" ); |
54 | STATISTIC(NumTailDupRemoved, |
55 | "Number of instructions removed due to tail duplication" ); |
56 | STATISTIC(NumDeadBlocks, "Number of dead blocks removed" ); |
57 | STATISTIC(NumAddedPHIs, "Number of phis added" ); |
58 | |
59 | // Heuristic for tail duplication. |
60 | static cl::opt<unsigned> TailDuplicateSize( |
61 | "tail-dup-size" , |
62 | cl::desc("Maximum instructions to consider tail duplicating" ), cl::init(Val: 2), |
63 | cl::Hidden); |
64 | |
65 | static cl::opt<unsigned> TailDupIndirectBranchSize( |
66 | "tail-dup-indirect-size" , |
67 | cl::desc("Maximum instructions to consider tail duplicating blocks that " |
68 | "end with indirect branches." ), cl::init(Val: 20), |
69 | cl::Hidden); |
70 | |
71 | static cl::opt<unsigned> |
72 | TailDupPredSize("tail-dup-pred-size" , |
73 | cl::desc("Maximum predecessors (maximum successors at the " |
74 | "same time) to consider tail duplicating blocks." ), |
75 | cl::init(Val: 16), cl::Hidden); |
76 | |
77 | static cl::opt<unsigned> |
78 | TailDupSuccSize("tail-dup-succ-size" , |
79 | cl::desc("Maximum successors (maximum predecessors at the " |
80 | "same time) to consider tail duplicating blocks." ), |
81 | cl::init(Val: 16), cl::Hidden); |
82 | |
83 | static cl::opt<bool> |
84 | TailDupVerify("tail-dup-verify" , |
85 | cl::desc("Verify sanity of PHI instructions during taildup" ), |
86 | cl::init(Val: false), cl::Hidden); |
87 | |
88 | static cl::opt<unsigned> TailDupLimit("tail-dup-limit" , cl::init(Val: ~0U), |
89 | cl::Hidden); |
90 | |
91 | void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, |
92 | const MachineBranchProbabilityInfo *MBPIin, |
93 | MBFIWrapper *MBFIin, |
94 | ProfileSummaryInfo *PSIin, |
95 | bool LayoutModeIn, unsigned TailDupSizeIn) { |
96 | MF = &MFin; |
97 | TII = MF->getSubtarget().getInstrInfo(); |
98 | TRI = MF->getSubtarget().getRegisterInfo(); |
99 | MRI = &MF->getRegInfo(); |
100 | MMI = &MF->getMMI(); |
101 | MBPI = MBPIin; |
102 | MBFI = MBFIin; |
103 | PSI = PSIin; |
104 | TailDupSize = TailDupSizeIn; |
105 | |
106 | assert(MBPI != nullptr && "Machine Branch Probability Info required" ); |
107 | |
108 | LayoutMode = LayoutModeIn; |
109 | this->PreRegAlloc = PreRegAlloc; |
110 | } |
111 | |
112 | static void VerifyPHIs(MachineFunction &MF, bool ) { |
113 | for (MachineBasicBlock &MBB : llvm::drop_begin(RangeOrContainer&: MF)) { |
114 | SmallSetVector<MachineBasicBlock *, 8> Preds(MBB.pred_begin(), |
115 | MBB.pred_end()); |
116 | MachineBasicBlock::iterator MI = MBB.begin(); |
117 | while (MI != MBB.end()) { |
118 | if (!MI->isPHI()) |
119 | break; |
120 | for (MachineBasicBlock *PredBB : Preds) { |
121 | bool Found = false; |
122 | for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { |
123 | MachineBasicBlock *PHIBB = MI->getOperand(i: i + 1).getMBB(); |
124 | if (PHIBB == PredBB) { |
125 | Found = true; |
126 | break; |
127 | } |
128 | } |
129 | if (!Found) { |
130 | dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": " |
131 | << *MI; |
132 | dbgs() << " missing input from predecessor " |
133 | << printMBBReference(MBB: *PredBB) << '\n'; |
134 | llvm_unreachable(nullptr); |
135 | } |
136 | } |
137 | |
138 | for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { |
139 | MachineBasicBlock *PHIBB = MI->getOperand(i: i + 1).getMBB(); |
140 | if (CheckExtra && !Preds.count(key: PHIBB)) { |
141 | dbgs() << "Warning: malformed PHI in " << printMBBReference(MBB) |
142 | << ": " << *MI; |
143 | dbgs() << " extra input from predecessor " |
144 | << printMBBReference(MBB: *PHIBB) << '\n'; |
145 | llvm_unreachable(nullptr); |
146 | } |
147 | if (PHIBB->getNumber() < 0) { |
148 | dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": " |
149 | << *MI; |
150 | dbgs() << " non-existing " << printMBBReference(MBB: *PHIBB) << '\n'; |
151 | llvm_unreachable(nullptr); |
152 | } |
153 | } |
154 | ++MI; |
155 | } |
156 | } |
157 | } |
158 | |
159 | /// Tail duplicate the block and cleanup. |
160 | /// \p IsSimple - return value of isSimpleBB |
161 | /// \p MBB - block to be duplicated |
162 | /// \p ForcedLayoutPred - If non-null, treat this block as the layout |
163 | /// predecessor, instead of using the ordering in MF |
164 | /// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of |
165 | /// all Preds that received a copy of \p MBB. |
166 | /// \p RemovalCallback - if non-null, called just before MBB is deleted. |
167 | bool TailDuplicator::tailDuplicateAndUpdate( |
168 | bool IsSimple, MachineBasicBlock *MBB, |
169 | MachineBasicBlock *ForcedLayoutPred, |
170 | SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds, |
171 | function_ref<void(MachineBasicBlock *)> *RemovalCallback, |
172 | SmallVectorImpl<MachineBasicBlock *> *CandidatePtr) { |
173 | // Save the successors list. |
174 | SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(), |
175 | MBB->succ_end()); |
176 | |
177 | SmallVector<MachineBasicBlock *, 8> TDBBs; |
178 | SmallVector<MachineInstr *, 16> Copies; |
179 | if (!tailDuplicate(IsSimple, TailBB: MBB, ForcedLayoutPred, |
180 | TDBBs, Copies, CandidatePtr)) |
181 | return false; |
182 | |
183 | ++NumTails; |
184 | |
185 | SmallVector<MachineInstr *, 8> NewPHIs; |
186 | MachineSSAUpdater SSAUpdate(*MF, &NewPHIs); |
187 | |
188 | // TailBB's immediate successors are now successors of those predecessors |
189 | // which duplicated TailBB. Add the predecessors as sources to the PHI |
190 | // instructions. |
191 | bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); |
192 | if (PreRegAlloc) |
193 | updateSuccessorsPHIs(FromBB: MBB, isDead, TDBBs, Succs); |
194 | |
195 | // If it is dead, remove it. |
196 | if (isDead) { |
197 | NumTailDupRemoved += MBB->size(); |
198 | removeDeadBlock(MBB, RemovalCallback); |
199 | ++NumDeadBlocks; |
200 | } |
201 | |
202 | // Update SSA form. |
203 | if (!SSAUpdateVRs.empty()) { |
204 | for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { |
205 | unsigned VReg = SSAUpdateVRs[i]; |
206 | SSAUpdate.Initialize(V: VReg); |
207 | |
208 | // If the original definition is still around, add it as an available |
209 | // value. |
210 | MachineInstr *DefMI = MRI->getVRegDef(Reg: VReg); |
211 | MachineBasicBlock *DefBB = nullptr; |
212 | if (DefMI) { |
213 | DefBB = DefMI->getParent(); |
214 | SSAUpdate.AddAvailableValue(BB: DefBB, V: VReg); |
215 | } |
216 | |
217 | // Add the new vregs as available values. |
218 | DenseMap<Register, AvailableValsTy>::iterator LI = |
219 | SSAUpdateVals.find(Val: VReg); |
220 | for (std::pair<MachineBasicBlock *, Register> &J : LI->second) { |
221 | MachineBasicBlock *SrcBB = J.first; |
222 | Register SrcReg = J.second; |
223 | SSAUpdate.AddAvailableValue(BB: SrcBB, V: SrcReg); |
224 | } |
225 | |
226 | SmallVector<MachineOperand *> DebugUses; |
227 | // Rewrite uses that are outside of the original def's block. |
228 | for (MachineOperand &UseMO : |
229 | llvm::make_early_inc_range(Range: MRI->use_operands(Reg: VReg))) { |
230 | MachineInstr *UseMI = UseMO.getParent(); |
231 | // Rewrite debug uses last so that they can take advantage of any |
232 | // register mappings introduced by other users in its BB, since we |
233 | // cannot create new register definitions specifically for the debug |
234 | // instruction (as debug instructions should not affect CodeGen). |
235 | if (UseMI->isDebugValue()) { |
236 | DebugUses.push_back(Elt: &UseMO); |
237 | continue; |
238 | } |
239 | if (UseMI->getParent() == DefBB && !UseMI->isPHI()) |
240 | continue; |
241 | SSAUpdate.RewriteUse(U&: UseMO); |
242 | } |
243 | for (auto *UseMO : DebugUses) { |
244 | MachineInstr *UseMI = UseMO->getParent(); |
245 | UseMO->setReg( |
246 | SSAUpdate.GetValueInMiddleOfBlock(BB: UseMI->getParent(), ExistingValueOnly: true)); |
247 | } |
248 | } |
249 | |
250 | SSAUpdateVRs.clear(); |
251 | SSAUpdateVals.clear(); |
252 | } |
253 | |
254 | // Eliminate some of the copies inserted by tail duplication to maintain |
255 | // SSA form. |
256 | for (unsigned i = 0, e = Copies.size(); i != e; ++i) { |
257 | MachineInstr *Copy = Copies[i]; |
258 | if (!Copy->isCopy()) |
259 | continue; |
260 | Register Dst = Copy->getOperand(i: 0).getReg(); |
261 | Register Src = Copy->getOperand(i: 1).getReg(); |
262 | if (MRI->hasOneNonDBGUse(RegNo: Src) && |
263 | MRI->constrainRegClass(Reg: Src, RC: MRI->getRegClass(Reg: Dst))) { |
264 | // Copy is the only use. Do trivial copy propagation here. |
265 | MRI->replaceRegWith(FromReg: Dst, ToReg: Src); |
266 | Copy->eraseFromParent(); |
267 | } |
268 | } |
269 | |
270 | if (NewPHIs.size()) |
271 | NumAddedPHIs += NewPHIs.size(); |
272 | |
273 | if (DuplicatedPreds) |
274 | *DuplicatedPreds = std::move(TDBBs); |
275 | |
276 | return true; |
277 | } |
278 | |
279 | /// Look for small blocks that are unconditionally branched to and do not fall |
280 | /// through. Tail-duplicate their instructions into their predecessors to |
281 | /// eliminate (dynamic) branches. |
282 | bool TailDuplicator::tailDuplicateBlocks() { |
283 | bool MadeChange = false; |
284 | |
285 | if (PreRegAlloc && TailDupVerify) { |
286 | LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n" ); |
287 | VerifyPHIs(MF&: *MF, CheckExtra: true); |
288 | } |
289 | |
290 | for (MachineBasicBlock &MBB : |
291 | llvm::make_early_inc_range(Range: llvm::drop_begin(RangeOrContainer&: *MF))) { |
292 | if (NumTails == TailDupLimit) |
293 | break; |
294 | |
295 | bool IsSimple = isSimpleBB(TailBB: &MBB); |
296 | |
297 | if (!shouldTailDuplicate(IsSimple, TailBB&: MBB)) |
298 | continue; |
299 | |
300 | MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB: &MBB, ForcedLayoutPred: nullptr); |
301 | } |
302 | |
303 | if (PreRegAlloc && TailDupVerify) |
304 | VerifyPHIs(MF&: *MF, CheckExtra: false); |
305 | |
306 | return MadeChange; |
307 | } |
308 | |
309 | static bool isDefLiveOut(Register Reg, MachineBasicBlock *BB, |
310 | const MachineRegisterInfo *MRI) { |
311 | for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { |
312 | if (UseMI.isDebugValue()) |
313 | continue; |
314 | if (UseMI.getParent() != BB) |
315 | return true; |
316 | } |
317 | return false; |
318 | } |
319 | |
320 | static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { |
321 | for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) |
322 | if (MI->getOperand(i: i + 1).getMBB() == SrcBB) |
323 | return i; |
324 | return 0; |
325 | } |
326 | |
327 | // Remember which registers are used by phis in this block. This is |
328 | // used to determine which registers are liveout while modifying the |
329 | // block (which is why we need to copy the information). |
330 | static void getRegsUsedByPHIs(const MachineBasicBlock &BB, |
331 | DenseSet<Register> *UsedByPhi) { |
332 | for (const auto &MI : BB) { |
333 | if (!MI.isPHI()) |
334 | break; |
335 | for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { |
336 | Register SrcReg = MI.getOperand(i).getReg(); |
337 | UsedByPhi->insert(V: SrcReg); |
338 | } |
339 | } |
340 | } |
341 | |
342 | /// Add a definition and source virtual registers pair for SSA update. |
343 | void TailDuplicator::addSSAUpdateEntry(Register OrigReg, Register NewReg, |
344 | MachineBasicBlock *BB) { |
345 | DenseMap<Register, AvailableValsTy>::iterator LI = |
346 | SSAUpdateVals.find(Val: OrigReg); |
347 | if (LI != SSAUpdateVals.end()) |
348 | LI->second.push_back(x: std::make_pair(x&: BB, y&: NewReg)); |
349 | else { |
350 | AvailableValsTy Vals; |
351 | Vals.push_back(x: std::make_pair(x&: BB, y&: NewReg)); |
352 | SSAUpdateVals.insert(KV: std::make_pair(x&: OrigReg, y&: Vals)); |
353 | SSAUpdateVRs.push_back(Elt: OrigReg); |
354 | } |
355 | } |
356 | |
357 | /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the |
358 | /// source register that's contributed by PredBB and update SSA update map. |
359 | void TailDuplicator::processPHI( |
360 | MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, |
361 | DenseMap<Register, RegSubRegPair> &LocalVRMap, |
362 | SmallVectorImpl<std::pair<Register, RegSubRegPair>> &Copies, |
363 | const DenseSet<Register> &RegsUsedByPhi, bool Remove) { |
364 | Register DefReg = MI->getOperand(i: 0).getReg(); |
365 | unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, SrcBB: PredBB); |
366 | assert(SrcOpIdx && "Unable to find matching PHI source?" ); |
367 | Register SrcReg = MI->getOperand(i: SrcOpIdx).getReg(); |
368 | unsigned SrcSubReg = MI->getOperand(i: SrcOpIdx).getSubReg(); |
369 | const TargetRegisterClass *RC = MRI->getRegClass(Reg: DefReg); |
370 | LocalVRMap.insert(KV: std::make_pair(x&: DefReg, y: RegSubRegPair(SrcReg, SrcSubReg))); |
371 | |
372 | // Insert a copy from source to the end of the block. The def register is the |
373 | // available value liveout of the block. |
374 | Register NewDef = MRI->createVirtualRegister(RegClass: RC); |
375 | Copies.push_back(Elt: std::make_pair(x&: NewDef, y: RegSubRegPair(SrcReg, SrcSubReg))); |
376 | if (isDefLiveOut(Reg: DefReg, BB: TailBB, MRI) || RegsUsedByPhi.count(V: DefReg)) |
377 | addSSAUpdateEntry(OrigReg: DefReg, NewReg: NewDef, BB: PredBB); |
378 | |
379 | if (!Remove) |
380 | return; |
381 | |
382 | // Remove PredBB from the PHI node. |
383 | MI->removeOperand(OpNo: SrcOpIdx + 1); |
384 | MI->removeOperand(OpNo: SrcOpIdx); |
385 | if (MI->getNumOperands() == 1 && !TailBB->hasAddressTaken()) |
386 | MI->eraseFromParent(); |
387 | else if (MI->getNumOperands() == 1) |
388 | MI->setDesc(TII->get(Opcode: TargetOpcode::IMPLICIT_DEF)); |
389 | } |
390 | |
391 | /// Duplicate a TailBB instruction to PredBB and update |
392 | /// the source operands due to earlier PHI translation. |
393 | void TailDuplicator::duplicateInstruction( |
394 | MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, |
395 | DenseMap<Register, RegSubRegPair> &LocalVRMap, |
396 | const DenseSet<Register> &UsedByPhi) { |
397 | // Allow duplication of CFI instructions. |
398 | if (MI->isCFIInstruction()) { |
399 | BuildMI(BB&: *PredBB, I: PredBB->end(), MIMD: PredBB->findDebugLoc(MBBI: PredBB->begin()), |
400 | MCID: TII->get(Opcode: TargetOpcode::CFI_INSTRUCTION)) |
401 | .addCFIIndex(CFIIndex: MI->getOperand(i: 0).getCFIIndex()) |
402 | .setMIFlags(MI->getFlags()); |
403 | return; |
404 | } |
405 | MachineInstr &NewMI = TII->duplicate(MBB&: *PredBB, InsertBefore: PredBB->end(), Orig: *MI); |
406 | if (PreRegAlloc) { |
407 | for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) { |
408 | MachineOperand &MO = NewMI.getOperand(i); |
409 | if (!MO.isReg()) |
410 | continue; |
411 | Register Reg = MO.getReg(); |
412 | if (!Reg.isVirtual()) |
413 | continue; |
414 | if (MO.isDef()) { |
415 | const TargetRegisterClass *RC = MRI->getRegClass(Reg); |
416 | Register NewReg = MRI->createVirtualRegister(RegClass: RC); |
417 | MO.setReg(NewReg); |
418 | LocalVRMap.insert(KV: std::make_pair(x&: Reg, y: RegSubRegPair(NewReg, 0))); |
419 | if (isDefLiveOut(Reg, BB: TailBB, MRI) || UsedByPhi.count(V: Reg)) |
420 | addSSAUpdateEntry(OrigReg: Reg, NewReg, BB: PredBB); |
421 | } else { |
422 | auto VI = LocalVRMap.find(Val: Reg); |
423 | if (VI != LocalVRMap.end()) { |
424 | // Need to make sure that the register class of the mapped register |
425 | // will satisfy the constraints of the class of the register being |
426 | // replaced. |
427 | auto *OrigRC = MRI->getRegClass(Reg); |
428 | auto *MappedRC = MRI->getRegClass(Reg: VI->second.Reg); |
429 | const TargetRegisterClass *ConstrRC; |
430 | if (VI->second.SubReg != 0) { |
431 | ConstrRC = TRI->getMatchingSuperRegClass(A: MappedRC, B: OrigRC, |
432 | Idx: VI->second.SubReg); |
433 | if (ConstrRC) { |
434 | // The actual constraining (as in "find appropriate new class") |
435 | // is done by getMatchingSuperRegClass, so now we only need to |
436 | // change the class of the mapped register. |
437 | MRI->setRegClass(Reg: VI->second.Reg, RC: ConstrRC); |
438 | } |
439 | } else { |
440 | // For mapped registers that do not have sub-registers, simply |
441 | // restrict their class to match the original one. |
442 | |
443 | // We don't want debug instructions affecting the resulting code so |
444 | // if we're cloning a debug instruction then just use MappedRC |
445 | // rather than constraining the register class further. |
446 | ConstrRC = NewMI.isDebugInstr() |
447 | ? MappedRC |
448 | : MRI->constrainRegClass(Reg: VI->second.Reg, RC: OrigRC); |
449 | } |
450 | |
451 | if (ConstrRC) { |
452 | // If the class constraining succeeded, we can simply replace |
453 | // the old register with the mapped one. |
454 | MO.setReg(VI->second.Reg); |
455 | // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a |
456 | // sub-register, we need to compose the sub-register indices. |
457 | MO.setSubReg( |
458 | TRI->composeSubRegIndices(a: VI->second.SubReg, b: MO.getSubReg())); |
459 | } else { |
460 | // The direct replacement is not possible, due to failing register |
461 | // class constraints. An explicit COPY is necessary. Create one |
462 | // that can be reused. |
463 | Register NewReg = MRI->createVirtualRegister(RegClass: OrigRC); |
464 | BuildMI(BB&: *PredBB, I&: NewMI, MIMD: NewMI.getDebugLoc(), |
465 | MCID: TII->get(Opcode: TargetOpcode::COPY), DestReg: NewReg) |
466 | .addReg(RegNo: VI->second.Reg, flags: 0, SubReg: VI->second.SubReg); |
467 | LocalVRMap.erase(I: VI); |
468 | LocalVRMap.insert(KV: std::make_pair(x&: Reg, y: RegSubRegPair(NewReg, 0))); |
469 | MO.setReg(NewReg); |
470 | // The composed VI.Reg:VI.SubReg is replaced with NewReg, which |
471 | // is equivalent to the whole register Reg. Hence, Reg:subreg |
472 | // is same as NewReg:subreg, so keep the sub-register index |
473 | // unchanged. |
474 | } |
475 | // Clear any kill flags from this operand. The new register could |
476 | // have uses after this one, so kills are not valid here. |
477 | MO.setIsKill(false); |
478 | } |
479 | } |
480 | } |
481 | } |
482 | } |
483 | |
484 | /// After FromBB is tail duplicated into its predecessor blocks, the successors |
485 | /// have gained new predecessors. Update the PHI instructions in them |
486 | /// accordingly. |
487 | void TailDuplicator::updateSuccessorsPHIs( |
488 | MachineBasicBlock *FromBB, bool isDead, |
489 | SmallVectorImpl<MachineBasicBlock *> &TDBBs, |
490 | SmallSetVector<MachineBasicBlock *, 8> &Succs) { |
491 | for (MachineBasicBlock *SuccBB : Succs) { |
492 | for (MachineInstr &MI : *SuccBB) { |
493 | if (!MI.isPHI()) |
494 | break; |
495 | MachineInstrBuilder MIB(*FromBB->getParent(), MI); |
496 | unsigned Idx = 0; |
497 | for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { |
498 | MachineOperand &MO = MI.getOperand(i: i + 1); |
499 | if (MO.getMBB() == FromBB) { |
500 | Idx = i; |
501 | break; |
502 | } |
503 | } |
504 | |
505 | assert(Idx != 0); |
506 | MachineOperand &MO0 = MI.getOperand(i: Idx); |
507 | Register Reg = MO0.getReg(); |
508 | if (isDead) { |
509 | // Folded into the previous BB. |
510 | // There could be duplicate phi source entries. FIXME: Should sdisel |
511 | // or earlier pass fixed this? |
512 | for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) { |
513 | MachineOperand &MO = MI.getOperand(i: i + 1); |
514 | if (MO.getMBB() == FromBB) { |
515 | MI.removeOperand(OpNo: i + 1); |
516 | MI.removeOperand(OpNo: i); |
517 | } |
518 | } |
519 | } else |
520 | Idx = 0; |
521 | |
522 | // If Idx is set, the operands at Idx and Idx+1 must be removed. |
523 | // We reuse the location to avoid expensive removeOperand calls. |
524 | |
525 | DenseMap<Register, AvailableValsTy>::iterator LI = |
526 | SSAUpdateVals.find(Val: Reg); |
527 | if (LI != SSAUpdateVals.end()) { |
528 | // This register is defined in the tail block. |
529 | for (const std::pair<MachineBasicBlock *, Register> &J : LI->second) { |
530 | MachineBasicBlock *SrcBB = J.first; |
531 | // If we didn't duplicate a bb into a particular predecessor, we |
532 | // might still have added an entry to SSAUpdateVals to correcly |
533 | // recompute SSA. If that case, avoid adding a dummy extra argument |
534 | // this PHI. |
535 | if (!SrcBB->isSuccessor(MBB: SuccBB)) |
536 | continue; |
537 | |
538 | Register SrcReg = J.second; |
539 | if (Idx != 0) { |
540 | MI.getOperand(i: Idx).setReg(SrcReg); |
541 | MI.getOperand(i: Idx + 1).setMBB(SrcBB); |
542 | Idx = 0; |
543 | } else { |
544 | MIB.addReg(RegNo: SrcReg).addMBB(MBB: SrcBB); |
545 | } |
546 | } |
547 | } else { |
548 | // Live in tail block, must also be live in predecessors. |
549 | for (MachineBasicBlock *SrcBB : TDBBs) { |
550 | if (Idx != 0) { |
551 | MI.getOperand(i: Idx).setReg(Reg); |
552 | MI.getOperand(i: Idx + 1).setMBB(SrcBB); |
553 | Idx = 0; |
554 | } else { |
555 | MIB.addReg(RegNo: Reg).addMBB(MBB: SrcBB); |
556 | } |
557 | } |
558 | } |
559 | if (Idx != 0) { |
560 | MI.removeOperand(OpNo: Idx + 1); |
561 | MI.removeOperand(OpNo: Idx); |
562 | } |
563 | } |
564 | } |
565 | } |
566 | |
567 | /// Determine if it is profitable to duplicate this block. |
568 | bool TailDuplicator::shouldTailDuplicate(bool IsSimple, |
569 | MachineBasicBlock &TailBB) { |
570 | // When doing tail-duplication during layout, the block ordering is in flux, |
571 | // so canFallThrough returns a result based on incorrect information and |
572 | // should just be ignored. |
573 | if (!LayoutMode && TailBB.canFallThrough()) |
574 | return false; |
575 | |
576 | // Don't try to tail-duplicate single-block loops. |
577 | if (TailBB.isSuccessor(MBB: &TailBB)) |
578 | return false; |
579 | |
580 | // Duplicating a BB which has both multiple predecessors and successors will |
581 | // result in a complex CFG and also may cause huge amount of PHI nodes. If we |
582 | // want to remove this limitation, we have to address |
583 | // https://github.com/llvm/llvm-project/issues/78578. |
584 | if (TailBB.pred_size() > TailDupPredSize && |
585 | TailBB.succ_size() > TailDupSuccSize) |
586 | return false; |
587 | |
588 | // Set the limit on the cost to duplicate. When optimizing for size, |
589 | // duplicate only one, because one branch instruction can be eliminated to |
590 | // compensate for the duplication. |
591 | unsigned MaxDuplicateCount; |
592 | bool OptForSize = MF->getFunction().hasOptSize() || |
593 | llvm::shouldOptimizeForSize(MBB: &TailBB, PSI, MBFIWrapper: MBFI); |
594 | if (TailDupSize == 0) |
595 | MaxDuplicateCount = TailDuplicateSize; |
596 | else |
597 | MaxDuplicateCount = TailDupSize; |
598 | if (OptForSize) |
599 | MaxDuplicateCount = 1; |
600 | |
601 | // If the block to be duplicated ends in an unanalyzable fallthrough, don't |
602 | // duplicate it. |
603 | // A similar check is necessary in MachineBlockPlacement to make sure pairs of |
604 | // blocks with unanalyzable fallthrough get layed out contiguously. |
605 | MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; |
606 | SmallVector<MachineOperand, 4> PredCond; |
607 | if (TII->analyzeBranch(MBB&: TailBB, TBB&: PredTBB, FBB&: PredFBB, Cond&: PredCond) && |
608 | TailBB.canFallThrough()) |
609 | return false; |
610 | |
611 | // If the target has hardware branch prediction that can handle indirect |
612 | // branches, duplicating them can often make them predictable when there |
613 | // are common paths through the code. The limit needs to be high enough |
614 | // to allow undoing the effects of tail merging and other optimizations |
615 | // that rearrange the predecessors of the indirect branch. |
616 | |
617 | bool HasIndirectbr = false; |
618 | if (!TailBB.empty()) |
619 | HasIndirectbr = TailBB.back().isIndirectBranch(); |
620 | |
621 | if (HasIndirectbr && PreRegAlloc) |
622 | MaxDuplicateCount = TailDupIndirectBranchSize; |
623 | |
624 | // Check the instructions in the block to determine whether tail-duplication |
625 | // is invalid or unlikely to be profitable. |
626 | unsigned InstrCount = 0; |
627 | for (MachineInstr &MI : TailBB) { |
628 | // Non-duplicable things shouldn't be tail-duplicated. |
629 | // CFI instructions are marked as non-duplicable, because Darwin compact |
630 | // unwind info emission can't handle multiple prologue setups. In case of |
631 | // DWARF, allow them be duplicated, so that their existence doesn't prevent |
632 | // tail duplication of some basic blocks, that would be duplicated otherwise. |
633 | if (MI.isNotDuplicable() && |
634 | (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() || |
635 | !MI.isCFIInstruction())) |
636 | return false; |
637 | |
638 | // Convergent instructions can be duplicated only if doing so doesn't add |
639 | // new control dependencies, which is what we're going to do here. |
640 | if (MI.isConvergent()) |
641 | return false; |
642 | |
643 | // Do not duplicate 'return' instructions if this is a pre-regalloc run. |
644 | // A return may expand into a lot more instructions (e.g. reload of callee |
645 | // saved registers) after PEI. |
646 | if (PreRegAlloc && MI.isReturn()) |
647 | return false; |
648 | |
649 | // Avoid duplicating calls before register allocation. Calls presents a |
650 | // barrier to register allocation so duplicating them may end up increasing |
651 | // spills. |
652 | if (PreRegAlloc && MI.isCall()) |
653 | return false; |
654 | |
655 | // TailDuplicator::appendCopies will erroneously place COPYs after |
656 | // INLINEASM_BR instructions after 4b0aa5724fea, which demonstrates the same |
657 | // bug that was fixed in f7a53d82c090. |
658 | // FIXME: Use findPHICopyInsertPoint() to find the correct insertion point |
659 | // for the COPY when replacing PHIs. |
660 | if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) |
661 | return false; |
662 | |
663 | if (MI.isBundle()) |
664 | InstrCount += MI.getBundleSize(); |
665 | else if (!MI.isPHI() && !MI.isMetaInstruction()) |
666 | InstrCount += 1; |
667 | |
668 | if (InstrCount > MaxDuplicateCount) |
669 | return false; |
670 | } |
671 | |
672 | // Check if any of the successors of TailBB has a PHI node in which the |
673 | // value corresponding to TailBB uses a subregister. |
674 | // If a phi node uses a register paired with a subregister, the actual |
675 | // "value type" of the phi may differ from the type of the register without |
676 | // any subregisters. Due to a bug, tail duplication may add a new operand |
677 | // without a necessary subregister, producing an invalid code. This is |
678 | // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. |
679 | // Disable tail duplication for this case for now, until the problem is |
680 | // fixed. |
681 | for (auto *SB : TailBB.successors()) { |
682 | for (auto &I : *SB) { |
683 | if (!I.isPHI()) |
684 | break; |
685 | unsigned Idx = getPHISrcRegOpIdx(MI: &I, SrcBB: &TailBB); |
686 | assert(Idx != 0); |
687 | MachineOperand &PU = I.getOperand(i: Idx); |
688 | if (PU.getSubReg() != 0) |
689 | return false; |
690 | } |
691 | } |
692 | |
693 | if (HasIndirectbr && PreRegAlloc) |
694 | return true; |
695 | |
696 | if (IsSimple) |
697 | return true; |
698 | |
699 | if (!PreRegAlloc) |
700 | return true; |
701 | |
702 | return canCompletelyDuplicateBB(BB&: TailBB); |
703 | } |
704 | |
705 | /// True if this BB has only one unconditional jump. |
706 | bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) { |
707 | if (TailBB->succ_size() != 1) |
708 | return false; |
709 | if (TailBB->pred_empty()) |
710 | return false; |
711 | MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(SkipPseudoOp: true); |
712 | if (I == TailBB->end()) |
713 | return true; |
714 | return I->isUnconditionalBranch(); |
715 | } |
716 | |
717 | static bool bothUsedInPHI(const MachineBasicBlock &A, |
718 | const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) { |
719 | for (MachineBasicBlock *BB : A.successors()) |
720 | if (SuccsB.count(Ptr: BB) && !BB->empty() && BB->begin()->isPHI()) |
721 | return true; |
722 | |
723 | return false; |
724 | } |
725 | |
726 | bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { |
727 | for (MachineBasicBlock *PredBB : BB.predecessors()) { |
728 | if (PredBB->succ_size() > 1) |
729 | return false; |
730 | |
731 | MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; |
732 | SmallVector<MachineOperand, 4> PredCond; |
733 | if (TII->analyzeBranch(MBB&: *PredBB, TBB&: PredTBB, FBB&: PredFBB, Cond&: PredCond)) |
734 | return false; |
735 | |
736 | if (!PredCond.empty()) |
737 | return false; |
738 | } |
739 | return true; |
740 | } |
741 | |
742 | bool TailDuplicator::duplicateSimpleBB( |
743 | MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs, |
744 | const DenseSet<Register> &UsedByPhi) { |
745 | SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(), |
746 | TailBB->succ_end()); |
747 | SmallVector<MachineBasicBlock *, 8> Preds(TailBB->predecessors()); |
748 | bool Changed = false; |
749 | for (MachineBasicBlock *PredBB : Preds) { |
750 | if (PredBB->hasEHPadSuccessor() || PredBB->mayHaveInlineAsmBr()) |
751 | continue; |
752 | |
753 | if (bothUsedInPHI(A: *PredBB, SuccsB: Succs)) |
754 | continue; |
755 | |
756 | MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; |
757 | SmallVector<MachineOperand, 4> PredCond; |
758 | if (TII->analyzeBranch(MBB&: *PredBB, TBB&: PredTBB, FBB&: PredFBB, Cond&: PredCond)) |
759 | continue; |
760 | |
761 | Changed = true; |
762 | LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB |
763 | << "From simple Succ: " << *TailBB); |
764 | |
765 | MachineBasicBlock *NewTarget = *TailBB->succ_begin(); |
766 | MachineBasicBlock *NextBB = PredBB->getNextNode(); |
767 | |
768 | // Make PredFBB explicit. |
769 | if (PredCond.empty()) |
770 | PredFBB = PredTBB; |
771 | |
772 | // Make fall through explicit. |
773 | if (!PredTBB) |
774 | PredTBB = NextBB; |
775 | if (!PredFBB) |
776 | PredFBB = NextBB; |
777 | |
778 | // Redirect |
779 | if (PredFBB == TailBB) |
780 | PredFBB = NewTarget; |
781 | if (PredTBB == TailBB) |
782 | PredTBB = NewTarget; |
783 | |
784 | // Make the branch unconditional if possible |
785 | if (PredTBB == PredFBB) { |
786 | PredCond.clear(); |
787 | PredFBB = nullptr; |
788 | } |
789 | |
790 | // Avoid adding fall through branches. |
791 | if (PredFBB == NextBB) |
792 | PredFBB = nullptr; |
793 | if (PredTBB == NextBB && PredFBB == nullptr) |
794 | PredTBB = nullptr; |
795 | |
796 | auto DL = PredBB->findBranchDebugLoc(); |
797 | TII->removeBranch(MBB&: *PredBB); |
798 | |
799 | if (!PredBB->isSuccessor(MBB: NewTarget)) |
800 | PredBB->replaceSuccessor(Old: TailBB, New: NewTarget); |
801 | else { |
802 | PredBB->removeSuccessor(Succ: TailBB, NormalizeSuccProbs: true); |
803 | assert(PredBB->succ_size() <= 1); |
804 | } |
805 | |
806 | if (PredTBB) |
807 | TII->insertBranch(MBB&: *PredBB, TBB: PredTBB, FBB: PredFBB, Cond: PredCond, DL); |
808 | |
809 | TDBBs.push_back(Elt: PredBB); |
810 | } |
811 | return Changed; |
812 | } |
813 | |
814 | bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, |
815 | MachineBasicBlock *PredBB) { |
816 | // EH edges are ignored by analyzeBranch. |
817 | if (PredBB->succ_size() > 1) |
818 | return false; |
819 | |
820 | MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; |
821 | SmallVector<MachineOperand, 4> PredCond; |
822 | if (TII->analyzeBranch(MBB&: *PredBB, TBB&: PredTBB, FBB&: PredFBB, Cond&: PredCond)) |
823 | return false; |
824 | if (!PredCond.empty()) |
825 | return false; |
826 | // FIXME: This is overly conservative; it may be ok to relax this in the |
827 | // future under more specific conditions. If TailBB is an INLINEASM_BR |
828 | // indirect target, we need to see if the edge from PredBB to TailBB is from |
829 | // an INLINEASM_BR in PredBB, and then also if that edge was from the |
830 | // indirect target list, fallthrough/default target, or potentially both. If |
831 | // it's both, TailDuplicator::tailDuplicate will remove the edge, corrupting |
832 | // the successor list in PredBB and predecessor list in TailBB. |
833 | if (TailBB->isInlineAsmBrIndirectTarget()) |
834 | return false; |
835 | return true; |
836 | } |
837 | |
838 | /// If it is profitable, duplicate TailBB's contents in each |
839 | /// of its predecessors. |
840 | /// \p IsSimple result of isSimpleBB |
841 | /// \p TailBB Block to be duplicated. |
842 | /// \p ForcedLayoutPred When non-null, use this block as the layout predecessor |
843 | /// instead of the previous block in MF's order. |
844 | /// \p TDBBs A vector to keep track of all blocks tail-duplicated |
845 | /// into. |
846 | /// \p Copies A vector of copy instructions inserted. Used later to |
847 | /// walk all the inserted copies and remove redundant ones. |
848 | bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, |
849 | MachineBasicBlock *ForcedLayoutPred, |
850 | SmallVectorImpl<MachineBasicBlock *> &TDBBs, |
851 | SmallVectorImpl<MachineInstr *> &Copies, |
852 | SmallVectorImpl<MachineBasicBlock *> *CandidatePtr) { |
853 | LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB) |
854 | << '\n'); |
855 | |
856 | bool ShouldUpdateTerminators = TailBB->canFallThrough(); |
857 | |
858 | DenseSet<Register> UsedByPhi; |
859 | getRegsUsedByPHIs(BB: *TailBB, UsedByPhi: &UsedByPhi); |
860 | |
861 | if (IsSimple) |
862 | return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi); |
863 | |
864 | // Iterate through all the unique predecessors and tail-duplicate this |
865 | // block into them, if possible. Copying the list ahead of time also |
866 | // avoids trouble with the predecessor list reallocating. |
867 | bool Changed = false; |
868 | SmallSetVector<MachineBasicBlock *, 8> Preds; |
869 | if (CandidatePtr) |
870 | Preds.insert(Start: CandidatePtr->begin(), End: CandidatePtr->end()); |
871 | else |
872 | Preds.insert(Start: TailBB->pred_begin(), End: TailBB->pred_end()); |
873 | |
874 | for (MachineBasicBlock *PredBB : Preds) { |
875 | assert(TailBB != PredBB && |
876 | "Single-block loop should have been rejected earlier!" ); |
877 | |
878 | if (!canTailDuplicate(TailBB, PredBB)) |
879 | continue; |
880 | |
881 | // Don't duplicate into a fall-through predecessor (at least for now). |
882 | // If profile is available, findDuplicateCandidates can choose better |
883 | // fall-through predecessor. |
884 | if (!(MF->getFunction().hasProfileData() && LayoutMode)) { |
885 | bool IsLayoutSuccessor = false; |
886 | if (ForcedLayoutPred) |
887 | IsLayoutSuccessor = (ForcedLayoutPred == PredBB); |
888 | else if (PredBB->isLayoutSuccessor(MBB: TailBB) && PredBB->canFallThrough()) |
889 | IsLayoutSuccessor = true; |
890 | if (IsLayoutSuccessor) |
891 | continue; |
892 | } |
893 | |
894 | LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB |
895 | << "From Succ: " << *TailBB); |
896 | |
897 | TDBBs.push_back(Elt: PredBB); |
898 | |
899 | // Remove PredBB's unconditional branch. |
900 | TII->removeBranch(MBB&: *PredBB); |
901 | |
902 | // Clone the contents of TailBB into PredBB. |
903 | DenseMap<Register, RegSubRegPair> LocalVRMap; |
904 | SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; |
905 | for (MachineInstr &MI : llvm::make_early_inc_range(Range&: *TailBB)) { |
906 | if (MI.isPHI()) { |
907 | // Replace the uses of the def of the PHI with the register coming |
908 | // from PredBB. |
909 | processPHI(MI: &MI, TailBB, PredBB, LocalVRMap, Copies&: CopyInfos, RegsUsedByPhi: UsedByPhi, Remove: true); |
910 | } else { |
911 | // Replace def of virtual registers with new registers, and update |
912 | // uses with PHI source register or the new registers. |
913 | duplicateInstruction(MI: &MI, TailBB, PredBB, LocalVRMap, UsedByPhi); |
914 | } |
915 | } |
916 | appendCopies(MBB: PredBB, CopyInfos, Copies); |
917 | |
918 | NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch |
919 | |
920 | // Update the CFG. |
921 | PredBB->removeSuccessor(I: PredBB->succ_begin()); |
922 | assert(PredBB->succ_empty() && |
923 | "TailDuplicate called on block with multiple successors!" ); |
924 | for (MachineBasicBlock *Succ : TailBB->successors()) |
925 | PredBB->addSuccessor(Succ, Prob: MBPI->getEdgeProbability(Src: TailBB, Dst: Succ)); |
926 | |
927 | // Update branches in pred to jump to tail's layout successor if needed. |
928 | if (ShouldUpdateTerminators) |
929 | PredBB->updateTerminator(PreviousLayoutSuccessor: TailBB->getNextNode()); |
930 | |
931 | Changed = true; |
932 | ++NumTailDups; |
933 | } |
934 | |
935 | // If TailBB was duplicated into all its predecessors except for the prior |
936 | // block, which falls through unconditionally, move the contents of this |
937 | // block into the prior block. |
938 | MachineBasicBlock *PrevBB = ForcedLayoutPred; |
939 | if (!PrevBB) |
940 | PrevBB = &*std::prev(x: TailBB->getIterator()); |
941 | MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; |
942 | SmallVector<MachineOperand, 4> PriorCond; |
943 | // This has to check PrevBB->succ_size() because EH edges are ignored by |
944 | // analyzeBranch. |
945 | if (PrevBB->succ_size() == 1 && |
946 | // Layout preds are not always CFG preds. Check. |
947 | *PrevBB->succ_begin() == TailBB && |
948 | !TII->analyzeBranch(MBB&: *PrevBB, TBB&: PriorTBB, FBB&: PriorFBB, Cond&: PriorCond) && |
949 | PriorCond.empty() && |
950 | (!PriorTBB || PriorTBB == TailBB) && |
951 | TailBB->pred_size() == 1 && |
952 | !TailBB->hasAddressTaken()) { |
953 | LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB |
954 | << "From MBB: " << *TailBB); |
955 | // There may be a branch to the layout successor. This is unlikely but it |
956 | // happens. The correct thing to do is to remove the branch before |
957 | // duplicating the instructions in all cases. |
958 | bool RemovedBranches = TII->removeBranch(MBB&: *PrevBB) != 0; |
959 | |
960 | // If there are still tail instructions, abort the merge |
961 | if (PrevBB->getFirstTerminator() == PrevBB->end()) { |
962 | if (PreRegAlloc) { |
963 | DenseMap<Register, RegSubRegPair> LocalVRMap; |
964 | SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; |
965 | MachineBasicBlock::iterator I = TailBB->begin(); |
966 | // Process PHI instructions first. |
967 | while (I != TailBB->end() && I->isPHI()) { |
968 | // Replace the uses of the def of the PHI with the register coming |
969 | // from PredBB. |
970 | MachineInstr *MI = &*I++; |
971 | processPHI(MI, TailBB, PredBB: PrevBB, LocalVRMap, Copies&: CopyInfos, RegsUsedByPhi: UsedByPhi, |
972 | Remove: true); |
973 | } |
974 | |
975 | // Now copy the non-PHI instructions. |
976 | while (I != TailBB->end()) { |
977 | // Replace def of virtual registers with new registers, and update |
978 | // uses with PHI source register or the new registers. |
979 | MachineInstr *MI = &*I++; |
980 | assert(!MI->isBundle() && "Not expecting bundles before regalloc!" ); |
981 | duplicateInstruction(MI, TailBB, PredBB: PrevBB, LocalVRMap, UsedByPhi); |
982 | MI->eraseFromParent(); |
983 | } |
984 | appendCopies(MBB: PrevBB, CopyInfos, Copies); |
985 | } else { |
986 | TII->removeBranch(MBB&: *PrevBB); |
987 | // No PHIs to worry about, just splice the instructions over. |
988 | PrevBB->splice(Where: PrevBB->end(), Other: TailBB, From: TailBB->begin(), To: TailBB->end()); |
989 | } |
990 | PrevBB->removeSuccessor(I: PrevBB->succ_begin()); |
991 | assert(PrevBB->succ_empty()); |
992 | PrevBB->transferSuccessors(FromMBB: TailBB); |
993 | |
994 | // Update branches in PrevBB based on Tail's layout successor. |
995 | if (ShouldUpdateTerminators) |
996 | PrevBB->updateTerminator(PreviousLayoutSuccessor: TailBB->getNextNode()); |
997 | |
998 | TDBBs.push_back(Elt: PrevBB); |
999 | Changed = true; |
1000 | } else { |
1001 | LLVM_DEBUG(dbgs() << "Abort merging blocks, the predecessor still " |
1002 | "contains terminator instructions" ); |
1003 | // Return early if no changes were made |
1004 | if (!Changed) |
1005 | return RemovedBranches; |
1006 | } |
1007 | Changed |= RemovedBranches; |
1008 | } |
1009 | |
1010 | // If this is after register allocation, there are no phis to fix. |
1011 | if (!PreRegAlloc) |
1012 | return Changed; |
1013 | |
1014 | // If we made no changes so far, we are safe. |
1015 | if (!Changed) |
1016 | return Changed; |
1017 | |
1018 | // Handle the nasty case in that we duplicated a block that is part of a loop |
1019 | // into some but not all of its predecessors. For example: |
1020 | // 1 -> 2 <-> 3 | |
1021 | // \ | |
1022 | // \---> rest | |
1023 | // if we duplicate 2 into 1 but not into 3, we end up with |
1024 | // 12 -> 3 <-> 2 -> rest | |
1025 | // \ / | |
1026 | // \----->-----/ | |
1027 | // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced |
1028 | // with a phi in 3 (which now dominates 2). |
1029 | // What we do here is introduce a copy in 3 of the register defined by the |
1030 | // phi, just like when we are duplicating 2 into 3, but we don't copy any |
1031 | // real instructions or remove the 3 -> 2 edge from the phi in 2. |
1032 | for (MachineBasicBlock *PredBB : Preds) { |
1033 | if (is_contained(Range&: TDBBs, Element: PredBB)) |
1034 | continue; |
1035 | |
1036 | // EH edges |
1037 | if (PredBB->succ_size() != 1) |
1038 | continue; |
1039 | |
1040 | DenseMap<Register, RegSubRegPair> LocalVRMap; |
1041 | SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos; |
1042 | // Process PHI instructions first. |
1043 | for (MachineInstr &MI : make_early_inc_range(Range: TailBB->phis())) { |
1044 | // Replace the uses of the def of the PHI with the register coming |
1045 | // from PredBB. |
1046 | processPHI(MI: &MI, TailBB, PredBB, LocalVRMap, Copies&: CopyInfos, RegsUsedByPhi: UsedByPhi, Remove: false); |
1047 | } |
1048 | appendCopies(MBB: PredBB, CopyInfos, Copies); |
1049 | } |
1050 | |
1051 | return Changed; |
1052 | } |
1053 | |
1054 | /// At the end of the block \p MBB generate COPY instructions between registers |
1055 | /// described by \p CopyInfos. Append resulting instructions to \p Copies. |
1056 | void TailDuplicator::appendCopies(MachineBasicBlock *MBB, |
1057 | SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos, |
1058 | SmallVectorImpl<MachineInstr*> &Copies) { |
1059 | MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); |
1060 | const MCInstrDesc &CopyD = TII->get(Opcode: TargetOpcode::COPY); |
1061 | for (auto &CI : CopyInfos) { |
1062 | auto C = BuildMI(BB&: *MBB, I: Loc, MIMD: DebugLoc(), MCID: CopyD, DestReg: CI.first) |
1063 | .addReg(RegNo: CI.second.Reg, flags: 0, SubReg: CI.second.SubReg); |
1064 | Copies.push_back(Elt: C); |
1065 | } |
1066 | } |
1067 | |
1068 | /// Remove the specified dead machine basic block from the function, updating |
1069 | /// the CFG. |
1070 | void TailDuplicator::removeDeadBlock( |
1071 | MachineBasicBlock *MBB, |
1072 | function_ref<void(MachineBasicBlock *)> *RemovalCallback) { |
1073 | assert(MBB->pred_empty() && "MBB must be dead!" ); |
1074 | LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); |
1075 | |
1076 | MachineFunction *MF = MBB->getParent(); |
1077 | // Update the call site info. |
1078 | for (const MachineInstr &MI : *MBB) |
1079 | if (MI.shouldUpdateCallSiteInfo()) |
1080 | MF->eraseCallSiteInfo(MI: &MI); |
1081 | |
1082 | if (RemovalCallback) |
1083 | (*RemovalCallback)(MBB); |
1084 | |
1085 | // Remove all successors. |
1086 | while (!MBB->succ_empty()) |
1087 | MBB->removeSuccessor(I: MBB->succ_end() - 1); |
1088 | |
1089 | // Remove the block. |
1090 | MBB->eraseFromParent(); |
1091 | } |
1092 | |