//===- bolt/Passes/TailDuplication.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the TailDuplication class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/TailDuplication.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCRegisterInfo.h"

#include <numeric>
#include <queue>

#define DEBUG_TYPE "taildup"

using namespace llvm;

namespace opts {

extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> NoThreads;

cl::opt<bolt::TailDuplication::DuplicationMode> TailDuplicationMode(
    "tail-duplication",
    cl::desc("duplicate unconditional branches that cross a cache line"),
    cl::init(bolt::TailDuplication::TD_NONE),
    cl::values(clEnumValN(bolt::TailDuplication::TD_NONE, "none",
                          "do not apply"),
               clEnumValN(bolt::TailDuplication::TD_AGGRESSIVE, "aggressive",
                          "aggressive strategy"),
               clEnumValN(bolt::TailDuplication::TD_MODERATE, "moderate",
                          "moderate strategy"),
               clEnumValN(bolt::TailDuplication::TD_CACHE, "cache",
                          "cache-aware duplication strategy")),
    cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned>
    TailDuplicationMinimumOffset("tail-duplication-minimum-offset",
                                 cl::desc("minimum offset needed between block "
                                          "and successor to allow duplication"),
                                 cl::ReallyHidden, cl::init(64),
                                 cl::cat(BoltOptCategory));

static cl::opt<unsigned> TailDuplicationMaximumDuplication(
    "tail-duplication-maximum-duplication",
    cl::desc("tail blocks whose size (in bytes) exceeds the value are never "
             "duplicated"),
    cl::ZeroOrMore, cl::ReallyHidden, cl::init(24), cl::cat(BoltOptCategory));

static cl::opt<unsigned> TailDuplicationMinimumDuplication(
    "tail-duplication-minimum-duplication",
    cl::desc("tail blocks with size (in bytes) not exceeding the value are "
             "always duplicated"),
    cl::ReallyHidden, cl::init(2), cl::cat(BoltOptCategory));

static cl::opt<bool> TailDuplicationConstCopyPropagation(
    "tail-duplication-const-copy-propagation",
    cl::desc("enable const and copy propagation after tail duplication"),
    cl::ReallyHidden, cl::init(false), cl::cat(BoltOptCategory));

static cl::opt<unsigned> TailDuplicationMaxCacheDistance(
    "tail-duplication-max-cache-distance",
    cl::desc("The maximum jump distance (in bytes) that contributes to the "
             "ExtTSP value"),
    cl::init(256), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<double> TailDuplicationCacheBackwardWeight(
    "tail-duplication-cache-backward-weight",
    cl::desc("The weight of backward jumps for the ExtTSP value"),
    cl::init(0.5), cl::ReallyHidden, cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
namespace bolt {

void TailDuplication::getCallerSavedRegs(const MCInst &Inst, BitVector &Regs,
                                         BinaryContext &BC) const {
  if (!BC.MIB->isCall(Inst))
    return;
  BitVector CallRegs = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getCalleeSavedRegs(CallRegs);
  CallRegs.flip();
  Regs |= CallRegs;
}

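// Conservative helpers used by the propagation logic below. A register is
// "possibly" overwritten if the instruction may write it or any aliasing
// register (calls clobber all caller-saved registers; REP-prefixed
// instructions clobber their implicit registers). It is "definitely"
// overwritten only when the instruction writes the register, does not also
// read it, and is not a conditional move. For example, a CMOV may or may not
// write its destination, so it counts as a possible but not a definite
// overwrite.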
bool TailDuplication::regIsPossiblyOverwritten(const MCInst &Inst, unsigned Reg,
                                               BinaryContext &BC) const {
  BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getWrittenRegs(Inst, WrittenRegs);
  getCallerSavedRegs(Inst, WrittenRegs, BC);
  if (BC.MIB->isRep(Inst))
    BC.MIB->getRepRegs(WrittenRegs);
  WrittenRegs &= BC.MIB->getAliases(Reg, /*OnlySmaller=*/false);
  return WrittenRegs.any();
}

bool TailDuplication::regIsDefinitelyOverwritten(const MCInst &Inst,
                                                 unsigned Reg,
                                                 BinaryContext &BC) const {
  BitVector WrittenRegs = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getWrittenRegs(Inst, WrittenRegs);
  getCallerSavedRegs(Inst, WrittenRegs, BC);
  if (BC.MIB->isRep(Inst))
    BC.MIB->getRepRegs(WrittenRegs);
  return (!regIsUsed(Inst, Reg, BC) && WrittenRegs.test(Reg) &&
          !BC.MIB->isConditionalMove(Inst));
}

bool TailDuplication::regIsUsed(const MCInst &Inst, unsigned Reg,
                                BinaryContext &BC) const {
  BitVector SrcRegs = BitVector(BC.MRI->getNumRegs(), false);
  BC.MIB->getSrcRegs(Inst, SrcRegs);
  SrcRegs &= BC.MIB->getAliases(Reg, /*OnlySmaller=*/true);
  return SrcRegs.any();
}

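// Returns true if Reg is never read on any path out of StartBB before being
// overwritten, i.e. the value left in Reg past StartBB is dead. The check is
// a breadth-first search over successor blocks: any read of Reg seen before a
// definite overwrite makes the answer false, while a definite overwrite stops
// the search along that path. Already-visited blocks (e.g. loop back-edges)
// are skipped.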
bool TailDuplication::isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB,
                                              unsigned Reg) const {
  BinaryFunction *BF = StartBB.getFunction();
  BinaryContext &BC = BF->getBinaryContext();
  std::queue<BinaryBasicBlock *> Q;
  for (auto Itr = StartBB.succ_begin(); Itr != StartBB.succ_end(); ++Itr) {
    BinaryBasicBlock *NextBB = *Itr;
    Q.push(NextBB);
  }
  std::set<BinaryBasicBlock *> Visited;
  // Breadth-first search through successor blocks to see if Reg is ever used
  // before it is overwritten.
  while (Q.size() > 0) {
    BinaryBasicBlock *CurrBB = Q.front();
    Q.pop();
    if (Visited.count(CurrBB))
      continue;
    Visited.insert(CurrBB);
    bool Overwritten = false;
    for (auto Itr = CurrBB->begin(); Itr != CurrBB->end(); ++Itr) {
      MCInst &Inst = *Itr;
      if (regIsUsed(Inst, Reg, BC))
        return false;
      if (regIsDefinitelyOverwritten(Inst, Reg, BC)) {
        Overwritten = true;
        break;
      }
    }
    if (Overwritten)
      continue;
    for (auto Itr = CurrBB->succ_begin(); Itr != CurrBB->succ_end(); ++Itr) {
      BinaryBasicBlock *NextBB = *Itr;
      Q.push(NextBB);
    }
  }
  return true;
}

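// Forward-propagate simple moves from OriginalBB into the blocks in
// BlocksToPropagate so the copied code can be simplified. Conceptually, for
// constant propagation a sequence such as (illustrative x86 pseudo-assembly)
//
//   mov $0x8, %rbx        // OriginalInst: move-immediate into %rbx
//   ...
//   add %rbx, %rax        // use of %rbx in a propagated block
//
// becomes
//
//   add $0x8, %rax
//
// and the original move is deleted once every use was rewritten and the value
// in %rbx is provably dead afterwards. Copy propagation does the same with a
// register source instead of an immediate. Whether a particular use can be
// rewritten is delegated to MCPlusBuilder::replaceRegWithImm /
// replaceRegWithReg, which may refuse (e.g. for an unsupported opcode).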
void TailDuplication::constantAndCopyPropagate(
    BinaryBasicBlock &OriginalBB,
    std::vector<BinaryBasicBlock *> &BlocksToPropagate) {
  BinaryFunction *BF = OriginalBB.getFunction();
  BinaryContext &BC = BF->getBinaryContext();

  BlocksToPropagate.insert(BlocksToPropagate.begin(), &OriginalBB);
  // Iterate through the original instructions to find one to propagate
  for (auto Itr = OriginalBB.begin(); Itr != OriginalBB.end(); ++Itr) {
    MCInst &OriginalInst = *Itr;
    // It must be a non-conditional move
    if (BC.MIB->isConditionalMove(OriginalInst))
      continue;

    // Move immediate or move register
    if ((!BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate() ||
         !OriginalInst.getOperand(1).isImm()) &&
        (!BC.MII->get(OriginalInst.getOpcode()).isMoveReg() ||
         !OriginalInst.getOperand(1).isReg()))
      continue;

    // True if this is constant propagation and not copy propagation
    bool ConstantProp = BC.MII->get(OriginalInst.getOpcode()).isMoveImmediate();
    // The register to be replaced
    unsigned Reg = OriginalInst.getOperand(0).getReg();
    // True if the register to replace was replaced everywhere it was used
    bool ReplacedEverywhere = true;
    // True if the register was definitely overwritten
    bool Overwritten = false;
    // True if the register to replace and the register to replace with (for
    // copy propagation) have not been overwritten and are still usable
    bool RegsActive = true;

    // Iterate through successor blocks and through their instructions
    for (BinaryBasicBlock *NextBB : BlocksToPropagate) {
      for (auto PropagateItr =
               ((NextBB == &OriginalBB) ? Itr + 1 : NextBB->begin());
           PropagateItr < NextBB->end(); ++PropagateItr) {
        MCInst &PropagateInst = *PropagateItr;
        if (regIsUsed(PropagateInst, Reg, BC)) {
          bool Replaced = false;
          // If both registers are active for copy propagation or the register
          // to replace is active for constant propagation
          if (RegsActive) {
            // Set Replaced (and so ReplacedEverywhere) to false if it cannot
            // be replaced (no replacing that opcode, register is src and dest)
            if (ConstantProp)
              Replaced = BC.MIB->replaceRegWithImm(
                  PropagateInst, Reg, OriginalInst.getOperand(1).getImm());
            else
              Replaced = BC.MIB->replaceRegWithReg(
                  PropagateInst, Reg, OriginalInst.getOperand(1).getReg());
          }
          ReplacedEverywhere = ReplacedEverywhere && Replaced;
        }
        // For copy propagation, make sure no propagation happens after the
        // register to replace with is overwritten
        if (!ConstantProp &&
            regIsPossiblyOverwritten(PropagateInst,
                                     OriginalInst.getOperand(1).getReg(), BC))
          RegsActive = false;

        // Make sure no propagation happens after the register to replace is
        // overwritten
        if (regIsPossiblyOverwritten(PropagateInst, Reg, BC))
          RegsActive = false;

        // Record if the register to replace is overwritten
        if (regIsDefinitelyOverwritten(PropagateInst, Reg, BC)) {
          Overwritten = true;
          break;
        }
      }
      if (Overwritten)
        break;
    }

    // If the register was replaced everywhere and it was overwritten in either
    // one of the iterated-through blocks or one of the successor blocks, delete
    // the original move instruction
    if (ReplacedEverywhere &&
        (Overwritten ||
         isOverwrittenBeforeUsed(
             *BlocksToPropagate[BlocksToPropagate.size() - 1], Reg))) {
      StaticInstructionDeletionCount++;
      DynamicInstructionDeletionCount += OriginalBB.getExecutionCount();
      Itr = std::prev(OriginalBB.eraseInstruction(Itr));
    }
  }
}

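// Heuristic proxy for "BB and Succ already fall into the same cache line":
// walk the blocks laid out between them and sum their original sizes; if the
// running total exceeds -tail-duplication-minimum-offset (64 bytes by
// default), the two blocks are considered far apart. For example, with two
// 40-byte blocks between BB and Succ in the layout, the distance reaches 80
// bytes and the function returns false, so duplication stays on the table.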
bool TailDuplication::isInCacheLine(const BinaryBasicBlock &BB,
                                    const BinaryBasicBlock &Succ) const {
  if (&BB == &Succ)
    return true;

  uint64_t Distance = 0;
  int Direction = (Succ.getLayoutIndex() > BB.getLayoutIndex()) ? 1 : -1;

  for (unsigned I = BB.getLayoutIndex() + Direction; I != Succ.getLayoutIndex();
       I += Direction) {
    Distance += BB.getFunction()->getLayout().getBlock(I)->getOriginalSize();
    if (Distance > opts::TailDuplicationMinimumOffset)
      return false;
  }
  return true;
}

std::vector<BinaryBasicBlock *>
TailDuplication::moderateDuplicate(BinaryBasicBlock &BB,
                                   BinaryBasicBlock &Tail) const {
  std::vector<BinaryBasicBlock *> BlocksToDuplicate;
  // The block must be hot
  if (BB.getKnownExecutionCount() == 0)
    return BlocksToDuplicate;
  // and its successor is not already in the same cache line
  if (isInCacheLine(BB, Tail))
    return BlocksToDuplicate;
  // and its size does not exceed the maximum allowed size
  if (Tail.getOriginalSize() > opts::TailDuplicationMaximumDuplication)
    return BlocksToDuplicate;
  // If duplicating would introduce a new branch, don't duplicate
  for (auto Itr = Tail.succ_begin(); Itr != Tail.succ_end(); ++Itr) {
    if ((*Itr)->getLayoutIndex() == Tail.getLayoutIndex() + 1)
      return BlocksToDuplicate;
  }

  BlocksToDuplicate.push_back(&Tail);
  return BlocksToDuplicate;
}

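// The aggressive strategy tries to duplicate not just Tail but a whole chain
// of blocks reachable from it. Starting at Tail, each block is appended and
// the walk continues through its layout fall-through; the chain is kept when
// it ends in a block with no successors or one that jumps away from the
// layout order, e.g.
//
//   layout:  BB (jmp T) ... T (falls through to U) U (ret) ...
//
// where both T and U can be copied right after BB, turning the jump from BB
// into straight-line code. The chain is abandoned when it reaches a jump
// table, when a conditional block falls through to the next block in the
// layout (no simple stream to copy), or when the duplicated bytes would
// exceed -tail-duplication-maximum-duplication.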
std::vector<BinaryBasicBlock *>
TailDuplication::aggressiveDuplicate(BinaryBasicBlock &BB,
                                     BinaryBasicBlock &Tail) const {
  std::vector<BinaryBasicBlock *> BlocksToDuplicate;
  // The block must be hot
  if (BB.getKnownExecutionCount() == 0)
    return BlocksToDuplicate;
  // and its successor is not already in the same cache line
  if (isInCacheLine(BB, Tail))
    return BlocksToDuplicate;

  BinaryBasicBlock *CurrBB = &Tail;
  while (CurrBB) {
    LLVM_DEBUG(dbgs() << "Aggressive tail duplication: adding "
                      << CurrBB->getName() << " to duplication list\n";);
    BlocksToDuplicate.push_back(CurrBB);

    if (CurrBB->hasJumpTable()) {
      LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing duplication "
                           "list due to a JT in "
                        << CurrBB->getName() << '\n';);
      BlocksToDuplicate.clear();
      break;
    }

    // With no successors, we've reached the end and should duplicate all of
    // BlocksToDuplicate
    if (CurrBB->succ_size() == 0)
      break;

    // With two successors, if they're both a jump, we should duplicate all
    // blocks in BlocksToDuplicate. Otherwise, we cannot find a simple stream
    // of blocks to copy
    if (CurrBB->succ_size() >= 2) {
      if (CurrBB->getConditionalSuccessor(false)->getLayoutIndex() ==
              CurrBB->getLayoutIndex() + 1 ||
          CurrBB->getConditionalSuccessor(true)->getLayoutIndex() ==
              CurrBB->getLayoutIndex() + 1) {
        LLVM_DEBUG(dbgs() << "Aggressive tail duplication: clearing "
                             "duplication list, can't find a simple stream at "
                          << CurrBB->getName() << '\n';);
        BlocksToDuplicate.clear();
      }
      break;
    }

    // With one successor, if it's a jump, we should duplicate all blocks in
    // BlocksToDuplicate. Otherwise, we should keep going
    BinaryBasicBlock *SuccBB = CurrBB->getSuccessor();
    if (SuccBB->getLayoutIndex() != CurrBB->getLayoutIndex() + 1)
      break;
    CurrBB = SuccBB;
  }
  // Don't duplicate if it's too much code
  unsigned DuplicationByteCount = std::accumulate(
      std::begin(BlocksToDuplicate), std::end(BlocksToDuplicate), 0,
      [](int value, BinaryBasicBlock *p) {
        return value + p->getOriginalSize();
      });
  if (DuplicationByteCount > opts::TailDuplicationMaximumDuplication) {
    LLVM_DEBUG(dbgs() << "Aggressive tail duplication: duplication byte count ("
                      << DuplicationByteCount << ") exceeds maximum "
                      << opts::TailDuplicationMaximumDuplication << '\n';);
    BlocksToDuplicate.clear();
  }
  LLVM_DEBUG(dbgs() << "Aggressive tail duplication: found "
                    << BlocksToDuplicate.size() << " blocks to duplicate\n";);
  return BlocksToDuplicate;
}

bool TailDuplication::shouldDuplicate(BinaryBasicBlock *Pred,
                                      BinaryBasicBlock *Tail) const {
  if (Pred == Tail)
    return false;
  // Cannot duplicate non-tail blocks
  if (Tail->succ_size() != 0)
    return false;
  // The blocks are already laid out one after the other
  if (Pred->getLayoutIndex() + 1 == Tail->getLayoutIndex())
    return false;
  // No tail duplication for blocks with jump tables
  if (Pred->hasJumpTable())
    return false;
  if (Tail->hasJumpTable())
    return false;

  return true;
}

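// ExtTSP-style score of a single jump. The distance is measured from the
// start of the source block to the end of the destination block (or the
// reverse for backward jumps), decays linearly to zero at
// -tail-duplication-max-cache-distance, and backward jumps are discounted by
// -tail-duplication-cache-backward-weight. With the defaults (256 and 0.5), a
// forward jump with SrcAddr = 0, SrcSize = 16, DstAddr = 100, DstSize = 20
// has JumpDistance = 120, Prob = 1 - 120/256 ~= 0.53, and contributes roughly
// 0.53 * Count; a backward jump of the same distance would contribute half of
// that.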
double TailDuplication::cacheScore(uint64_t SrcAddr, uint64_t SrcSize,
                                   uint64_t DstAddr, uint64_t DstSize,
                                   uint64_t Count) const {
  assert(Count != BinaryBasicBlock::COUNT_NO_PROFILE);

  bool IsForwardJump = SrcAddr <= DstAddr;
  uint64_t JumpDistance = 0;
  // Computing the length of the jump so that it takes the sizes of the two
  // blocks into consideration
  if (IsForwardJump) {
    JumpDistance = (DstAddr + DstSize) - (SrcAddr);
  } else {
    JumpDistance = (SrcAddr + SrcSize) - (DstAddr);
  }

  if (JumpDistance >= opts::TailDuplicationMaxCacheDistance)
    return 0;
  double Prob = 1.0 - static_cast<double>(JumpDistance) /
                          opts::TailDuplicationMaxCacheDistance;
  return (IsForwardJump ? 1.0 : opts::TailDuplicationCacheBackwardWeight) *
         Prob * Count;
}

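// Decide whether placing a copy of Tail directly after Pred raises the total
// ExtTSP score of the function. Block addresses are estimated twice from the
// current layout: once as-is and once in a what-if layout where every block
// after Pred is shifted down by Tail's size; the Pred->Tail edge is then
// scored against the copy that would sit immediately after Pred.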
bool TailDuplication::cacheScoreImproved(const MCCodeEmitter *Emitter,
                                         BinaryFunction &BF,
                                         BinaryBasicBlock *Pred,
                                         BinaryBasicBlock *Tail) const {
  // Collect (estimated) basic block sizes
  DenseMap<const BinaryBasicBlock *, uint64_t> BBSize;
  for (const BinaryBasicBlock &BB : BF) {
    BBSize[&BB] = std::max<uint64_t>(BB.estimateSize(Emitter), 1);
  }

  // Build current addresses of basic blocks starting at the entry block
  DenseMap<BinaryBasicBlock *, uint64_t> CurAddr;
  uint64_t Addr = 0;
  for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) {
    CurAddr[SrcBB] = Addr;
    Addr += BBSize[SrcBB];
  }

  // Build new addresses (after duplication) starting at the entry block
  DenseMap<BinaryBasicBlock *, uint64_t> NewAddr;
  Addr = 0;
  for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) {
    NewAddr[SrcBB] = Addr;
    Addr += BBSize[SrcBB];
    if (SrcBB == Pred)
      Addr += BBSize[Tail];
  }

  // Compute the cache score for the existing layout of basic blocks
  double CurScore = 0;
  for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) {
    auto BI = SrcBB->branch_info_begin();
    for (BinaryBasicBlock *DstBB : SrcBB->successors()) {
      if (SrcBB != DstBB) {
        CurScore += cacheScore(CurAddr[SrcBB], BBSize[SrcBB], CurAddr[DstBB],
                               BBSize[DstBB], BI->Count);
      }
      ++BI;
    }
  }

  // Compute the cache score for the layout of blocks after tail duplication
  double NewScore = 0;
  for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) {
    auto BI = SrcBB->branch_info_begin();
    for (BinaryBasicBlock *DstBB : SrcBB->successors()) {
      if (SrcBB != DstBB) {
        if (SrcBB == Pred && DstBB == Tail) {
          NewScore += cacheScore(NewAddr[SrcBB], BBSize[SrcBB],
                                 NewAddr[SrcBB] + BBSize[SrcBB], BBSize[DstBB],
                                 BI->Count);
        } else {
          NewScore += cacheScore(NewAddr[SrcBB], BBSize[SrcBB], NewAddr[DstBB],
                                 BBSize[DstBB], BI->Count);
        }
      }
      ++BI;
    }
  }

  return NewScore > CurScore;
}

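// Cache-aware policy for a single Pred -> Tail edge: skip cold blocks, always
// duplicate tails no larger than -tail-duplication-minimum-duplication
// (2 bytes by default, roughly the size of the jump being removed), never
// duplicate tails above -tail-duplication-maximum-duplication (24 bytes by
// default), never append after the last hot block of the layout, and
// otherwise duplicate only when the estimated ExtTSP score improves.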
std::vector<BinaryBasicBlock *>
TailDuplication::cacheDuplicate(const MCCodeEmitter *Emitter,
                                BinaryFunction &BF, BinaryBasicBlock *Pred,
                                BinaryBasicBlock *Tail) const {
  std::vector<BinaryBasicBlock *> BlocksToDuplicate;

  // No need to duplicate cold basic blocks
  if (Pred->isCold() || Tail->isCold()) {
    return BlocksToDuplicate;
  }
  // Always duplicate "small" tail basic blocks, which might be beneficial for
  // code size, since a jump instruction is eliminated
  if (Tail->estimateSize(Emitter) <= opts::TailDuplicationMinimumDuplication) {
    BlocksToDuplicate.push_back(Tail);
    return BlocksToDuplicate;
  }
  // Never duplicate "large" tail basic blocks
  if (Tail->estimateSize(Emitter) > opts::TailDuplicationMaximumDuplication) {
    return BlocksToDuplicate;
  }
  // Do not append basic blocks after the last hot block in the current layout
  auto NextBlock = BF.getLayout().getBasicBlockAfter(Pred);
  if (NextBlock == nullptr || (!Pred->isCold() && NextBlock->isCold())) {
    return BlocksToDuplicate;
  }

  // Duplicate the tail only if it improves the cache score
  if (cacheScoreImproved(Emitter, BF, Pred, Tail)) {
    BlocksToDuplicate.push_back(Tail);
  }

  return BlocksToDuplicate;
}

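// Perform the actual duplication: BB loses its jump to the original tail and
// instead falls through into freshly created copies of BlocksToDuplicate,
// while the originals keep serving their other predecessors. Execution counts
// are split between the copies and the originals in proportion to how much of
// the successor's count came through BB. For example, if BB executes 30 times
// and its successor 100 times, the duplicated chain is credited with 30% of
// the counts and the original chain keeps the remaining 70%.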
std::vector<BinaryBasicBlock *> TailDuplication::duplicateBlocks(
    BinaryBasicBlock &BB,
    const std::vector<BinaryBasicBlock *> &BlocksToDuplicate) const {
  BinaryFunction *BF = BB.getFunction();
  BinaryContext &BC = BF->getBinaryContext();

  // Ratio of the new branch's execution count to the successor's execution
  // count. Used to set the new branch's execution count and to lower the old
  // successor's execution count.
  double ExecutionCountRatio =
      BB.getExecutionCount() >= BB.getSuccessor()->getExecutionCount()
          ? 1.0
          : (double)BB.getExecutionCount() /
                BB.getSuccessor()->getExecutionCount();

  // Use the last branch info when adding a successor to LastBB
  BinaryBasicBlock::BinaryBranchInfo &LastBI =
      BB.getBranchInfo(*(BB.getSuccessor()));

  BinaryBasicBlock *LastOriginalBB = &BB;
  BinaryBasicBlock *LastDuplicatedBB = &BB;
  assert(LastDuplicatedBB->succ_size() == 1 &&
         "tail duplication cannot act on a block with more than 1 successor");
  LastDuplicatedBB->removeSuccessor(LastDuplicatedBB->getSuccessor());

  std::vector<std::unique_ptr<BinaryBasicBlock>> DuplicatedBlocks;
  std::vector<BinaryBasicBlock *> DuplicatedBlocksToReturn;

  for (BinaryBasicBlock *CurBB : BlocksToDuplicate) {
    DuplicatedBlocks.emplace_back(
        BF->createBasicBlock((BC.Ctx)->createNamedTempSymbol("tail-dup")));
    BinaryBasicBlock *NewBB = DuplicatedBlocks.back().get();

    NewBB->addInstructions(CurBB->begin(), CurBB->end());
    // Set the execution count as if it were just a copy of the original
    NewBB->setExecutionCount(CurBB->getExecutionCount());
    NewBB->setIsCold(CurBB->isCold());
    LastDuplicatedBB->addSuccessor(NewBB, LastBI);

    DuplicatedBlocksToReturn.push_back(NewBB);

    // As long as it's not the first block, adjust both the original and the
    // duplicated block to what they should be
    if (LastDuplicatedBB != &BB) {
      LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio);
      LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio);
    }

    if (CurBB->succ_size() == 1)
      LastBI = CurBB->getBranchInfo(*(CurBB->getSuccessor()));

    LastOriginalBB = CurBB;
    LastDuplicatedBB = NewBB;
  }

  LastDuplicatedBB->addSuccessors(
      LastOriginalBB->succ_begin(), LastOriginalBB->succ_end(),
      LastOriginalBB->branch_info_begin(), LastOriginalBB->branch_info_end());

  LastOriginalBB->adjustExecutionCount(1.0 - ExecutionCountRatio);
  LastDuplicatedBB->adjustExecutionCount(ExecutionCountRatio);

  BF->insertBasicBlocks(&BB, std::move(DuplicatedBlocks));

  return DuplicatedBlocksToReturn;
}

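// Per-function driver: walk a snapshot of the layout, and for every block
// with a single successor that qualifies (shouldDuplicate), ask the selected
// strategy (aggressive, moderate, or cache) which blocks to copy, apply the
// duplication, and optionally run constant/copy propagation over the new
// blocks. Layout indices are refreshed after each duplication since inserting
// new blocks invalidates them.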
void TailDuplication::runOnFunction(BinaryFunction &Function) {
  // Create a separate MCCodeEmitter to allow lock-free execution
  BinaryContext::IndependentCodeEmitter Emitter;
  if (!opts::NoThreads) {
    Emitter = Function.getBinaryContext().createIndependentMCCodeEmitter();
  }

  Function.getLayout().updateLayoutIndices();

  // New blocks will be added and the layout will change,
  // so make a copy here to iterate over the original layout
  BinaryFunction::BasicBlockOrderType BlockLayout(
      Function.getLayout().block_begin(), Function.getLayout().block_end());
  bool ModifiedFunction = false;
  for (BinaryBasicBlock *BB : BlockLayout) {
    AllDynamicCount += BB->getKnownExecutionCount();

    // The block must have exactly one successor
    if (BB->succ_size() != 1)
      continue;
    BinaryBasicBlock *Tail = BB->getSuccessor();
    // Verify that the tail should be duplicated
    if (!shouldDuplicate(BB, Tail))
      continue;

    std::vector<BinaryBasicBlock *> BlocksToDuplicate;
    if (opts::TailDuplicationMode == TailDuplication::TD_AGGRESSIVE) {
      BlocksToDuplicate = aggressiveDuplicate(*BB, *Tail);
    } else if (opts::TailDuplicationMode == TailDuplication::TD_MODERATE) {
      BlocksToDuplicate = moderateDuplicate(*BB, *Tail);
    } else if (opts::TailDuplicationMode == TailDuplication::TD_CACHE) {
      BlocksToDuplicate = cacheDuplicate(Emitter.MCE.get(), Function, BB, Tail);
    } else {
      llvm_unreachable("unknown tail duplication mode");
    }

    if (BlocksToDuplicate.empty())
      continue;

    // Apply the duplication
    ModifiedFunction = true;
    DuplicationsDynamicCount += BB->getExecutionCount();
    auto DuplicatedBlocks = duplicateBlocks(*BB, BlocksToDuplicate);
    for (BinaryBasicBlock *BB : DuplicatedBlocks) {
      DuplicatedBlockCount++;
      DuplicatedByteCount += BB->estimateSize(Emitter.MCE.get());
    }

    if (opts::TailDuplicationConstCopyPropagation) {
      constantAndCopyPropagate(*BB, DuplicatedBlocks);
      BinaryBasicBlock *FirstBB = BlocksToDuplicate[0];
      if (FirstBB->pred_size() == 1) {
        BinaryBasicBlock *PredBB = *FirstBB->pred_begin();
        if (PredBB->succ_size() == 1)
          constantAndCopyPropagate(*PredBB, BlocksToDuplicate);
      }
    }

    // Layout indices might be stale after duplication
    Function.getLayout().updateLayoutIndices();
  }
  if (ModifiedFunction)
    ModifiedFunctions++;
}

Error TailDuplication::runOnFunctions(BinaryContext &BC) {
  if (opts::TailDuplicationMode == TailDuplication::TD_NONE)
    return Error::success();

  for (auto &It : BC.getBinaryFunctions()) {
    BinaryFunction &Function = It.second;
    if (!shouldOptimize(Function))
      continue;
    runOnFunction(Function);
  }

  BC.outs()
      << "BOLT-INFO: tail duplication"
      << format(" modified %zu (%.2f%%) functions;", ModifiedFunctions,
                100.0 * ModifiedFunctions / BC.getBinaryFunctions().size())
      << format(" duplicated %zu blocks (%zu bytes) responsible for",
                DuplicatedBlockCount, DuplicatedByteCount)
      << format(" %zu dynamic executions (%.2f%% of all block executions)",
                DuplicationsDynamicCount,
                100.0 * DuplicationsDynamicCount / AllDynamicCount)
      << "\n";

  if (opts::TailDuplicationConstCopyPropagation) {
    BC.outs() << "BOLT-INFO: tail duplication "
              << format(
                     "applied %zu static and %zu dynamic propagation deletions",
                     StaticInstructionDeletionCount,
                     DynamicInstructionDeletionCount)
              << "\n";
  }
  return Error::success();
}

} // end namespace bolt
} // end namespace llvm
