//===- bolt/Passes/LongJmp.cpp --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the LongJmpPass class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/LongJmp.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/MathExtras.h"

#define DEBUG_TYPE "longjmp"

using namespace llvm;

namespace opts {
extern cl::OptionCategory BoltCategory;
extern cl::OptionCategory BoltOptCategory;
extern llvm::cl::opt<unsigned> AlignText;
extern cl::opt<unsigned> AlignFunctions;
extern cl::opt<bool> UseOldText;
extern cl::opt<bool> HotFunctionsAtEnd;

static cl::opt<bool> GroupStubs("group-stubs",
                                cl::desc("share stubs across functions"),
                                cl::init(true), cl::cat(BoltOptCategory));
}

namespace llvm {
namespace bolt {

constexpr unsigned ColdFragAlign = 16;

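// Helpers that rewrite a stub basic block in place, replacing its contents
// with a short-jump or long-jump sequence to \p Tgt. They are used when the
// stub's existing encoding can no longer reach its target.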
static void relaxStubToShortJmp(BinaryBasicBlock &StubBB, const MCSymbol *Tgt) {
  const BinaryContext &BC = StubBB.getFunction()->getBinaryContext();
  InstructionListType Seq;
  BC.MIB->createShortJmp(Seq, Tgt, BC.Ctx.get());
  StubBB.clear();
  StubBB.addInstructions(Seq.begin(), Seq.end());
}

static void relaxStubToLongJmp(BinaryBasicBlock &StubBB, const MCSymbol *Tgt) {
  const BinaryContext &BC = StubBB.getFunction()->getBinaryContext();
  InstructionListType Seq;
  BC.MIB->createLongJmp(Seq, Tgt, BC.Ctx.get());
  StubBB.clear();
  StubBB.addInstructions(Seq.begin(), Seq.end());
}

static BinaryBasicBlock *getBBAtHotColdSplitPoint(BinaryFunction &Func) {
  if (!Func.isSplit() || Func.empty())
    return nullptr;

  assert(!(*Func.begin()).isCold() && "Entry cannot be cold");
  for (auto I = Func.getLayout().block_begin(),
            E = Func.getLayout().block_end();
       I != E; ++I) {
    auto Next = std::next(I);
    if (Next != E && (*Next)->isCold())
      return *I;
  }
  llvm_unreachable("No hot-cold split point found");
}

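// Only direct (PC-relative) branches and calls may need a stub; indirect
// branches and indirect calls carry no fixed displacement that could go out
// of range.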
static bool mayNeedStub(const BinaryContext &BC, const MCInst &Inst) {
  return (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst)) &&
         !BC.MIB->isIndirectBranch(Inst) && !BC.MIB->isIndirectCall(Inst);
}

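// Create a new stub basic block in the function containing \p SourceBB. The
// stub holds a single unconditional branch to \p TgtSym (converted to a tail
// call when the target is a function) and is registered in the hot/cold local
// stub maps and, with -group-stubs, in the global stub groups keyed by the
// target symbol.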
std::pair<std::unique_ptr<BinaryBasicBlock>, MCSymbol *>
LongJmpPass::createNewStub(BinaryBasicBlock &SourceBB, const MCSymbol *TgtSym,
                           bool TgtIsFunc, uint64_t AtAddress) {
  BinaryFunction &Func = *SourceBB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  const bool IsCold = SourceBB.isCold();
  MCSymbol *StubSym = BC.Ctx->createNamedTempSymbol("Stub");
  std::unique_ptr<BinaryBasicBlock> StubBB = Func.createBasicBlock(StubSym);
  MCInst Inst;
  BC.MIB->createUncondBranch(Inst, TgtSym, BC.Ctx.get());
  if (TgtIsFunc)
    BC.MIB->convertJmpToTailCall(Inst);
  StubBB->addInstruction(Inst);
  StubBB->setExecutionCount(0);

  // Register this in stubs maps
  auto registerInMap = [&](StubGroupsTy &Map) {
    StubGroupTy &StubGroup = Map[TgtSym];
    StubGroup.insert(
        llvm::lower_bound(
            StubGroup, std::make_pair(AtAddress, nullptr),
            [&](const std::pair<uint64_t, BinaryBasicBlock *> &LHS,
                const std::pair<uint64_t, BinaryBasicBlock *> &RHS) {
              return LHS.first < RHS.first;
            }),
        std::make_pair(AtAddress, StubBB.get()));
  };

  Stubs[&Func].insert(StubBB.get());
  StubBits[StubBB.get()] = BC.MIB->getUncondBranchEncodingSize();
  if (IsCold) {
    registerInMap(ColdLocalStubs[&Func]);
    if (opts::GroupStubs && TgtIsFunc)
      registerInMap(ColdStubGroups);
    ++NumColdStubs;
  } else {
    registerInMap(HotLocalStubs[&Func]);
    if (opts::GroupStubs && TgtIsFunc)
      registerInMap(HotStubGroups);
    ++NumHotStubs;
  }

  return std::make_pair(std::move(StubBB), StubSym);
}

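// Look up, among the stubs already emitted for \p TgtSym, the one whose
// tentative address is closest to \p DotAddress. Groups are kept sorted by
// address, so a lower_bound search finds the nearest candidates. The stub is
// returned only if it lies within the PC-relative range of \p Inst.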
BinaryBasicBlock *LongJmpPass::lookupStubFromGroup(
    const StubGroupsTy &StubGroups, const BinaryFunction &Func,
    const MCInst &Inst, const MCSymbol *TgtSym, uint64_t DotAddress) const {
  const BinaryContext &BC = Func.getBinaryContext();
  auto CandidatesIter = StubGroups.find(TgtSym);
  if (CandidatesIter == StubGroups.end())
    return nullptr;
  const StubGroupTy &Candidates = CandidatesIter->second;
  if (Candidates.empty())
    return nullptr;
  auto Cand = llvm::lower_bound(
      Candidates, std::make_pair(DotAddress, nullptr),
      [&](const std::pair<uint64_t, BinaryBasicBlock *> &LHS,
          const std::pair<uint64_t, BinaryBasicBlock *> &RHS) {
        return LHS.first < RHS.first;
      });
  if (Cand == Candidates.end()) {
    Cand = std::prev(Cand);
  } else if (Cand != Candidates.begin()) {
    const StubTy *LeftCand = std::prev(Cand);
    if (Cand->first - DotAddress > DotAddress - LeftCand->first)
      Cand = LeftCand;
  }
  int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
  assert(BitsAvail < 63 && "PCRelEncodingSize is too large to use int64_t to "
                           "check for out-of-bounds.");
  int64_t MaxVal = (1ULL << BitsAvail) - 1;
  int64_t MinVal = -(1ULL << BitsAvail);
  uint64_t PCRelTgtAddress = Cand->first;
  int64_t PCOffset = (int64_t)(PCRelTgtAddress - DotAddress);

  LLVM_DEBUG({
    if (Candidates.size() > 1)
      dbgs() << "Considering stub group with " << Candidates.size()
             << " candidates. DotAddress is " << Twine::utohexstr(DotAddress)
             << ", chosen candidate address is "
             << Twine::utohexstr(Cand->first) << "\n";
  });
  return (PCOffset < MinVal || PCOffset > MaxVal) ? nullptr : Cand->second;
}

BinaryBasicBlock *
LongJmpPass::lookupGlobalStub(const BinaryBasicBlock &SourceBB,
                              const MCInst &Inst, const MCSymbol *TgtSym,
                              uint64_t DotAddress) const {
  const BinaryFunction &Func = *SourceBB.getFunction();
  const StubGroupsTy &StubGroups =
      SourceBB.isCold() ? ColdStubGroups : HotStubGroups;
  return lookupStubFromGroup(StubGroups, Func, Inst, TgtSym, DotAddress);
}

BinaryBasicBlock *LongJmpPass::lookupLocalStub(const BinaryBasicBlock &SourceBB,
                                               const MCInst &Inst,
                                               const MCSymbol *TgtSym,
                                               uint64_t DotAddress) const {
  const BinaryFunction &Func = *SourceBB.getFunction();
  const DenseMap<const BinaryFunction *, StubGroupsTy> &StubGroups =
      SourceBB.isCold() ? ColdLocalStubs : HotLocalStubs;
  const auto Iter = StubGroups.find(&Func);
  if (Iter == StubGroups.end())
    return nullptr;
  return lookupStubFromGroup(Iter->second, Func, Inst, TgtSym, DotAddress);
}

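// Redirect the branch or call in \p Inst to go through a stub. An existing
// local or shared stub that is within range is reused when possible;
// otherwise a new stub is created at \p StubCreationAddress and returned so
// the caller can insert it into the layout.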
std::unique_ptr<BinaryBasicBlock>
LongJmpPass::replaceTargetWithStub(BinaryBasicBlock &BB, MCInst &Inst,
                                   uint64_t DotAddress,
                                   uint64_t StubCreationAddress) {
  const BinaryFunction &Func = *BB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  std::unique_ptr<BinaryBasicBlock> NewBB;
  const MCSymbol *TgtSym = BC.MIB->getTargetSymbol(Inst);
  assert(TgtSym && "getTargetSymbol failed");

  BinaryBasicBlock::BinaryBranchInfo BI{0, 0};
  BinaryBasicBlock *TgtBB = BB.getSuccessor(TgtSym, BI);
  auto LocalStubsIter = Stubs.find(&Func);

  // If the branch is already using a stub from another function, create a
  // local stub instead, since the foreign stub is now out of range.
  if (!TgtBB) {
    auto SSIter = SharedStubs.find(TgtSym);
    if (SSIter != SharedStubs.end()) {
      TgtSym = BC.MIB->getTargetSymbol(*SSIter->second->begin());
      --NumSharedStubs;
    }
  } else if (LocalStubsIter != Stubs.end() &&
             LocalStubsIter->second.count(TgtBB)) {
    // TgtBB and TgtSym now refer to the local out-of-range stub and its label.
    // Restore BB to its previous state, bypassing this stub.
    TgtSym = BC.MIB->getTargetSymbol(*TgtBB->begin());
    assert(TgtSym &&
           "First instruction is expected to contain a target symbol.");
    BinaryBasicBlock *TgtBBSucc = TgtBB->getSuccessor(TgtSym, BI);

    // TgtBB might have no successor, e.g. a stub for a function call.
    if (TgtBBSucc) {
      BB.replaceSuccessor(TgtBB, TgtBBSucc, BI.Count, BI.MispredictedCount);
      assert(TgtBB->getExecutionCount() >= BI.Count &&
             "At least equal or greater than the branch count.");
      TgtBB->setExecutionCount(TgtBB->getExecutionCount() - BI.Count);
    }

    TgtBB = TgtBBSucc;
  }

  BinaryBasicBlock *StubBB = lookupLocalStub(BB, Inst, TgtSym, DotAddress);
  // If not found, look it up in the globally shared stub maps if this is a
  // function call (TgtBB is not set).
  if (!StubBB && !TgtBB) {
    StubBB = lookupGlobalStub(BB, Inst, TgtSym, DotAddress);
    if (StubBB) {
      SharedStubs[StubBB->getLabel()] = StubBB;
      ++NumSharedStubs;
    }
  }
  MCSymbol *StubSymbol = StubBB ? StubBB->getLabel() : nullptr;

  if (!StubBB) {
    std::tie(NewBB, StubSymbol) =
        createNewStub(BB, TgtSym, /*is func?*/ !TgtBB, StubCreationAddress);
    StubBB = NewBB.get();
  }

  // Local branch
  if (TgtBB) {
    uint64_t OrigCount = BI.Count;
    uint64_t OrigMispreds = BI.MispredictedCount;
    BB.replaceSuccessor(TgtBB, StubBB, OrigCount, OrigMispreds);
    StubBB->setExecutionCount(StubBB->getExecutionCount() + OrigCount);
    if (NewBB) {
      StubBB->addSuccessor(TgtBB, OrigCount, OrigMispreds);
      StubBB->setIsCold(BB.isCold());
    }
    // Call / tail call
  } else {
    StubBB->setExecutionCount(StubBB->getExecutionCount() +
                              BB.getExecutionCount());
    if (NewBB) {
      assert(TgtBB == nullptr);
      StubBB->setIsCold(BB.isCold());
      // Set as entry point because this block is valid but has no preds.
      StubBB->getFunction()->addEntryPoint(*StubBB);
    }
  }
  BC.MIB->replaceBranchTarget(Inst, StubSymbol, BC.Ctx.get());

  return NewBB;
}

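// After every new tentative layout, refresh the cached address of each stub
// and re-sort the stub groups by address so the binary search in
// lookupStubFromGroup remains valid.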
void LongJmpPass::updateStubGroups() {
  auto update = [&](StubGroupsTy &StubGroups) {
    for (auto &KeyVal : StubGroups) {
      for (StubTy &Elem : KeyVal.second)
        Elem.first = BBAddresses[Elem.second];
      llvm::sort(KeyVal.second, llvm::less_first());
    }
  };

  for (auto &KeyVal : HotLocalStubs)
    update(KeyVal.second);
  for (auto &KeyVal : ColdLocalStubs)
    update(KeyVal.second);
  update(HotStubGroups);
  update(ColdStubGroups);
}

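// Assign a tentative output address to every basic block of \p Func, placing
// hot blocks starting at the function's hot address and cold blocks starting
// at its cold address.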
void LongJmpPass::tentativeBBLayout(const BinaryFunction &Func) {
  const BinaryContext &BC = Func.getBinaryContext();
  uint64_t HotDot = HotAddresses[&Func];
  uint64_t ColdDot = ColdAddresses[&Func];
  bool Cold = false;
  for (const BinaryBasicBlock *BB : Func.getLayout().blocks()) {
    if (Cold || BB->isCold()) {
      Cold = true;
      BBAddresses[BB] = ColdDot;
      ColdDot += BC.computeCodeSize(BB->begin(), BB->end());
    } else {
      BBAddresses[BB] = HotDot;
      HotDot += BC.computeCodeSize(BB->begin(), BB->end());
    }
  }
}

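// Lay out the cold fragments of all split functions contiguously, recording a
// tentative start address for each, and return the address past the last one.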
uint64_t LongJmpPass::tentativeLayoutRelocColdPart(
    const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions,
    uint64_t DotAddress) {
  DotAddress = alignTo(DotAddress, llvm::Align(opts::AlignFunctions));
  for (BinaryFunction *Func : SortedFunctions) {
    if (!Func->isSplit())
      continue;
    DotAddress = alignTo(DotAddress, Func->getMinAlignment());
    uint64_t Pad =
        offsetToAlignment(DotAddress, llvm::Align(Func->getAlignment()));
    if (Pad <= Func->getMaxColdAlignmentBytes())
      DotAddress += Pad;
    ColdAddresses[Func] = DotAddress;
    LLVM_DEBUG(dbgs() << Func->getPrintName() << " cold tentative: "
                      << Twine::utohexstr(DotAddress) << "\n");
    DotAddress += Func->estimateColdSize();
    DotAddress = alignTo(DotAddress, Func->getConstantIslandAlignment());
    DotAddress += Func->estimateConstantIslandSize();
  }
  return DotAddress;
}

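// Lay out all functions for relocation mode: functions are placed in the
// given order, the cold fragments of split functions are laid out as one
// contiguous region once the hot/cold frontier is crossed, and finally
// per-basic-block addresses are assigned.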
uint64_t LongJmpPass::tentativeLayoutRelocMode(
    const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions,
    uint64_t DotAddress) {
  // Compute hot cold frontier
  int64_t LastHotIndex = -1u;
  uint32_t CurrentIndex = 0;
  if (opts::HotFunctionsAtEnd) {
    for (BinaryFunction *BF : SortedFunctions) {
      if (BF->hasValidIndex()) {
        LastHotIndex = CurrentIndex;
        break;
      }

      ++CurrentIndex;
    }
  } else {
    for (BinaryFunction *BF : SortedFunctions) {
      if (!BF->hasValidIndex()) {
        LastHotIndex = CurrentIndex;
        break;
      }

      ++CurrentIndex;
    }
  }

  // Hot
  CurrentIndex = 0;
  bool ColdLayoutDone = false;
  auto runColdLayout = [&]() {
    DotAddress = tentativeLayoutRelocColdPart(BC, SortedFunctions, DotAddress);
    ColdLayoutDone = true;
    if (opts::HotFunctionsAtEnd)
      DotAddress = alignTo(DotAddress, opts::AlignText);
  };
  for (BinaryFunction *Func : SortedFunctions) {
    if (!BC.shouldEmit(*Func)) {
      HotAddresses[Func] = Func->getAddress();
      continue;
    }

    if (!ColdLayoutDone && CurrentIndex >= LastHotIndex)
      runColdLayout();

    DotAddress = alignTo(DotAddress, Func->getMinAlignment());
    uint64_t Pad =
        offsetToAlignment(DotAddress, llvm::Align(Func->getAlignment()));
    if (Pad <= Func->getMaxAlignmentBytes())
      DotAddress += Pad;
    HotAddresses[Func] = DotAddress;
    LLVM_DEBUG(dbgs() << Func->getPrintName() << " tentative: "
                      << Twine::utohexstr(DotAddress) << "\n");
    if (!Func->isSplit())
      DotAddress += Func->estimateSize();
    else
      DotAddress += Func->estimateHotSize();

    DotAddress = alignTo(DotAddress, Func->getConstantIslandAlignment());
    DotAddress += Func->estimateConstantIslandSize();
    ++CurrentIndex;
  }

  // Ensure that tentative code layout always runs for cold blocks.
  if (!ColdLayoutDone)
    runColdLayout();

  // BBs
  for (BinaryFunction *Func : SortedFunctions)
    tentativeBBLayout(*Func);

  return DotAddress;
}

void LongJmpPass::tentativeLayout(
    const BinaryContext &BC, std::vector<BinaryFunction *> &SortedFunctions) {
  uint64_t DotAddress = BC.LayoutStartAddress;

  if (!BC.HasRelocations) {
    for (BinaryFunction *Func : SortedFunctions) {
      HotAddresses[Func] = Func->getAddress();
      DotAddress = alignTo(DotAddress, ColdFragAlign);
      ColdAddresses[Func] = DotAddress;
      if (Func->isSplit())
        DotAddress += Func->estimateColdSize();
      tentativeBBLayout(*Func);
    }

    return;
  }

  // Relocation mode
  uint64_t EstimatedTextSize = 0;
  if (opts::UseOldText) {
    EstimatedTextSize = tentativeLayoutRelocMode(BC, SortedFunctions, 0);

    // Initial padding
    if (EstimatedTextSize <= BC.OldTextSectionSize) {
      DotAddress = BC.OldTextSectionAddress;
      uint64_t Pad =
          offsetToAlignment(DotAddress, llvm::Align(opts::AlignText));
      if (Pad + EstimatedTextSize <= BC.OldTextSectionSize) {
        DotAddress += Pad;
      }
    }
  }

  if (!EstimatedTextSize || EstimatedTextSize > BC.OldTextSectionSize)
    DotAddress = alignTo(BC.LayoutStartAddress, opts::AlignText);

  tentativeLayoutRelocMode(BC, SortedFunctions, DotAddress);
}

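// Return true if \p Inst already targets one of the stub blocks created for
// \p Func.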
bool LongJmpPass::usesStub(const BinaryFunction &Func,
                           const MCInst &Inst) const {
  const MCSymbol *TgtSym = Func.getBinaryContext().MIB->getTargetSymbol(Inst);
  const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(TgtSym);
  auto Iter = Stubs.find(&Func);
  if (Iter != Stubs.end())
    return Iter->second.count(TgtBB);
  return false;
}

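// Resolve the tentative address of \p Target: the cached block address when
// it maps to a basic block, the containing function's tentative hot address
// for a primary entry point, or the symbol value known to BinaryContext for
// anything else (including secondary entry points).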
uint64_t LongJmpPass::getSymbolAddress(const BinaryContext &BC,
                                       const MCSymbol *Target,
                                       const BinaryBasicBlock *TgtBB) const {
  if (TgtBB) {
    auto Iter = BBAddresses.find(TgtBB);
    assert(Iter != BBAddresses.end() && "Unrecognized BB");
    return Iter->second;
  }
  uint64_t EntryID = 0;
  const BinaryFunction *TargetFunc = BC.getFunctionForSymbol(Target, &EntryID);
  auto Iter = HotAddresses.find(TargetFunc);
  if (Iter == HotAddresses.end() || (TargetFunc && EntryID)) {
    // Look at BinaryContext's resolution for this symbol - this is a symbol
    // not mapped to a BinaryFunction.
    ErrorOr<uint64_t> ValueOrError = BC.getSymbolValue(*Target);
    assert(ValueOrError && "Unrecognized symbol");
    return *ValueOrError;
  }
  return Iter->second;
}

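// Grow the branch sequence inside a stub if its current encoding can no
// longer reach the stub's real target: a single branch is relaxed to a
// short-jump sequence, and a short jump to an absolute long jump. Sets
// \p Modified when the stub is rewritten.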
Error LongJmpPass::relaxStub(BinaryBasicBlock &StubBB, bool &Modified) {
  const BinaryFunction &Func = *StubBB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  const int Bits = StubBits[&StubBB];
  // Already working with the largest range?
  if (Bits == static_cast<int>(BC.AsmInfo->getCodePointerSize() * 8))
    return Error::success();

  const static int RangeShortJmp = BC.MIB->getShortJmpEncodingSize();
  const static int RangeSingleInstr = BC.MIB->getUncondBranchEncodingSize();
  const static uint64_t ShortJmpMask = ~((1ULL << RangeShortJmp) - 1);
  const static uint64_t SingleInstrMask =
      ~((1ULL << (RangeSingleInstr - 1)) - 1);

  const MCSymbol *RealTargetSym = BC.MIB->getTargetSymbol(*StubBB.begin());
  const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(RealTargetSym);
  uint64_t TgtAddress = getSymbolAddress(BC, RealTargetSym, TgtBB);
  uint64_t DotAddress = BBAddresses[&StubBB];
  uint64_t PCRelTgtAddress = DotAddress > TgtAddress ? DotAddress - TgtAddress
                                                     : TgtAddress - DotAddress;
  // If it fits in one instruction, do not relax
  if (!(PCRelTgtAddress & SingleInstrMask))
    return Error::success();

  // Fits short jmp
  if (!(PCRelTgtAddress & ShortJmpMask)) {
    if (Bits >= RangeShortJmp)
      return Error::success();

    LLVM_DEBUG(dbgs() << "Relaxing stub to short jump. PCRelTgtAddress = "
                      << Twine::utohexstr(PCRelTgtAddress)
                      << " RealTargetSym = " << RealTargetSym->getName()
                      << "\n");
    relaxStubToShortJmp(StubBB, RealTargetSym);
    StubBits[&StubBB] = RangeShortJmp;
    Modified = true;
    return Error::success();
  }

  // The long jmp uses an absolute address on AArch64, so it cannot be used
  // for PIC binaries.
  if (BC.isAArch64() && !BC.HasFixedLoadAddress)
    return createFatalBOLTError(
        "BOLT-ERROR: Unable to relax stub for PIC binary\n");

  LLVM_DEBUG(dbgs() << "Relaxing stub to long jump. PCRelTgtAddress = "
                    << Twine::utohexstr(PCRelTgtAddress)
                    << " RealTargetSym = " << RealTargetSym->getName() << "\n");
  relaxStubToLongJmp(StubBB, RealTargetSym);
  StubBits[&StubBB] = static_cast<int>(BC.AsmInfo->getCodePointerSize() * 8);
  Modified = true;
  return Error::success();
}

bool LongJmpPass::needsStub(const BinaryBasicBlock &BB, const MCInst &Inst,
                            uint64_t DotAddress) const {
  const BinaryFunction &Func = *BB.getFunction();
  const BinaryContext &BC = Func.getBinaryContext();
  const MCSymbol *TgtSym = BC.MIB->getTargetSymbol(Inst);
  assert(TgtSym && "getTargetSymbol failed");

  const BinaryBasicBlock *TgtBB = Func.getBasicBlockForLabel(TgtSym);
  // Check for shared stubs from foreign functions
  if (!TgtBB) {
    auto SSIter = SharedStubs.find(TgtSym);
    if (SSIter != SharedStubs.end())
      TgtBB = SSIter->second;
  }

  int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
  assert(BitsAvail < 63 && "PCRelEncodingSize is too large to use int64_t to "
                           "check for out-of-bounds.");
  int64_t MaxVal = (1ULL << BitsAvail) - 1;
  int64_t MinVal = -(1ULL << BitsAvail);

  uint64_t PCRelTgtAddress = getSymbolAddress(BC, TgtSym, TgtBB);
  int64_t PCOffset = (int64_t)(PCRelTgtAddress - DotAddress);

  return PCOffset < MinVal || PCOffset > MaxVal;
}

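// Insert stubs for every out-of-range direct branch or call in \p Func, relax
// existing stubs whose targets moved out of reach, and splice the newly
// created stub blocks into the layout.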
Error LongJmpPass::relax(BinaryFunction &Func, bool &Modified) {
  const BinaryContext &BC = Func.getBinaryContext();

  assert(BC.isAArch64() && "Unsupported arch");
  constexpr int InsnSize = 4; // AArch64
  std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>>
      Insertions;

  BinaryBasicBlock *Frontier = getBBAtHotColdSplitPoint(Func);
  uint64_t FrontierAddress = Frontier ? BBAddresses[Frontier] : 0;
  if (FrontierAddress)
    FrontierAddress += Frontier->getNumNonPseudos() * InsnSize;

  // Add necessary stubs for branch targets we know we can't fit in the
  // instruction
  for (BinaryBasicBlock &BB : Func) {
    uint64_t DotAddress = BBAddresses[&BB];
    // Stubs themselves are relaxed on the next loop
    if (Stubs[&Func].count(&BB))
      continue;

    for (MCInst &Inst : BB) {
      if (BC.MIB->isPseudo(Inst))
        continue;

      if (!mayNeedStub(BC, Inst)) {
        DotAddress += InsnSize;
        continue;
      }

      // Check and relax direct branch or call
      if (!needsStub(BB, Inst, DotAddress)) {
        DotAddress += InsnSize;
        continue;
      }
      Modified = true;

      // Insert stubs close to the patched BB if call, but far away from the
      // hot path if a branch, since this branch target is the cold region
      // (but first check that the far away stub will be in range).
      BinaryBasicBlock *InsertionPoint = &BB;
      if (Func.isSimple() && !BC.MIB->isCall(Inst) && FrontierAddress &&
          !BB.isCold()) {
        int BitsAvail = BC.MIB->getPCRelEncodingSize(Inst) - 1;
        uint64_t Mask = ~((1ULL << BitsAvail) - 1);
        assert(FrontierAddress > DotAddress &&
               "Hot code should be before the frontier");
        uint64_t PCRelTgt = FrontierAddress - DotAddress;
        if (!(PCRelTgt & Mask))
          InsertionPoint = Frontier;
      }
      // Always put stubs at the end of the function if non-simple. We can't
      // change the layout of non-simple functions because they have jump
      // tables that we do not control.
      if (!Func.isSimple())
        InsertionPoint = &*std::prev(Func.end());

      // Create a stub to handle a far-away target
      Insertions.emplace_back(InsertionPoint,
                              replaceTargetWithStub(BB, Inst, DotAddress,
                                                    InsertionPoint == Frontier
                                                        ? FrontierAddress
                                                        : DotAddress));

      DotAddress += InsnSize;
    }
  }

  // Relax stubs if necessary
  for (BinaryBasicBlock &BB : Func) {
    if (!Stubs[&Func].count(&BB) || !BB.isValid())
      continue;

    if (auto E = relaxStub(BB, Modified))
      return Error(std::move(E));
  }

  for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Elmt :
       Insertions) {
    if (!Elmt.second)
      continue;
    std::vector<std::unique_ptr<BinaryBasicBlock>> NewBBs;
    NewBBs.emplace_back(std::move(Elmt.second));
    Func.insertBasicBlocks(Elmt.first, std::move(NewBBs), true);
  }

  return Error::success();
}

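// Relax intra-function branches for the compact (<128MB) code model. Branches
// that cannot reach their target with their current encoding are redirected
// through a trampoline: a basic block containing a single unconditional
// branch, placed in the same fragment as the branch being relaxed.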
void LongJmpPass::relaxLocalBranches(BinaryFunction &BF) {
  BinaryContext &BC = BF.getBinaryContext();
  auto &MIB = BC.MIB;

  // Quick path.
  if (!BF.isSplit() && BF.estimateSize() < ShortestJumpSpan)
    return;

  auto isBranchOffsetInRange = [&](const MCInst &Inst, int64_t Offset) {
    const unsigned Bits = MIB->getPCRelEncodingSize(Inst);
    return isIntN(Bits, Offset);
  };

  auto isBlockInRange = [&](const MCInst &Inst, uint64_t InstAddress,
                            const BinaryBasicBlock &BB) {
    const int64_t Offset = BB.getOutputStartAddress() - InstAddress;
    return isBranchOffsetInRange(Inst, Offset);
  };

  // Keep track of *all* function trampolines that are going to be added to the
  // function layout at the end of relaxation.
  std::vector<std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>>>
      FunctionTrampolines;

  // Function fragments are relaxed independently.
  for (FunctionFragment &FF : BF.getLayout().fragments()) {
    // Fill out code size estimation for the fragment. Use output BB address
    // ranges to store offsets from the start of the function fragment.
    uint64_t CodeSize = 0;
    for (BinaryBasicBlock *BB : FF) {
      BB->setOutputStartAddress(CodeSize);
      CodeSize += BB->estimateSize();
      BB->setOutputEndAddress(CodeSize);
    }

    // Dynamically-updated size of the fragment.
    uint64_t FragmentSize = CodeSize;

    // Size of the trampoline in bytes.
    constexpr uint64_t TrampolineSize = 4;

    // Trampolines created for the fragment. DestinationBB -> TrampolineBB.
    // NB: here we store only the first trampoline created for DestinationBB.
    DenseMap<const BinaryBasicBlock *, BinaryBasicBlock *> FragmentTrampolines;

    // Create trampoline code after \p BB or at the end of the fragment if BB
    // is nullptr. If \p UpdateOffsets is true, update FragmentSize and offsets
    // for basic blocks affected by the insertion of the trampoline.
    auto addTrampolineAfter = [&](BinaryBasicBlock *BB,
                                  BinaryBasicBlock *TargetBB, uint64_t Count,
                                  bool UpdateOffsets = true) {
      FunctionTrampolines.emplace_back(BB ? BB : FF.back(),
                                       BF.createBasicBlock());
      BinaryBasicBlock *TrampolineBB = FunctionTrampolines.back().second.get();

      MCInst Inst;
      {
        auto L = BC.scopeLock();
        MIB->createUncondBranch(Inst, TargetBB->getLabel(), BC.Ctx.get());
      }
      TrampolineBB->addInstruction(Inst);
      TrampolineBB->addSuccessor(TargetBB, Count);
      TrampolineBB->setExecutionCount(Count);
      const uint64_t TrampolineAddress =
          BB ? BB->getOutputEndAddress() : FragmentSize;
      TrampolineBB->setOutputStartAddress(TrampolineAddress);
      TrampolineBB->setOutputEndAddress(TrampolineAddress + TrampolineSize);
      TrampolineBB->setFragmentNum(FF.getFragmentNum());

      if (!FragmentTrampolines.lookup(TargetBB))
        FragmentTrampolines[TargetBB] = TrampolineBB;

      if (!UpdateOffsets)
        return TrampolineBB;

      FragmentSize += TrampolineSize;

      // If the trampoline was added at the end of the fragment, offsets of
      // other fragments should stay intact.
      if (!BB)
        return TrampolineBB;

      // Update offsets for blocks after BB.
      for (BinaryBasicBlock *IBB : FF) {
        if (IBB->getOutputStartAddress() >= TrampolineAddress) {
          IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
                                     TrampolineSize);
          IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize);
        }
      }

      // Update offsets for trampolines in this fragment that are placed after
      // the new trampoline. Note that trampoline blocks are not part of the
      // function/fragment layout until we add them right before the return
      // from relaxLocalBranches().
      for (auto &Pair : FunctionTrampolines) {
        BinaryBasicBlock *IBB = Pair.second.get();
        if (IBB->getFragmentNum() != TrampolineBB->getFragmentNum())
          continue;
        if (IBB == TrampolineBB)
          continue;
        if (IBB->getOutputStartAddress() >= TrampolineAddress) {
          IBB->setOutputStartAddress(IBB->getOutputStartAddress() +
                                     TrampolineSize);
          IBB->setOutputEndAddress(IBB->getOutputEndAddress() + TrampolineSize);
        }
      }

      return TrampolineBB;
    };

    // Pre-populate trampolines by splitting unconditional branches from the
    // containing basic block.
    for (BinaryBasicBlock *BB : FF) {
      MCInst *Inst = BB->getLastNonPseudoInstr();
      if (!Inst || !MIB->isUnconditionalBranch(*Inst))
        continue;

      const MCSymbol *TargetSymbol = MIB->getTargetSymbol(*Inst);
      BB->eraseInstruction(BB->findInstruction(Inst));
      BB->setOutputEndAddress(BB->getOutputEndAddress() - TrampolineSize);

      BinaryBasicBlock::BinaryBranchInfo BI;
      BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol, BI);

      BinaryBasicBlock *TrampolineBB =
          addTrampolineAfter(BB, TargetBB, BI.Count, /*UpdateOffsets*/ false);
      BB->replaceSuccessor(TargetBB, TrampolineBB, BI.Count);
    }

    /// Relax the branch \p Inst in basic block \p BB that targets \p TargetBB.
    /// \p InstAddress contains offset of the branch from the start of the
    /// containing function fragment.
    auto relaxBranch = [&](BinaryBasicBlock *BB, MCInst &Inst,
                           uint64_t InstAddress, BinaryBasicBlock *TargetBB) {
      BinaryFunction *BF = BB->getParent();

      // Use branch taken count for optimal relaxation.
      const uint64_t Count = BB->getBranchInfo(*TargetBB).Count;
      assert(Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "Expected valid branch execution count");

      // Try to reuse an existing trampoline without introducing any new code.
      BinaryBasicBlock *TrampolineBB = FragmentTrampolines.lookup(TargetBB);
      if (TrampolineBB && isBlockInRange(Inst, InstAddress, *TrampolineBB)) {
        BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
        TrampolineBB->setExecutionCount(TrampolineBB->getExecutionCount() +
                                        Count);
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
        return;
      }

      // For cold branches, check if we can introduce a trampoline at the end
      // of the fragment that is within the branch reach. Note that such a
      // trampoline may change address later and become unreachable, in which
      // case we will need further relaxation.
      const int64_t OffsetToEnd = FragmentSize - InstAddress;
      if (Count == 0 && isBranchOffsetInRange(Inst, OffsetToEnd)) {
        TrampolineBB = addTrampolineAfter(nullptr, TargetBB, Count);
        BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());

        return;
      }

      // Insert a new block after the current one and use it as a trampoline.
      TrampolineBB = addTrampolineAfter(BB, TargetBB, Count);

      // If the other successor is a fall-through, invert the condition code.
      const BinaryBasicBlock *const NextBB =
          BF->getLayout().getBasicBlockAfter(BB, /*IgnoreSplits*/ false);
      if (BB->getConditionalSuccessor(false) == NextBB) {
        BB->swapConditionalSuccessors();
        auto L = BC.scopeLock();
        MIB->reverseBranchCondition(Inst, NextBB->getLabel(), BC.Ctx.get());
      } else {
        auto L = BC.scopeLock();
        MIB->replaceBranchTarget(Inst, TrampolineBB->getLabel(), BC.Ctx.get());
      }
      BB->replaceSuccessor(TargetBB, TrampolineBB, Count);
    };

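    // Iterate over the fragment until no branch needs relaxation. Each
    // relaxation may grow the fragment and shift offsets, which can push
    // other branch targets out of range.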
    bool MayNeedRelaxation;
    uint64_t NumIterations = 0;
    do {
      MayNeedRelaxation = false;
      ++NumIterations;
      for (auto BBI = FF.begin(); BBI != FF.end(); ++BBI) {
        BinaryBasicBlock *BB = *BBI;
        uint64_t NextInstOffset = BB->getOutputStartAddress();
        for (MCInst &Inst : *BB) {
          const size_t InstAddress = NextInstOffset;
          if (!MIB->isPseudo(Inst))
            NextInstOffset += 4;

          if (!mayNeedStub(BF.getBinaryContext(), Inst))
            continue;

          const size_t BitsAvailable = MIB->getPCRelEncodingSize(Inst);

          // Span of +/-128MB.
          if (BitsAvailable == LongestJumpBits)
            continue;

          const MCSymbol *TargetSymbol = MIB->getTargetSymbol(Inst);
          BinaryBasicBlock *TargetBB = BB->getSuccessor(TargetSymbol);
          assert(TargetBB &&
                 "Basic block target expected for conditional branch.");

          // Check if the relaxation is needed.
          if (TargetBB->getFragmentNum() == FF.getFragmentNum() &&
              isBlockInRange(Inst, InstAddress, *TargetBB))
            continue;

          relaxBranch(BB, Inst, InstAddress, TargetBB);

          MayNeedRelaxation = true;
        }
      }

      // We may have added new instructions, but the whole fragment is less
      // than the minimum branch span.
      if (FragmentSize < ShortestJumpSpan)
        MayNeedRelaxation = false;

    } while (MayNeedRelaxation);

    LLVM_DEBUG({
      if (NumIterations > 2) {
        dbgs() << "BOLT-DEBUG: relaxed fragment " << FF.getFragmentNum().get()
               << " of " << BF << " in " << NumIterations << " iterations\n";
      }
    });
    (void)NumIterations;
  }

  // Add trampoline blocks from all fragments to the layout.
  DenseMap<BinaryBasicBlock *, std::vector<std::unique_ptr<BinaryBasicBlock>>>
      Insertions;
  for (std::pair<BinaryBasicBlock *, std::unique_ptr<BinaryBasicBlock>> &Pair :
       FunctionTrampolines) {
    if (!Pair.second)
      continue;
    Insertions[Pair.first].emplace_back(std::move(Pair.second));
  }

  for (auto &Pair : Insertions) {
    BF.insertBasicBlocks(Pair.first, std::move(Pair.second),
                         /*UpdateLayout*/ true, /*UpdateCFI*/ true,
                         /*RecomputeLPs*/ false);
  }
}

Error LongJmpPass::runOnFunctions(BinaryContext &BC) {

  if (opts::CompactCodeModel) {
    BC.outs()
        << "BOLT-INFO: relaxing branches for compact code model (<128MB)\n";

    ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
      relaxLocalBranches(BF);
    };

    ParallelUtilities::PredicateTy SkipPredicate =
        [&](const BinaryFunction &BF) {
          return !BC.shouldEmit(BF) || !BF.isSimple();
        };

    ParallelUtilities::runOnEachFunction(
        BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
        SkipPredicate, "RelaxLocalBranches");

    return Error::success();
  }

  BC.outs() << "BOLT-INFO: Starting stub-insertion pass\n";
  std::vector<BinaryFunction *> Sorted = BC.getSortedFunctions();
  bool Modified;
  uint32_t Iterations = 0;
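  // Repeat until a fixed point is reached: inserting or relaxing stubs grows
  // the code, which moves tentative addresses and may push other branch
  // targets out of range.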
  do {
    ++Iterations;
    Modified = false;
    tentativeLayout(BC, Sorted);
    updateStubGroups();
    for (BinaryFunction *Func : Sorted) {
      if (auto E = relax(*Func, Modified))
        return Error(std::move(E));
      // Don't ruin non-simple functions; they can't afford to have their
      // layout changed.
      if (Modified && Func->isSimple())
        Func->fixBranches();
    }
  } while (Modified);
  BC.outs() << "BOLT-INFO: Inserted " << NumHotStubs
            << " stubs in the hot area and " << NumColdStubs
            << " stubs in the cold area. Shared " << NumSharedStubs
            << " times, iterated " << Iterations << " times.\n";
  return Error::success();
}
} // namespace bolt
} // namespace llvm