1//===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the BinaryContext class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Core/BinaryContext.h"
14#include "bolt/Core/BinaryEmitter.h"
15#include "bolt/Core/BinaryFunction.h"
16#include "bolt/Utils/CommandLineOpts.h"
17#include "bolt/Utils/Utils.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23#include "llvm/MC/MCAssembler.h"
24#include "llvm/MC/MCContext.h"
25#include "llvm/MC/MCDisassembler/MCDisassembler.h"
26#include "llvm/MC/MCInstPrinter.h"
27#include "llvm/MC/MCObjectStreamer.h"
28#include "llvm/MC/MCObjectWriter.h"
29#include "llvm/MC/MCRegisterInfo.h"
30#include "llvm/MC/MCSectionELF.h"
31#include "llvm/MC/MCStreamer.h"
32#include "llvm/MC/MCSubtargetInfo.h"
33#include "llvm/MC/MCSymbol.h"
34#include "llvm/Support/CommandLine.h"
35#include "llvm/Support/Error.h"
36#include "llvm/Support/Regex.h"
37#include <algorithm>
38#include <functional>
39#include <iterator>
40#include <unordered_set>
41
42using namespace llvm;
43
44#undef DEBUG_TYPE
45#define DEBUG_TYPE "bolt"
46
47namespace opts {
48
49static cl::opt<bool>
50 NoHugePages("no-huge-pages",
51 cl::desc("use regular size pages for code alignment"),
52 cl::Hidden, cl::cat(BoltCategory));
53
54static cl::opt<bool>
55PrintDebugInfo("print-debug-info",
56 cl::desc("print debug info when printing functions"),
57 cl::Hidden,
58 cl::ZeroOrMore,
59 cl::cat(BoltCategory));
60
61cl::opt<bool> PrintRelocations(
62 "print-relocations",
63 cl::desc("print relocations when printing functions/objects"), cl::Hidden,
64 cl::cat(BoltCategory));
65
66static cl::opt<bool>
67PrintMemData("print-mem-data",
68 cl::desc("print memory data annotations when printing functions"),
69 cl::Hidden,
70 cl::ZeroOrMore,
71 cl::cat(BoltCategory));
72
73cl::opt<std::string> CompDirOverride(
74 "comp-dir-override",
75 cl::desc("overrides DW_AT_comp_dir, and provides an alternative base "
76 "location, which is used with DW_AT_dwo_name to construct a path "
77 "to *.dwo files."),
78 cl::Hidden, cl::init(Val: ""), cl::cat(BoltCategory));
79} // namespace opts
80
81namespace llvm {
82namespace bolt {
83
84char BOLTError::ID = 0;
85
86BOLTError::BOLTError(bool IsFatal, const Twine &S)
87 : IsFatal(IsFatal), Msg(S.str()) {}
88
89void BOLTError::log(raw_ostream &OS) const {
90 if (IsFatal)
91 OS << "FATAL ";
92 StringRef ErrMsg = StringRef(Msg);
93 // Prepend our error prefix if it is missing
94 if (ErrMsg.empty()) {
95 OS << "BOLT-ERROR\n";
96 } else {
97 if (!ErrMsg.starts_with(Prefix: "BOLT-ERROR"))
98 OS << "BOLT-ERROR: ";
99 OS << ErrMsg << "\n";
100 }
101}
102
103std::error_code BOLTError::convertToErrorCode() const {
104 return inconvertibleErrorCode();
105}
106
107Error createNonFatalBOLTError(const Twine &S) {
108 return make_error<BOLTError>(/*IsFatal*/ Args: false, Args: S);
109}
110
111Error createFatalBOLTError(const Twine &S) {
112 return make_error<BOLTError>(/*IsFatal*/ Args: true, Args: S);
113}
114
115void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) {
116 handleAllErrors(E: Error(std::move(E)), Handlers: [&](const BOLTError &E) {
117 if (!E.getMessage().empty())
118 E.log(OS&: this->errs());
119 if (E.isFatal())
120 exit(status: 1);
121 });
122}
123
124BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
125 std::unique_ptr<DWARFContext> DwCtx,
126 std::unique_ptr<Triple> TheTriple,
127 std::shared_ptr<orc::SymbolStringPool> SSP,
128 const Target *TheTarget, std::string TripleName,
129 std::unique_ptr<MCCodeEmitter> MCE,
130 std::unique_ptr<MCObjectFileInfo> MOFI,
131 std::unique_ptr<const MCAsmInfo> AsmInfo,
132 std::unique_ptr<const MCInstrInfo> MII,
133 std::unique_ptr<const MCSubtargetInfo> STI,
134 std::unique_ptr<MCInstPrinter> InstPrinter,
135 std::unique_ptr<const MCInstrAnalysis> MIA,
136 std::unique_ptr<MCPlusBuilder> MIB,
137 std::unique_ptr<const MCRegisterInfo> MRI,
138 std::unique_ptr<MCDisassembler> DisAsm,
139 JournalingStreams Logger)
140 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
141 TheTriple(std::move(TheTriple)), SSP(std::move(SSP)),
142 TheTarget(TheTarget), TripleName(TripleName), MCE(std::move(MCE)),
143 MOFI(std::move(MOFI)), AsmInfo(std::move(AsmInfo)), MII(std::move(MII)),
144 STI(std::move(STI)), InstPrinter(std::move(InstPrinter)),
145 MIA(std::move(MIA)), MIB(std::move(MIB)), MRI(std::move(MRI)),
146 DisAsm(std::move(DisAsm)), Logger(Logger), InitialDynoStats(isAArch64()) {
147 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
148 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
149}
150
151BinaryContext::~BinaryContext() {
152 for (BinarySection *Section : Sections)
153 delete Section;
154 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
155 delete InjectedFunction;
156 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
157 delete JTI.second;
158 clearBinaryData();
159}
160
161/// Create BinaryContext for a given architecture \p ArchName and
162/// triple \p TripleName.
163Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
164 Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP,
165 StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC,
166 std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
167 StringRef ArchName = "";
168 std::string FeaturesStr = "";
169 switch (TheTriple.getArch()) {
170 case llvm::Triple::x86_64:
171 if (Features)
172 return createFatalBOLTError(
173 S: "x86_64 target does not use SubtargetFeatures");
174 ArchName = "x86-64";
175 FeaturesStr = "+nopl";
176 break;
177 case llvm::Triple::aarch64:
178 if (Features)
179 return createFatalBOLTError(
180 S: "AArch64 target does not use SubtargetFeatures");
181 ArchName = "aarch64";
182 FeaturesStr = "+all";
183 break;
184 case llvm::Triple::riscv64: {
185 ArchName = "riscv64";
186 if (!Features)
187 return createFatalBOLTError(S: "RISCV target needs SubtargetFeatures");
188 // We rely on relaxation for some transformations (e.g., promoting all calls
189 // to PseudoCALL and then making JITLink relax them). Since the relax
190 // feature is not stored in the object file, we manually enable it.
191 Features->AddFeature(String: "relax");
192 FeaturesStr = Features->getString();
193 break;
194 }
195 default:
196 return createStringError(EC: std::errc::not_supported,
197 Fmt: "BOLT-ERROR: Unrecognized machine in ELF file");
198 }
199
200 const std::string TripleName = TheTriple.str();
201
202 std::string Error;
203 const Target *TheTarget =
204 TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
205 if (!TheTarget)
206 return createStringError(EC: make_error_code(e: std::errc::not_supported),
207 S: Twine("BOLT-ERROR: ", Error));
208
209 std::unique_ptr<const MCRegisterInfo> MRI(
210 TheTarget->createMCRegInfo(TT: TripleName));
211 if (!MRI)
212 return createStringError(
213 EC: make_error_code(e: std::errc::not_supported),
214 S: Twine("BOLT-ERROR: no register info for target ", TripleName));
215
216 // Set up disassembler.
217 std::unique_ptr<MCAsmInfo> AsmInfo(
218 TheTarget->createMCAsmInfo(MRI: *MRI, TheTriple: TripleName, Options: MCTargetOptions()));
219 if (!AsmInfo)
220 return createStringError(
221 EC: make_error_code(e: std::errc::not_supported),
222 S: Twine("BOLT-ERROR: no assembly info for target ", TripleName));
223 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
224 // we want to emit such names as using @PLT without double quotes to convey
225 // variant kind to the assembler. BOLT doesn't rely on the linker so we can
226 // override the default AsmInfo behavior to emit names the way we want.
227 AsmInfo->setAllowAtInName(true);
228
229 std::unique_ptr<const MCSubtargetInfo> STI(
230 TheTarget->createMCSubtargetInfo(TheTriple: TripleName, CPU: "", Features: FeaturesStr));
231 if (!STI)
232 return createStringError(
233 EC: make_error_code(e: std::errc::not_supported),
234 S: Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
235
236 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
237 if (!MII)
238 return createStringError(
239 EC: make_error_code(e: std::errc::not_supported),
240 S: Twine("BOLT-ERROR: no instruction info for target ", TripleName));
241
242 std::unique_ptr<MCContext> Ctx(
243 new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
244 std::unique_ptr<MCObjectFileInfo> MOFI(
245 TheTarget->createMCObjectFileInfo(Ctx&: *Ctx, PIC: IsPIC));
246 Ctx->setObjectFileInfo(MOFI.get());
247 // We do not support X86 Large code model. Change this in the future.
248 bool Large = false;
249 if (TheTriple.getArch() == llvm::Triple::aarch64)
250 Large = true;
251 unsigned LSDAEncoding =
252 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
253 if (IsPIC) {
254 LSDAEncoding = dwarf::DW_EH_PE_pcrel |
255 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
256 }
257
258 std::unique_ptr<MCDisassembler> DisAsm(
259 TheTarget->createMCDisassembler(STI: *STI, Ctx&: *Ctx));
260
261 if (!DisAsm)
262 return createStringError(
263 EC: make_error_code(e: std::errc::not_supported),
264 S: Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
265
266 std::unique_ptr<const MCInstrAnalysis> MIA(
267 TheTarget->createMCInstrAnalysis(Info: MII.get()));
268 if (!MIA)
269 return createStringError(
270 EC: make_error_code(e: std::errc::not_supported),
271 S: Twine("BOLT-ERROR: failed to create instruction analysis for target ",
272 TripleName));
273
274 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
275 std::unique_ptr<MCInstPrinter> InstructionPrinter(
276 TheTarget->createMCInstPrinter(T: TheTriple, SyntaxVariant: AsmPrinterVariant, MAI: *AsmInfo,
277 MII: *MII, MRI: *MRI));
278 if (!InstructionPrinter)
279 return createStringError(
280 EC: make_error_code(e: std::errc::not_supported),
281 S: Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
282 InstructionPrinter->setPrintImmHex(true);
283
284 std::unique_ptr<MCCodeEmitter> MCE(
285 TheTarget->createMCCodeEmitter(II: *MII, Ctx&: *Ctx));
286
287 auto BC = std::make_unique<BinaryContext>(
288 args: std::move(Ctx), args: std::move(DwCtx), args: std::make_unique<Triple>(args&: TheTriple),
289 args: std::move(SSP), args&: TheTarget, args: std::string(TripleName), args: std::move(MCE),
290 args: std::move(MOFI), args: std::move(AsmInfo), args: std::move(MII), args: std::move(STI),
291 args: std::move(InstructionPrinter), args: std::move(MIA), args: nullptr, args: std::move(MRI),
292 args: std::move(DisAsm), args&: Logger);
293
294 BC->LSDAEncoding = LSDAEncoding;
295
296 BC->MAB = std::unique_ptr<MCAsmBackend>(
297 BC->TheTarget->createMCAsmBackend(STI: *BC->STI, MRI: *BC->MRI, Options: MCTargetOptions()));
298
299 BC->setFilename(InputFileName);
300
301 BC->HasFixedLoadAddress = !IsPIC;
302
303 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
304 BC->TheTarget->createMCDisassembler(STI: *BC->STI, Ctx&: *BC->Ctx));
305
306 if (!BC->SymbolicDisAsm)
307 return createStringError(
308 EC: make_error_code(e: std::errc::not_supported),
309 S: Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
310
311 return std::move(BC);
312}
313
314bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
315 if (opts::HotText &&
316 (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
317 return true;
318
319 if (opts::HotData &&
320 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
321 return true;
322
323 if (SymbolName == "_end")
324 return true;
325
326 return false;
327}
328
329std::unique_ptr<MCObjectWriter>
330BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
331 return MAB->createObjectWriter(OS);
332}
333
334bool BinaryContext::validateObjectNesting() const {
335 auto Itr = BinaryDataMap.begin();
336 auto End = BinaryDataMap.end();
337 bool Valid = true;
338 while (Itr != End) {
339 auto Next = std::next(x: Itr);
340 while (Next != End &&
341 Itr->second->getSection() == Next->second->getSection() &&
342 Itr->second->containsRange(Address: Next->second->getAddress(),
343 Size: Next->second->getSize())) {
344 if (Next->second->Parent != Itr->second) {
345 this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
346 << "BOLT-WARNING: " << *Itr->second << "\n"
347 << "BOLT-WARNING: " << *Next->second << "\n";
348 Valid = false;
349 }
350 ++Next;
351 }
352 Itr = Next;
353 }
354 return Valid;
355}
356
357bool BinaryContext::validateHoles() const {
358 bool Valid = true;
359 for (BinarySection &Section : sections()) {
360 for (const Relocation &Rel : Section.relocations()) {
361 uint64_t RelAddr = Rel.Offset + Section.getAddress();
362 const BinaryData *BD = getBinaryDataContainingAddress(Address: RelAddr);
363 if (!BD) {
364 this->errs()
365 << "BOLT-WARNING: no BinaryData found for relocation at address"
366 << " 0x" << Twine::utohexstr(Val: RelAddr) << " in " << Section.getName()
367 << "\n";
368 Valid = false;
369 } else if (!BD->getAtomicRoot()) {
370 this->errs()
371 << "BOLT-WARNING: no atomic BinaryData found for relocation at "
372 << "address 0x" << Twine::utohexstr(Val: RelAddr) << " in "
373 << Section.getName() << "\n";
374 Valid = false;
375 }
376 }
377 }
378 return Valid;
379}
380
381void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
382 const uint64_t Address = GAI->second->getAddress();
383 const uint64_t Size = GAI->second->getSize();
384
385 auto fixParents = [&](BinaryDataMapType::iterator Itr,
386 BinaryData *NewParent) {
387 BinaryData *OldParent = Itr->second->Parent;
388 Itr->second->Parent = NewParent;
389 ++Itr;
390 while (Itr != BinaryDataMap.end() && OldParent &&
391 Itr->second->Parent == OldParent) {
392 Itr->second->Parent = NewParent;
393 ++Itr;
394 }
395 };
396
397 // Check if the previous symbol contains the newly added symbol.
398 if (GAI != BinaryDataMap.begin()) {
399 BinaryData *Prev = std::prev(x: GAI)->second;
400 while (Prev) {
401 if (Prev->getSection() == GAI->second->getSection() &&
402 Prev->containsRange(Address, Size)) {
403 fixParents(GAI, Prev);
404 } else {
405 fixParents(GAI, nullptr);
406 }
407 Prev = Prev->Parent;
408 }
409 }
410
411 // Check if the newly added symbol contains any subsequent symbols.
412 if (Size != 0) {
413 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
414 auto Itr = std::next(x: GAI);
415 while (
416 Itr != BinaryDataMap.end() &&
417 BD->containsRange(Address: Itr->second->getAddress(), Size: Itr->second->getSize())) {
418 Itr->second->Parent = BD;
419 ++Itr;
420 }
421 }
422}
423
424iterator_range<BinaryContext::binary_data_iterator>
425BinaryContext::getSubBinaryData(BinaryData *BD) {
426 auto Start = std::next(x: BinaryDataMap.find(x: BD->getAddress()));
427 auto End = Start;
428 while (End != BinaryDataMap.end() && BD->isAncestorOf(BD: End->second))
429 ++End;
430 return make_range(x: Start, y: End);
431}
432
433std::pair<const MCSymbol *, uint64_t>
434BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
435 bool IsPCRel) {
436 if (isAArch64()) {
437 // Check if this is an access to a constant island and create bookkeeping
438 // to keep track of it and emit it later as part of this function.
439 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
440 return std::make_pair(x&: IslandSym, y: 0);
441
442 // Detect custom code written in assembly that refers to arbitrary
443 // constant islands from other functions. Write this reference so we
444 // can pull this constant island and emit it as part of this function
445 // too.
446 auto IslandIter = AddressToConstantIslandMap.lower_bound(x: Address);
447
448 if (IslandIter != AddressToConstantIslandMap.begin() &&
449 (IslandIter == AddressToConstantIslandMap.end() ||
450 IslandIter->first > Address))
451 --IslandIter;
452
453 if (IslandIter != AddressToConstantIslandMap.end()) {
454 // Fall-back to referencing the original constant island in the presence
455 // of dynamic relocs, as we currently do not support cloning them.
456 // Notice: we might fail to link because of this, if the original constant
457 // island we are referring would be emitted too far away.
458 if (IslandIter->second->hasDynamicRelocationAtIsland()) {
459 MCSymbol *IslandSym =
460 IslandIter->second->getOrCreateIslandAccess(Address);
461 if (IslandSym)
462 return std::make_pair(x&: IslandSym, y: 0);
463 } else if (MCSymbol *IslandSym =
464 IslandIter->second->getOrCreateProxyIslandAccess(Address,
465 Referrer&: BF)) {
466 BF.createIslandDependency(Island: IslandSym, BF: IslandIter->second);
467 return std::make_pair(x&: IslandSym, y: 0);
468 }
469 }
470 }
471
472 // Note that the address does not necessarily have to reside inside
473 // a section, it could be an absolute address too.
474 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
475 if (Section && Section->isText()) {
476 if (BF.containsAddress(PC: Address, /*UseMaxSize=*/isAArch64())) {
477 if (Address != BF.getAddress()) {
478 // The address could potentially escape. Mark it as another entry
479 // point into the function.
480 if (opts::Verbosity >= 1) {
481 this->outs() << "BOLT-INFO: potentially escaped address 0x"
482 << Twine::utohexstr(Val: Address) << " in function " << BF
483 << '\n';
484 }
485 BF.HasInternalLabelReference = true;
486 return std::make_pair(
487 x: BF.addEntryPointAtOffset(Offset: Address - BF.getAddress()), y: 0);
488 }
489 } else {
490 addInterproceduralReference(Function: &BF, Address);
491 }
492 }
493
494 // With relocations, catch jump table references outside of the basic block
495 // containing the indirect jump.
496 if (HasRelocations) {
497 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
498 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
499 const MCSymbol *Symbol =
500 getOrCreateJumpTable(Function&: BF, Address, Type: JumpTable::JTT_PIC);
501
502 return std::make_pair(x&: Symbol, y: 0);
503 }
504 }
505
506 if (BinaryData *BD = getBinaryDataContainingAddress(Address))
507 return std::make_pair(x: BD->getSymbol(), y: Address - BD->getAddress());
508
509 // TODO: use DWARF info to get size/alignment here?
510 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, Prefix: "DATAat");
511 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
512 return std::make_pair(x&: TargetSymbol, y: 0);
513}
514
515MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
516 BinaryFunction &BF) {
517 if (!isX86())
518 return MemoryContentsType::UNKNOWN;
519
520 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
521 if (!Section) {
522 // No section - possibly an absolute address. Since we don't allow
523 // internal function addresses to escape the function scope - we
524 // consider it a tail call.
525 if (opts::Verbosity > 1) {
526 this->errs() << "BOLT-WARNING: no section for address 0x"
527 << Twine::utohexstr(Val: Address) << " referenced from function "
528 << BF << '\n';
529 }
530 return MemoryContentsType::UNKNOWN;
531 }
532
533 if (Section->isVirtual()) {
534 // The contents are filled at runtime.
535 return MemoryContentsType::UNKNOWN;
536 }
537
538 // No support for jump tables in code yet.
539 if (Section->isText())
540 return MemoryContentsType::UNKNOWN;
541
542 // Start with checking for PIC jump table. We expect non-PIC jump tables
543 // to have high 32 bits set to 0.
544 if (analyzeJumpTable(Address, Type: JumpTable::JTT_PIC, BF))
545 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
546
547 if (analyzeJumpTable(Address, Type: JumpTable::JTT_NORMAL, BF))
548 return MemoryContentsType::POSSIBLE_JUMP_TABLE;
549
550 return MemoryContentsType::UNKNOWN;
551}
552
553bool BinaryContext::analyzeJumpTable(const uint64_t Address,
554 const JumpTable::JumpTableType Type,
555 const BinaryFunction &BF,
556 const uint64_t NextJTAddress,
557 JumpTable::AddressesType *EntriesAsAddress,
558 bool *HasEntryInFragment) const {
559 // Target address of __builtin_unreachable.
560 const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();
561
562 // Is one of the targets __builtin_unreachable?
563 bool HasUnreachable = false;
564
565 // Does one of the entries match function start address?
566 bool HasStartAsEntry = false;
567
568 // Number of targets other than __builtin_unreachable.
569 uint64_t NumRealEntries = 0;
570
571 // Size of the jump table without trailing __builtin_unreachable entries.
572 size_t TrimmedSize = 0;
573
574 auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
575 if (!EntriesAsAddress)
576 return;
577 EntriesAsAddress->emplace_back(args&: EntryAddress);
578 if (!Unreachable)
579 TrimmedSize = EntriesAsAddress->size();
580 };
581
582 auto printEntryDiagnostics = [&](raw_ostream &OS,
583 const BinaryFunction *TargetBF) {
584 OS << "FAIL: function doesn't contain this address\n";
585 if (!TargetBF)
586 return;
587 OS << " ! function containing this address: " << *TargetBF << '\n';
588 if (!TargetBF->isFragment())
589 return;
590 OS << " ! is a fragment with parents: ";
591 ListSeparator LS;
592 for (BinaryFunction *Parent : TargetBF->ParentFragments)
593 OS << LS << *Parent;
594 OS << '\n';
595 };
596
597 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
598 if (!Section)
599 return false;
600
601 // The upper bound is defined by containing object, section limits, and
602 // the next jump table in memory.
603 uint64_t UpperBound = Section->getEndAddress();
604 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
605 if (JumpTableBD && JumpTableBD->getSize()) {
606 assert(JumpTableBD->getEndAddress() <= UpperBound &&
607 "data object cannot cross a section boundary");
608 UpperBound = JumpTableBD->getEndAddress();
609 }
610 if (NextJTAddress)
611 UpperBound = std::min(a: NextJTAddress, b: UpperBound);
612
613 LLVM_DEBUG({
614 using JTT = JumpTable::JumpTableType;
615 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
616 Address, BF.getPrintName(),
617 Type == JTT::JTT_PIC ? "PIC" : "Normal");
618 });
619 const uint64_t EntrySize = getJumpTableEntrySize(Type);
620 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
621 EntryAddress += EntrySize) {
622 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress)
623 << " -> ");
624 // Check if there's a proper relocation against the jump table entry.
625 if (HasRelocations) {
626 if (Type == JumpTable::JTT_PIC &&
627 !DataPCRelocations.count(x: EntryAddress)) {
628 LLVM_DEBUG(
629 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
630 break;
631 }
632 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(Address: EntryAddress)) {
633 LLVM_DEBUG(
634 dbgs()
635 << "FAIL: JTT_NORMAL table, no relocation for this address\n");
636 break;
637 }
638 }
639
640 const uint64_t Value =
641 (Type == JumpTable::JTT_PIC)
642 ? Address + *getSignedValueAtAddress(Address: EntryAddress, Size: EntrySize)
643 : *getPointerAtAddress(Address: EntryAddress);
644
645 // __builtin_unreachable() case.
646 if (Value == UnreachableAddress) {
647 addEntryAddress(Value, /*Unreachable*/ true);
648 HasUnreachable = true;
649 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
650 continue;
651 }
652
653 // Function start is another special case. It is allowed in the jump table,
654 // but we need at least one another regular entry to distinguish the table
655 // from, e.g. a function pointer array.
656 if (Value == BF.getAddress()) {
657 HasStartAsEntry = true;
658 addEntryAddress(Value);
659 continue;
660 }
661
662 // Function or one of its fragments.
663 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Address: Value);
664 if (!TargetBF || !areRelatedFragments(LHS: TargetBF, RHS: &BF)) {
665 LLVM_DEBUG(printEntryDiagnostics(dbgs(), TargetBF));
666 (void)printEntryDiagnostics;
667 break;
668 }
669
670 // Check there's an instruction at this offset.
671 if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
672 !TargetBF->getInstructionAtOffset(Offset: Value - TargetBF->getAddress())) {
673 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
674 break;
675 }
676
677 ++NumRealEntries;
678 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
679
680 if (TargetBF != &BF && HasEntryInFragment)
681 *HasEntryInFragment = true;
682 addEntryAddress(Value);
683 }
684
685 // Trim direct/normal jump table to exclude trailing unreachable entries that
686 // can collide with a function address.
687 if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
688 TrimmedSize != EntriesAsAddress->size() &&
689 getBinaryFunctionAtAddress(Address: UnreachableAddress))
690 EntriesAsAddress->resize(new_size: TrimmedSize);
691
692 // It's a jump table if the number of real entries is more than 1, or there's
693 // one real entry and one or more special targets. If there are only multiple
694 // special targets, then it's not a jump table.
695 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
696}
697
698void BinaryContext::populateJumpTables() {
699 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
700 << '\n');
701 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
702 ++JTI) {
703 JumpTable *JT = JTI->second;
704
705 if (!llvm::all_of(Range&: JT->Parents, P: std::mem_fn(pm: &BinaryFunction::isSimple)))
706 continue;
707
708 uint64_t NextJTAddress = 0;
709 auto NextJTI = std::next(x: JTI);
710 if (NextJTI != JTE)
711 NextJTAddress = NextJTI->second->getAddress();
712
713 const bool Success =
714 analyzeJumpTable(Address: JT->getAddress(), Type: JT->Type, BF: *(JT->Parents[0]),
715 NextJTAddress, EntriesAsAddress: &JT->EntriesAsAddress, HasEntryInFragment: &JT->IsSplit);
716 if (!Success) {
717 LLVM_DEBUG({
718 dbgs() << "failed to analyze ";
719 JT->print(dbgs());
720 if (NextJTI != JTE) {
721 dbgs() << "next ";
722 NextJTI->second->print(dbgs());
723 }
724 });
725 llvm_unreachable("jump table heuristic failure");
726 }
727 for (BinaryFunction *Frag : JT->Parents) {
728 if (JT->IsSplit)
729 Frag->setHasIndirectTargetToSplitFragment(true);
730 for (uint64_t EntryAddress : JT->EntriesAsAddress)
731 // if target is builtin_unreachable
732 if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
733 Frag->IgnoredBranches.emplace_back(Args: EntryAddress - Frag->getAddress(),
734 Args: Frag->getSize());
735 } else if (EntryAddress >= Frag->getAddress() &&
736 EntryAddress < Frag->getAddress() + Frag->getSize()) {
737 Frag->registerReferencedOffset(Offset: EntryAddress - Frag->getAddress());
738 }
739 }
740
741 // In strict mode, erase PC-relative relocation record. Later we check that
742 // all such records are erased and thus have been accounted for.
743 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
744 for (uint64_t Address = JT->getAddress();
745 Address < JT->getAddress() + JT->getSize();
746 Address += JT->EntrySize) {
747 DataPCRelocations.erase(position: DataPCRelocations.find(x: Address));
748 }
749 }
750
751 // Mark to skip the function and all its fragments.
752 for (BinaryFunction *Frag : JT->Parents)
753 if (Frag->hasIndirectTargetToSplitFragment())
754 addFragmentsToSkip(Function: Frag);
755 }
756
757 if (opts::StrictMode && DataPCRelocations.size()) {
758 LLVM_DEBUG({
759 dbgs() << DataPCRelocations.size()
760 << " unclaimed PC-relative relocations left in data:\n";
761 for (uint64_t Reloc : DataPCRelocations)
762 dbgs() << Twine::utohexstr(Reloc) << '\n';
763 });
764 assert(0 && "unclaimed PC-relative relocations left in data\n");
765 }
766 clearList(List&: DataPCRelocations);
767}
768
769void BinaryContext::skipMarkedFragments() {
770 std::vector<BinaryFunction *> FragmentQueue;
771 // Copy the functions to FragmentQueue.
772 FragmentQueue.assign(first: FragmentsToSkip.begin(), last: FragmentsToSkip.end());
773 auto addToWorklist = [&](BinaryFunction *Function) -> void {
774 if (FragmentsToSkip.count(x: Function))
775 return;
776 FragmentQueue.push_back(x: Function);
777 addFragmentsToSkip(Function);
778 };
779 // Functions containing split jump tables need to be skipped with all
780 // fragments (transitively).
781 for (size_t I = 0; I != FragmentQueue.size(); I++) {
782 BinaryFunction *BF = FragmentQueue[I];
783 assert(FragmentsToSkip.count(BF) &&
784 "internal error in traversing function fragments");
785 if (opts::Verbosity >= 1)
786 this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
787 BF->setSimple(false);
788 BF->setHasIndirectTargetToSplitFragment(true);
789
790 llvm::for_each(Range&: BF->Fragments, F: addToWorklist);
791 llvm::for_each(Range&: BF->ParentFragments, F: addToWorklist);
792 }
793 if (!FragmentsToSkip.empty())
794 this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size()
795 << " function" << (FragmentsToSkip.size() == 1 ? "" : "s")
796 << " due to cold fragments\n";
797}
798
799MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
800 uint64_t Size,
801 uint16_t Alignment,
802 unsigned Flags) {
803 auto Itr = BinaryDataMap.find(x: Address);
804 if (Itr != BinaryDataMap.end()) {
805 assert(Itr->second->getSize() == Size || !Size);
806 return Itr->second->getSymbol();
807 }
808
809 std::string Name = (Prefix + "0x" + Twine::utohexstr(Val: Address)).str();
810 assert(!GlobalSymbols.count(Name) && "created name is not unique");
811 return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
812}
813
814MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
815 return Ctx->getOrCreateSymbol(Name);
816}
817
818BinaryFunction *BinaryContext::createBinaryFunction(
819 const std::string &Name, BinarySection &Section, uint64_t Address,
820 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
821 auto Result = BinaryFunctions.emplace(
822 args&: Address, args: BinaryFunction(Name, Section, Address, Size, *this));
823 assert(Result.second == true && "unexpected duplicate function");
824 BinaryFunction *BF = &Result.first->second;
825 registerNameAtAddress(Name, Address, Size: SymbolSize ? SymbolSize : Size,
826 Alignment);
827 setSymbolToFunctionMap(Sym: BF->getSymbol(), BF);
828 return BF;
829}
830
831const MCSymbol *
832BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
833 JumpTable::JumpTableType Type) {
834 // Two fragments of same function access same jump table
835 if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
836 assert(JT->Type == Type && "jump table types have to match");
837 assert(Address == JT->getAddress() && "unexpected non-empty jump table");
838
839 if (llvm::is_contained(Range&: JT->Parents, Element: &Function))
840 return JT->getFirstLabel();
841
842 // Prevent associating a jump table to a specific fragment twice.
843 auto isSibling = std::bind(f: &BinaryContext::areRelatedFragments, args: this,
844 args: &Function, args: std::placeholders::_1);
845 assert(llvm::all_of(JT->Parents, isSibling) &&
846 "cannot re-use jump table of a different function");
847 (void)isSibling;
848 if (opts::Verbosity > 2) {
849 this->outs() << "BOLT-INFO: multiple fragments access the same jump table"
850 << ": " << *JT->Parents[0] << "; " << Function << '\n';
851 JT->print(OS&: this->outs());
852 }
853 if (JT->Parents.size() == 1)
854 JT->Parents.front()->setHasIndirectTargetToSplitFragment(true);
855 Function.setHasIndirectTargetToSplitFragment(true);
856 // Duplicate the entry for the parent function for easy access
857 JT->Parents.push_back(Elt: &Function);
858 Function.JumpTables.emplace(args&: Address, args&: JT);
859 return JT->getFirstLabel();
860 }
861
862 // Re-use the existing symbol if possible.
863 MCSymbol *JTLabel = nullptr;
864 if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
865 if (!isInternalSymbolName(Name: Object->getSymbol()->getName()))
866 JTLabel = Object->getSymbol();
867 }
868
869 const uint64_t EntrySize = getJumpTableEntrySize(Type);
870 if (!JTLabel) {
871 const std::string JumpTableName = generateJumpTableName(BF: Function, Address);
872 JTLabel = registerNameAtAddress(Name: JumpTableName, Address, Size: 0, Alignment: EntrySize);
873 }
874
875 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
876 << " in function " << Function << '\n');
877
878 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
879 JumpTable::LabelMapType{{0, JTLabel}},
880 *getSectionForAddress(Address));
881 JT->Parents.push_back(Elt: &Function);
882 if (opts::Verbosity > 2)
883 JT->print(OS&: this->outs());
884 JumpTables.emplace(args&: Address, args&: JT);
885
886 // Duplicate the entry for the parent function for easy access.
887 Function.JumpTables.emplace(args&: Address, args&: JT);
888 return JTLabel;
889}
890
891std::pair<uint64_t, const MCSymbol *>
892BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
893 const MCSymbol *OldLabel) {
894 auto L = scopeLock();
895 unsigned Offset = 0;
896 bool Found = false;
897 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
898 if (Elmt.second != OldLabel)
899 continue;
900 Offset = Elmt.first;
901 Found = true;
902 break;
903 }
904 assert(Found && "Label not found");
905 (void)Found;
906 MCSymbol *NewLabel = Ctx->createNamedTempSymbol(Name: "duplicatedJT");
907 JumpTable *NewJT =
908 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
909 JumpTable::LabelMapType{{Offset, NewLabel}},
910 *getSectionForAddress(Address: JT->getAddress()));
911 NewJT->Parents = JT->Parents;
912 NewJT->Entries = JT->Entries;
913 NewJT->Counts = JT->Counts;
914 uint64_t JumpTableID = ++DuplicatedJumpTables;
915 // Invert it to differentiate from regular jump tables whose IDs are their
916 // addresses in the input binary memory space
917 JumpTableID = ~JumpTableID;
918 JumpTables.emplace(args&: JumpTableID, args&: NewJT);
919 Function.JumpTables.emplace(args&: JumpTableID, args&: NewJT);
920 return std::make_pair(x&: JumpTableID, y&: NewLabel);
921}
922
923std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
924 uint64_t Address) {
925 size_t Id;
926 uint64_t Offset = 0;
927 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
928 Offset = Address - JT->getAddress();
929 auto JTLabelsIt = JT->Labels.find(x: Offset);
930 if (JTLabelsIt != JT->Labels.end())
931 return std::string(JTLabelsIt->second->getName());
932
933 auto JTIdsIt = JumpTableIds.find(x: JT->getAddress());
934 assert(JTIdsIt != JumpTableIds.end());
935 Id = JTIdsIt->second;
936 } else {
937 Id = JumpTableIds[Address] = BF.JumpTables.size();
938 }
939 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(val: Id) +
940 (Offset ? ("." + std::to_string(val: Offset)) : ""));
941}
942
943bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
944 // FIXME: aarch64 support is missing.
945 if (!isX86())
946 return true;
947
948 if (BF.getSize() == BF.getMaxSize())
949 return true;
950
951 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
952 assert(FunctionData && "cannot get function as data");
953
954 uint64_t Offset = BF.getSize();
955 MCInst Instr;
956 uint64_t InstrSize = 0;
957 uint64_t InstrAddress = BF.getAddress() + Offset;
958 using std::placeholders::_1;
959
960 // Skip instructions that satisfy the predicate condition.
961 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
962 const uint64_t StartOffset = Offset;
963 for (; Offset < BF.getMaxSize();
964 Offset += InstrSize, InstrAddress += InstrSize) {
965 if (!DisAsm->getInstruction(Instr, Size&: InstrSize, Bytes: FunctionData->slice(N: Offset),
966 Address: InstrAddress, CStream&: nulls()))
967 break;
968 if (!Predicate(Instr))
969 break;
970 }
971
972 return Offset - StartOffset;
973 };
974
975 // Skip a sequence of zero bytes.
976 auto skipZeros = [&]() {
977 const uint64_t StartOffset = Offset;
978 for (; Offset < BF.getMaxSize(); ++Offset)
979 if ((*FunctionData)[Offset] != 0)
980 break;
981
982 return Offset - StartOffset;
983 };
984
985 // Accept the whole padding area filled with breakpoints.
986 auto isBreakpoint = std::bind(f: &MCPlusBuilder::isBreakpoint, args: MIB.get(), args: _1);
987 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
988 return true;
989
990 auto isNoop = std::bind(f: &MCPlusBuilder::isNoop, args: MIB.get(), args: _1);
991
992 // Some functions have a jump to the next function or to the padding area
993 // inserted after the body.
994 auto isSkipJump = [&](const MCInst &Instr) {
995 uint64_t TargetAddress = 0;
996 if (MIB->isUnconditionalBranch(Inst: Instr) &&
997 MIB->evaluateBranch(Inst: Instr, Addr: InstrAddress, Size: InstrSize, Target&: TargetAddress)) {
998 if (TargetAddress >= InstrAddress + InstrSize &&
999 TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
1000 return true;
1001 }
1002 }
1003 return false;
1004 };
1005
1006 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
1007 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
1008 skipZeros())
1009 ;
1010
1011 if (Offset == BF.getMaxSize())
1012 return true;
1013
1014 if (opts::Verbosity >= 1) {
1015 this->errs() << "BOLT-WARNING: bad padding at address 0x"
1016 << Twine::utohexstr(Val: BF.getAddress() + BF.getSize())
1017 << " starting at offset " << (Offset - BF.getSize())
1018 << " in function " << BF << '\n'
1019 << FunctionData->slice(N: BF.getSize(),
1020 M: BF.getMaxSize() - BF.getSize())
1021 << '\n';
1022 }
1023
1024 return false;
1025}
1026
1027void BinaryContext::adjustCodePadding() {
1028 for (auto &BFI : BinaryFunctions) {
1029 BinaryFunction &BF = BFI.second;
1030 if (!shouldEmit(Function: BF))
1031 continue;
1032
1033 if (!hasValidCodePadding(BF)) {
1034 if (HasRelocations) {
1035 this->errs() << "BOLT-WARNING: function " << BF
1036 << " has invalid padding. Ignoring the function\n";
1037 BF.setIgnored();
1038 } else {
1039 BF.setMaxSize(BF.getSize());
1040 }
1041 }
1042 }
1043}
1044
1045MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
1046 uint64_t Size,
1047 uint16_t Alignment,
1048 unsigned Flags) {
1049 // Register the name with MCContext.
1050 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1051
1052 auto GAI = BinaryDataMap.find(x: Address);
1053 BinaryData *BD;
1054 if (GAI == BinaryDataMap.end()) {
1055 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1056 BinarySection &Section =
1057 SectionOrErr ? SectionOrErr.get() : absoluteSection();
1058 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1059 Section, Flags);
1060 GAI = BinaryDataMap.emplace(args&: Address, args&: BD).first;
1061 GlobalSymbols[Name] = BD;
1062 updateObjectNesting(GAI);
1063 } else {
1064 BD = GAI->second;
1065 if (!BD->hasName(Name)) {
1066 GlobalSymbols[Name] = BD;
1067 BD->updateSize(N: Size);
1068 BD->Symbols.push_back(x: Symbol);
1069 }
1070 }
1071
1072 return Symbol;
1073}
1074
1075const BinaryData *
1076BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1077 auto NI = BinaryDataMap.lower_bound(x: Address);
1078 auto End = BinaryDataMap.end();
1079 if ((NI != End && Address == NI->first) ||
1080 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1081 if (NI->second->containsAddress(Address))
1082 return NI->second;
1083
1084 // If this is a sub-symbol, see if a parent data contains the address.
1085 const BinaryData *BD = NI->second->getParent();
1086 while (BD) {
1087 if (BD->containsAddress(Address))
1088 return BD;
1089 BD = BD->getParent();
1090 }
1091 }
1092 return nullptr;
1093}
1094
1095BinaryData *BinaryContext::getGOTSymbol() {
1096 // First tries to find a global symbol with that name
1097 BinaryData *GOTSymBD = getBinaryDataByName(Name: "_GLOBAL_OFFSET_TABLE_");
1098 if (GOTSymBD)
1099 return GOTSymBD;
1100
1101 // This symbol might be hidden from run-time link, so fetch the local
1102 // definition if available.
1103 GOTSymBD = getBinaryDataByName(Name: "_GLOBAL_OFFSET_TABLE_/1");
1104 if (!GOTSymBD)
1105 return nullptr;
1106
1107 // If the local symbol is not unique, fail
1108 unsigned Index = 2;
1109 SmallString<30> Storage;
1110 while (const BinaryData *BD =
1111 getBinaryDataByName(Name: Twine("_GLOBAL_OFFSET_TABLE_/")
1112 .concat(Suffix: Twine(Index++))
1113 .toStringRef(Out&: Storage)))
1114 if (BD->getAddress() != GOTSymBD->getAddress())
1115 return nullptr;
1116
1117 return GOTSymBD;
1118}
1119
1120bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1121 auto NI = BinaryDataMap.find(x: Address);
1122 assert(NI != BinaryDataMap.end());
1123 if (NI == BinaryDataMap.end())
1124 return false;
1125 // TODO: it's possible that a jump table starts at the same address
1126 // as a larger blob of private data. When we set the size of the
1127 // jump table, it might be smaller than the total blob size. In this
1128 // case we just leave the original size since (currently) it won't really
1129 // affect anything.
1130 assert((!NI->second->Size || NI->second->Size == Size ||
1131 (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1132 "can't change the size of a symbol that has already had its "
1133 "size set");
1134 if (!NI->second->Size) {
1135 NI->second->Size = Size;
1136 updateObjectNesting(GAI: NI);
1137 return true;
1138 }
1139 return false;
1140}
1141
1142void BinaryContext::generateSymbolHashes() {
1143 auto isPadding = [](const BinaryData &BD) {
1144 StringRef Contents = BD.getSection().getContents();
1145 StringRef SymData = Contents.substr(Start: BD.getOffset(), N: BD.getSize());
1146 return (BD.getName().starts_with(Prefix: "HOLEat") ||
1147 SymData.find_first_not_of(C: 0) == StringRef::npos);
1148 };
1149
1150 uint64_t NumCollisions = 0;
1151 for (auto &Entry : BinaryDataMap) {
1152 BinaryData &BD = *Entry.second;
1153 StringRef Name = BD.getName();
1154
1155 if (!isInternalSymbolName(Name))
1156 continue;
1157
1158 // First check if a non-anonymous alias exists and move it to the front.
1159 if (BD.getSymbols().size() > 1) {
1160 auto Itr = llvm::find_if(Range&: BD.getSymbols(), P: [&](const MCSymbol *Symbol) {
1161 return !isInternalSymbolName(Name: Symbol->getName());
1162 });
1163 if (Itr != BD.getSymbols().end()) {
1164 size_t Idx = std::distance(first: BD.getSymbols().begin(), last: Itr);
1165 std::swap(a&: BD.getSymbols()[0], b&: BD.getSymbols()[Idx]);
1166 continue;
1167 }
1168 }
1169
1170 // We have to skip 0 size symbols since they will all collide.
1171 if (BD.getSize() == 0) {
1172 continue;
1173 }
1174
1175 const uint64_t Hash = BD.getSection().hash(BD);
1176 const size_t Idx = Name.find(Str: "0x");
1177 std::string NewName =
1178 (Twine(Name.substr(Start: 0, N: Idx)) + "_" + Twine::utohexstr(Val: Hash)).str();
1179 if (getBinaryDataByName(Name: NewName)) {
1180 // Ignore collisions for symbols that appear to be padding
1181 // (i.e. all zeros or a "hole")
1182 if (!isPadding(BD)) {
1183 if (opts::Verbosity) {
1184 this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1185 << " with new name (" << NewName << "), skipping.\n";
1186 }
1187 ++NumCollisions;
1188 }
1189 continue;
1190 }
1191 BD.Symbols.insert(position: BD.Symbols.begin(), x: Ctx->getOrCreateSymbol(Name: NewName));
1192 GlobalSymbols[NewName] = &BD;
1193 }
1194 if (NumCollisions) {
1195 this->errs() << "BOLT-WARNING: " << NumCollisions
1196 << " collisions detected while hashing binary objects";
1197 if (!opts::Verbosity)
1198 this->errs() << ". Use -v=1 to see the list.";
1199 this->errs() << '\n';
1200 }
1201}
1202
1203bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1204 BinaryFunction &Function) {
1205 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1206 if (TargetFunction.isChildOf(Other: Function))
1207 return true;
1208 TargetFunction.addParentFragment(BF&: Function);
1209 Function.addFragment(BF&: TargetFunction);
1210 FragmentClasses.unionSets(V1: &TargetFunction, V2: &Function);
1211 if (!HasRelocations) {
1212 TargetFunction.setSimple(false);
1213 Function.setSimple(false);
1214 }
1215 if (opts::Verbosity >= 1) {
1216 this->outs() << "BOLT-INFO: marking " << TargetFunction
1217 << " as a fragment of " << Function << '\n';
1218 }
1219 return true;
1220}
1221
1222void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1223 MCInst &LoadLowBits,
1224 MCInst &LoadHiBits,
1225 uint64_t Target) {
1226 const MCSymbol *TargetSymbol;
1227 uint64_t Addend = 0;
1228 std::tie(args&: TargetSymbol, args&: Addend) = handleAddressRef(Address: Target, BF,
1229 /*IsPCRel*/ true);
1230 int64_t Val;
1231 MIB->replaceImmWithSymbolRef(Inst&: LoadHiBits, Symbol: TargetSymbol, Addend, Ctx: Ctx.get(), Value&: Val,
1232 RelType: ELF::R_AARCH64_ADR_PREL_PG_HI21);
1233 MIB->replaceImmWithSymbolRef(Inst&: LoadLowBits, Symbol: TargetSymbol, Addend, Ctx: Ctx.get(),
1234 Value&: Val, RelType: ELF::R_AARCH64_ADD_ABS_LO12_NC);
1235}
1236
1237bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1238 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1239 if (TargetFunction)
1240 return false;
1241
1242 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1243 assert(Section && "cannot get section for referenced address");
1244 if (!Section->isText())
1245 return false;
1246
1247 bool Ret = false;
1248 StringRef SectionContents = Section->getContents();
1249 uint64_t Offset = Address - Section->getAddress();
1250 const uint64_t MaxSize = SectionContents.size() - Offset;
1251 const uint8_t *Bytes =
1252 reinterpret_cast<const uint8_t *>(SectionContents.data());
1253 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1254
1255 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1256 MCInst &Instruction, uint64_t Offset,
1257 uint64_t AbsoluteInstrAddr,
1258 uint64_t TotalSize) -> bool {
1259 MCInst *TargetHiBits, *TargetLowBits;
1260 uint64_t TargetAddress, Count;
1261 Count = MIB->matchLinkerVeneer(Begin: Instructions.begin(), End: Instructions.end(),
1262 Address: AbsoluteInstrAddr, CurInst: Instruction, TargetHiBits,
1263 TargetLowBits, Target&: TargetAddress);
1264 if (!Count)
1265 return false;
1266
1267 if (MatchOnly)
1268 return true;
1269
1270 // NOTE The target symbol was created during disassemble's
1271 // handleExternalReference
1272 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, Prefix: "FUNCat");
1273 BinaryFunction *Veneer = createBinaryFunction(Name: VeneerSymbol->getName().str(),
1274 Section&: *Section, Address, Size: TotalSize);
1275 addAdrpAddRelocAArch64(BF&: *Veneer, LoadLowBits&: *TargetLowBits, LoadHiBits&: *TargetHiBits,
1276 Target: TargetAddress);
1277 MIB->addAnnotation(Inst&: Instruction, Name: "AArch64Veneer", Val: true);
1278 Veneer->addInstruction(Offset, Instruction: std::move(Instruction));
1279 --Count;
1280 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1281 MIB->addAnnotation(Inst&: It->second, Name: "AArch64Veneer", Val: true);
1282 Veneer->addInstruction(Offset: It->first, Instruction: std::move(It->second));
1283 }
1284
1285 Veneer->getOrCreateLocalLabel(Address);
1286 Veneer->setMaxSize(TotalSize);
1287 Veneer->updateState(State: BinaryFunction::State::Disassembled);
1288 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x"
1289 << Twine::utohexstr(Address) << "\n");
1290 return true;
1291 };
1292
1293 uint64_t Size = 0, TotalSize = 0;
1294 BinaryFunction::InstrMapType VeneerInstructions;
1295 for (Offset = 0; Offset < MaxSize; Offset += Size) {
1296 MCInst Instruction;
1297 const uint64_t AbsoluteInstrAddr = Address + Offset;
1298 if (!SymbolicDisAsm->getInstruction(Instr&: Instruction, Size, Bytes: Data.slice(N: Offset),
1299 Address: AbsoluteInstrAddr, CStream&: nulls()))
1300 break;
1301
1302 TotalSize += Size;
1303 if (MIB->isBranch(Inst: Instruction)) {
1304 Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1305 AbsoluteInstrAddr, TotalSize);
1306 break;
1307 }
1308
1309 VeneerInstructions.emplace(args&: Offset, args: std::move(Instruction));
1310 }
1311
1312 return Ret;
1313}
1314
1315void BinaryContext::processInterproceduralReferences() {
1316 for (const std::pair<BinaryFunction *, uint64_t> &It :
1317 InterproceduralReferences) {
1318 BinaryFunction &Function = *It.first;
1319 uint64_t Address = It.second;
1320 // Process interprocedural references from ignored functions in BAT mode
1321 // (non-simple in non-relocation mode) to properly register entry points
1322 if (!Address || (Function.isIgnored() && !HasBATSection))
1323 continue;
1324
1325 BinaryFunction *TargetFunction =
1326 getBinaryFunctionContainingAddress(Address);
1327 if (&Function == TargetFunction)
1328 continue;
1329
1330 if (TargetFunction) {
1331 if (TargetFunction->isFragment() &&
1332 !areRelatedFragments(LHS: TargetFunction, RHS: &Function)) {
1333 this->errs()
1334 << "BOLT-WARNING: interprocedural reference between unrelated "
1335 "fragments: "
1336 << Function.getPrintName() << " and "
1337 << TargetFunction->getPrintName() << '\n';
1338 }
1339 if (uint64_t Offset = Address - TargetFunction->getAddress())
1340 TargetFunction->addEntryPointAtOffset(Offset);
1341
1342 continue;
1343 }
1344
1345 // Check if address falls in function padding space - this could be
1346 // unmarked data in code. In this case adjust the padding space size.
1347 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1348 assert(Section && "cannot get section for referenced address");
1349
1350 if (!Section->isText())
1351 continue;
1352
1353 // PLT requires special handling and could be ignored in this context.
1354 StringRef SectionName = Section->getName();
1355 if (SectionName == ".plt" || SectionName == ".plt.got")
1356 continue;
1357
1358 // Check if it is aarch64 veneer written at Address
1359 if (isAArch64() && handleAArch64Veneer(Address))
1360 continue;
1361
1362 if (opts::processAllFunctions()) {
1363 this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1364 << "object in code at address 0x"
1365 << Twine::utohexstr(Val: Address) << " belonging to section "
1366 << SectionName << " in current mode\n";
1367 exit(status: 1);
1368 }
1369
1370 TargetFunction = getBinaryFunctionContainingAddress(Address,
1371 /*CheckPastEnd=*/false,
1372 /*UseMaxSize=*/true);
1373 // We are not going to overwrite non-simple functions, but for simple
1374 // ones - adjust the padding size.
1375 if (TargetFunction && TargetFunction->isSimple()) {
1376 this->errs()
1377 << "BOLT-WARNING: function " << *TargetFunction
1378 << " has an object detected in a padding region at address 0x"
1379 << Twine::utohexstr(Val: Address) << '\n';
1380 TargetFunction->setMaxSize(TargetFunction->getSize());
1381 }
1382 }
1383
1384 InterproceduralReferences.clear();
1385}
1386
1387void BinaryContext::postProcessSymbolTable() {
1388 fixBinaryDataHoles();
1389 bool Valid = true;
1390 for (auto &Entry : BinaryDataMap) {
1391 BinaryData *BD = Entry.second;
1392 if ((BD->getName().starts_with(Prefix: "SYMBOLat") ||
1393 BD->getName().starts_with(Prefix: "DATAat")) &&
1394 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1395 BD->getSection()) {
1396 this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
1397 << "\n";
1398 Valid = false;
1399 }
1400 }
1401 assert(Valid);
1402 (void)Valid;
1403 generateSymbolHashes();
1404}
1405
1406void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1407 BinaryFunction &ParentBF) {
1408 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1409 "cannot merge functions with multiple entry points");
1410
1411 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1412 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1413 SymbolToFunctionMapMutex, std::defer_lock);
1414
1415 const StringRef ChildName = ChildBF.getOneName();
1416
1417 // Move symbols over and update bookkeeping info.
1418 for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1419 ParentBF.getSymbols().push_back(Elt: Symbol);
1420 WriteSymbolMapLock.lock();
1421 SymbolToFunctionMap[Symbol] = &ParentBF;
1422 WriteSymbolMapLock.unlock();
1423 // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1424 }
1425 ChildBF.getSymbols().clear();
1426
1427 // Move other names the child function is known under.
1428 llvm::move(Range&: ChildBF.Aliases, Out: std::back_inserter(x&: ParentBF.Aliases));
1429 ChildBF.Aliases.clear();
1430
1431 if (HasRelocations) {
1432 // Merge execution counts of ChildBF into those of ParentBF.
1433 // Without relocations, we cannot reliably merge profiles as both functions
1434 // continue to exist and either one can be executed.
1435 ChildBF.mergeProfileDataInto(BF&: ParentBF);
1436
1437 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1438 std::defer_lock);
1439 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1440 std::defer_lock);
1441 // Remove ChildBF from the global set of functions in relocs mode.
1442 ReadBfsLock.lock();
1443 auto FI = BinaryFunctions.find(x: ChildBF.getAddress());
1444 ReadBfsLock.unlock();
1445
1446 assert(FI != BinaryFunctions.end() && "function not found");
1447 assert(&ChildBF == &FI->second && "function mismatch");
1448
1449 WriteBfsLock.lock();
1450 ChildBF.clearDisasmState();
1451 FI = BinaryFunctions.erase(position: FI);
1452 WriteBfsLock.unlock();
1453
1454 } else {
1455 // In non-relocation mode we keep the function, but rename it.
1456 std::string NewName = "__ICF_" + ChildName.str();
1457
1458 WriteCtxLock.lock();
1459 ChildBF.getSymbols().push_back(Elt: Ctx->getOrCreateSymbol(Name: NewName));
1460 WriteCtxLock.unlock();
1461
1462 ChildBF.setFolded(&ParentBF);
1463 }
1464
1465 ParentBF.setHasFunctionsFoldedInto();
1466}
1467
1468void BinaryContext::fixBinaryDataHoles() {
1469 assert(validateObjectNesting() && "object nesting inconsistency detected");
1470
1471 for (BinarySection &Section : allocatableSections()) {
1472 std::vector<std::pair<uint64_t, uint64_t>> Holes;
1473
1474 auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1475 BinaryData *BD = Itr->second;
1476 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1477 (BD->getName().starts_with(Prefix: "SYMBOLat0x") ||
1478 BD->getName().starts_with(Prefix: "DATAat0x") ||
1479 BD->getName().starts_with(Prefix: "ANONYMOUS")));
1480 return !isHole && BD->getSection() == Section && !BD->getParent();
1481 };
1482
1483 auto BDStart = BinaryDataMap.begin();
1484 auto BDEnd = BinaryDataMap.end();
1485 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1486 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1487
1488 uint64_t EndAddress = Section.getAddress();
1489
1490 while (Itr != End) {
1491 if (Itr->second->getAddress() > EndAddress) {
1492 uint64_t Gap = Itr->second->getAddress() - EndAddress;
1493 Holes.emplace_back(args&: EndAddress, args&: Gap);
1494 }
1495 EndAddress = Itr->second->getEndAddress();
1496 ++Itr;
1497 }
1498
1499 if (EndAddress < Section.getEndAddress())
1500 Holes.emplace_back(args&: EndAddress, args: Section.getEndAddress() - EndAddress);
1501
1502 // If there is already a symbol at the start of the hole, grow that symbol
1503 // to cover the rest. Otherwise, create a new symbol to cover the hole.
1504 for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1505 BinaryData *BD = getBinaryDataAtAddress(Address: Hole.first);
1506 if (BD) {
1507 // BD->getSection() can be != Section if there are sections that
1508 // overlap. In this case it is probably safe to just skip the holes
1509 // since the overlapping section will not(?) have any symbols in it.
1510 if (BD->getSection() == Section)
1511 setBinaryDataSize(Address: Hole.first, Size: Hole.second);
1512 } else {
1513 getOrCreateGlobalSymbol(Address: Hole.first, Prefix: "HOLEat", Size: Hole.second, Alignment: 1);
1514 }
1515 }
1516 }
1517
1518 assert(validateObjectNesting() && "object nesting inconsistency detected");
1519 assert(validateHoles() && "top level hole detected in object map");
1520}
1521
1522void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1523 const BinarySection *CurrentSection = nullptr;
1524 bool FirstSection = true;
1525
1526 for (auto &Entry : BinaryDataMap) {
1527 const BinaryData *BD = Entry.second;
1528 const BinarySection &Section = BD->getSection();
1529 if (FirstSection || Section != *CurrentSection) {
1530 uint64_t Address, Size;
1531 StringRef Name = Section.getName();
1532 if (Section) {
1533 Address = Section.getAddress();
1534 Size = Section.getSize();
1535 } else {
1536 Address = BD->getAddress();
1537 Size = BD->getSize();
1538 }
1539 OS << "BOLT-INFO: Section " << Name << ", "
1540 << "0x" + Twine::utohexstr(Val: Address) << ":"
1541 << "0x" + Twine::utohexstr(Val: Address + Size) << "/" << Size << "\n";
1542 CurrentSection = &Section;
1543 FirstSection = false;
1544 }
1545
1546 OS << "BOLT-INFO: ";
1547 const BinaryData *P = BD->getParent();
1548 while (P) {
1549 OS << " ";
1550 P = P->getParent();
1551 }
1552 OS << *BD << "\n";
1553 }
1554}
1555
1556Expected<unsigned> BinaryContext::getDwarfFile(
1557 StringRef Directory, StringRef FileName, unsigned FileNumber,
1558 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1559 unsigned CUID, unsigned DWARFVersion) {
1560 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1561 return Table.tryGetFile(Directory, FileName, Checksum, Source, DwarfVersion: DWARFVersion,
1562 FileNumber);
1563}
1564
1565unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1566 const uint32_t SrcCUID,
1567 unsigned FileIndex) {
1568 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(Offset: SrcCUID);
1569 const DWARFDebugLine::LineTable *LineTable =
1570 DwCtx->getLineTableForUnit(U: SrcUnit);
1571 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1572 LineTable->Prologue.FileNames;
1573 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1574 // means empty dir.
1575 assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1576 "FileIndex out of range for the compilation unit.");
1577 StringRef Dir = "";
1578 if (FileNames[FileIndex - 1].DirIdx != 0) {
1579 if (std::optional<const char *> DirName = dwarf::toString(
1580 V: LineTable->Prologue
1581 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1582 Dir = *DirName;
1583 }
1584 }
1585 StringRef FileName = "";
1586 if (std::optional<const char *> FName =
1587 dwarf::toString(V: FileNames[FileIndex - 1].Name))
1588 FileName = *FName;
1589 assert(FileName != "");
1590 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(Offset: DestCUID);
1591 return cantFail(ValOrErr: getDwarfFile(Directory: Dir, FileName, FileNumber: 0, Checksum: std::nullopt, Source: std::nullopt,
1592 CUID: DestCUID, DWARFVersion: DstUnit->getVersion()));
1593}
1594
1595std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1596 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1597 llvm::transform(Range: llvm::make_second_range(c&: BinaryFunctions),
1598 d_first: SortedFunctions.begin(),
1599 F: [](BinaryFunction &BF) { return &BF; });
1600
1601 llvm::stable_sort(Range&: SortedFunctions, C: compareBinaryFunctionByIndex);
1602 return SortedFunctions;
1603}
1604
1605std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1606 std::vector<BinaryFunction *> AllFunctions;
1607 AllFunctions.reserve(n: BinaryFunctions.size() + InjectedBinaryFunctions.size());
1608 llvm::transform(Range: llvm::make_second_range(c&: BinaryFunctions),
1609 d_first: std::back_inserter(x&: AllFunctions),
1610 F: [](BinaryFunction &BF) { return &BF; });
1611 llvm::copy(Range&: InjectedBinaryFunctions, Out: std::back_inserter(x&: AllFunctions));
1612
1613 return AllFunctions;
1614}
1615
1616std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1617 auto Iter = DWOCUs.find(x: DWOId);
1618 if (Iter == DWOCUs.end())
1619 return std::nullopt;
1620
1621 return Iter->second;
1622}
1623
1624DWARFContext *BinaryContext::getDWOContext() const {
1625 if (DWOCUs.empty())
1626 return nullptr;
1627 return &DWOCUs.begin()->second->getContext();
1628}
1629
1630/// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1631void BinaryContext::preprocessDWODebugInfo() {
1632 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1633 DWARFUnit *const DwarfUnit = CU.get();
1634 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1635 std::string DWOName = dwarf::toString(
1636 V: DwarfUnit->getUnitDIE().find(
1637 Attrs: {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1638 Default: "");
1639 SmallString<16> AbsolutePath;
1640 if (!opts::CompDirOverride.empty()) {
1641 sys::path::append(path&: AbsolutePath, a: opts::CompDirOverride);
1642 sys::path::append(path&: AbsolutePath, a: DWOName);
1643 }
1644 DWARFUnit *DWOCU =
1645 DwarfUnit->getNonSkeletonUnitDIE(ExtractUnitDIEOnly: false, DWOAlternativeLocation: AbsolutePath).getDwarfUnit();
1646 if (!DWOCU->isDWOUnit()) {
1647 this->outs()
1648 << "BOLT-WARNING: Debug Fission: DWO debug information for "
1649 << DWOName
1650 << " was not retrieved and won't be updated. Please check "
1651 "relative path.\n";
1652 continue;
1653 }
1654 DWOCUs[*DWOId] = DWOCU;
1655 }
1656 }
1657 if (!DWOCUs.empty())
1658 this->outs() << "BOLT-INFO: processing split DWARF\n";
1659}
1660
1661void BinaryContext::preprocessDebugInfo() {
1662 struct CURange {
1663 uint64_t LowPC;
1664 uint64_t HighPC;
1665 DWARFUnit *Unit;
1666
1667 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1668 };
1669
1670 // Building a map of address ranges to CUs similar to .debug_aranges and use
1671 // it to assign CU to functions.
1672 std::vector<CURange> AllRanges;
1673 AllRanges.reserve(n: DwCtx->getNumCompileUnits());
1674 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1675 Expected<DWARFAddressRangesVector> RangesOrError =
1676 CU->getUnitDIE().getAddressRanges();
1677 if (!RangesOrError) {
1678 consumeError(Err: RangesOrError.takeError());
1679 continue;
1680 }
1681 for (DWARFAddressRange &Range : *RangesOrError) {
1682 // Parts of the debug info could be invalidated due to corresponding code
1683 // being removed from the binary by the linker. Hence we check if the
1684 // address is a valid one.
1685 if (containsAddress(Address: Range.LowPC))
1686 AllRanges.emplace_back(args: CURange{.LowPC: Range.LowPC, .HighPC: Range.HighPC, .Unit: CU.get()});
1687 }
1688
1689 ContainsDwarf5 |= CU->getVersion() >= 5;
1690 ContainsDwarfLegacy |= CU->getVersion() < 5;
1691 }
1692
1693 llvm::sort(C&: AllRanges);
1694 for (auto &KV : BinaryFunctions) {
1695 const uint64_t FunctionAddress = KV.first;
1696 BinaryFunction &Function = KV.second;
1697
1698 auto It = llvm::partition_point(
1699 Range&: AllRanges, P: [=](CURange R) { return R.HighPC <= FunctionAddress; });
1700 if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1701 Function.setDWARFUnit(It->Unit);
1702 }
1703
1704 // Discover units with debug info that needs to be updated.
1705 for (const auto &KV : BinaryFunctions) {
1706 const BinaryFunction &BF = KV.second;
1707 if (shouldEmit(Function: BF) && BF.getDWARFUnit())
1708 ProcessedCUs.insert(x: BF.getDWARFUnit());
1709 }
1710
1711 // Clear debug info for functions from units that we are not going to process.
1712 for (auto &KV : BinaryFunctions) {
1713 BinaryFunction &BF = KV.second;
1714 if (BF.getDWARFUnit() && !ProcessedCUs.count(x: BF.getDWARFUnit()))
1715 BF.setDWARFUnit(nullptr);
1716 }
1717
1718 if (opts::Verbosity >= 1) {
1719 this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1720 << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1721 }
1722
1723 preprocessDWODebugInfo();
1724
1725 // Populate MCContext with DWARF files from all units.
1726 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1727 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1728 const uint64_t CUID = CU->getOffset();
1729 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1730 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1731 Name: GlobalPrefix + "line_table_start" + Twine(CUID)));
1732
1733 if (!ProcessedCUs.count(x: CU.get()))
1734 continue;
1735
1736 const DWARFDebugLine::LineTable *LineTable =
1737 DwCtx->getLineTableForUnit(U: CU.get());
1738 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1739 LineTable->Prologue.FileNames;
1740
1741 uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1742 if (DwarfVersion >= 5) {
1743 std::optional<MD5::MD5Result> Checksum;
1744 if (LineTable->Prologue.ContentTypes.HasMD5)
1745 Checksum = LineTable->Prologue.FileNames[0].Checksum;
1746 std::optional<const char *> Name =
1747 dwarf::toString(V: CU->getUnitDIE().find(Attr: dwarf::DW_AT_name), Default: nullptr);
1748 if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1749 auto Iter = DWOCUs.find(x: *DWOID);
1750 if (Iter == DWOCUs.end()) {
1751 this->errs() << "BOLT-ERROR: DWO CU was not found for " << Name
1752 << '\n';
1753 exit(status: 1);
1754 }
1755 Name = dwarf::toString(
1756 V: Iter->second->getUnitDIE().find(Attr: dwarf::DW_AT_name), Default: nullptr);
1757 }
1758 BinaryLineTable.setRootFile(Directory: CU->getCompilationDir(), FileName: *Name, Checksum,
1759 Source: std::nullopt);
1760 }
1761
1762 BinaryLineTable.setDwarfVersion(DwarfVersion);
1763
1764 // Assign a unique label to every line table, one per CU.
1765 // Make sure empty debug line tables are registered too.
1766 if (FileNames.empty()) {
1767 cantFail(ValOrErr: getDwarfFile(Directory: "", FileName: "<unknown>", FileNumber: 0, Checksum: std::nullopt, Source: std::nullopt,
1768 CUID, DWARFVersion: DwarfVersion));
1769 continue;
1770 }
1771 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1772 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1773 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1774 // means empty dir.
1775 StringRef Dir = "";
1776 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1777 if (std::optional<const char *> DirName = dwarf::toString(
1778 V: LineTable->Prologue
1779 .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1780 Dir = *DirName;
1781 StringRef FileName = "";
1782 if (std::optional<const char *> FName =
1783 dwarf::toString(V: FileNames[I].Name))
1784 FileName = *FName;
1785 assert(FileName != "");
1786 std::optional<MD5::MD5Result> Checksum;
1787 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1788 Checksum = LineTable->Prologue.FileNames[I].Checksum;
1789 cantFail(ValOrErr: getDwarfFile(Directory: Dir, FileName, FileNumber: 0, Checksum, Source: std::nullopt, CUID,
1790 DWARFVersion: DwarfVersion));
1791 }
1792 }
1793}
1794
1795bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1796 if (Function.isPseudo())
1797 return false;
1798
1799 if (opts::processAllFunctions())
1800 return true;
1801
1802 if (Function.isIgnored())
1803 return false;
1804
1805 // In relocation mode we will emit non-simple functions with CFG.
1806 // If the function does not have a CFG it should be marked as ignored.
1807 return HasRelocations || Function.isSimple();
1808}
1809
1810void BinaryContext::dump(const MCInst &Inst) const {
1811 if (LLVM_UNLIKELY(!InstPrinter)) {
1812 dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1813 return;
1814 }
1815 InstPrinter->printInst(MI: &Inst, Address: 0, Annot: "", STI: *STI, OS&: dbgs());
1816 dbgs() << "\n";
1817}
1818
1819void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1820 uint32_t Operation = Inst.getOperation();
1821 switch (Operation) {
1822 case MCCFIInstruction::OpSameValue:
1823 OS << "OpSameValue Reg" << Inst.getRegister();
1824 break;
1825 case MCCFIInstruction::OpRememberState:
1826 OS << "OpRememberState";
1827 break;
1828 case MCCFIInstruction::OpRestoreState:
1829 OS << "OpRestoreState";
1830 break;
1831 case MCCFIInstruction::OpOffset:
1832 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1833 break;
1834 case MCCFIInstruction::OpDefCfaRegister:
1835 OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1836 break;
1837 case MCCFIInstruction::OpDefCfaOffset:
1838 OS << "OpDefCfaOffset " << Inst.getOffset();
1839 break;
1840 case MCCFIInstruction::OpDefCfa:
1841 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1842 break;
1843 case MCCFIInstruction::OpRelOffset:
1844 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1845 break;
1846 case MCCFIInstruction::OpAdjustCfaOffset:
1847 OS << "OfAdjustCfaOffset " << Inst.getOffset();
1848 break;
1849 case MCCFIInstruction::OpEscape:
1850 OS << "OpEscape";
1851 break;
1852 case MCCFIInstruction::OpRestore:
1853 OS << "OpRestore Reg" << Inst.getRegister();
1854 break;
1855 case MCCFIInstruction::OpUndefined:
1856 OS << "OpUndefined Reg" << Inst.getRegister();
1857 break;
1858 case MCCFIInstruction::OpRegister:
1859 OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1860 << Inst.getRegister2();
1861 break;
1862 case MCCFIInstruction::OpWindowSave:
1863 OS << "OpWindowSave";
1864 break;
1865 case MCCFIInstruction::OpGnuArgsSize:
1866 OS << "OpGnuArgsSize";
1867 break;
1868 default:
1869 OS << "Op#" << Operation;
1870 break;
1871 }
1872}
1873
1874MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1875 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1876 // in the code section (see IHI0056B). $x identifies a symbol starting code or
1877 // the end of a data chunk inside code, $d identifies start of data.
1878 if (isX86() || ELFSymbolRef(Symbol).getSize())
1879 return MarkerSymType::NONE;
1880
1881 Expected<StringRef> NameOrError = Symbol.getName();
1882 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1883
1884 if (!TypeOrError || !NameOrError)
1885 return MarkerSymType::NONE;
1886
1887 if (*TypeOrError != SymbolRef::ST_Unknown)
1888 return MarkerSymType::NONE;
1889
1890 if (*NameOrError == "$x" || NameOrError->starts_with(Prefix: "$x."))
1891 return MarkerSymType::CODE;
1892
1893 // $x<ISA>
1894 if (isRISCV() && NameOrError->starts_with(Prefix: "$x"))
1895 return MarkerSymType::CODE;
1896
1897 if (*NameOrError == "$d" || NameOrError->starts_with(Prefix: "$d."))
1898 return MarkerSymType::DATA;
1899
1900 return MarkerSymType::NONE;
1901}
1902
1903bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1904 return getMarkerType(Symbol) != MarkerSymType::NONE;
1905}
1906
1907static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1908 const BinaryFunction *Function,
1909 DWARFContext *DwCtx) {
1910 DebugLineTableRowRef RowRef =
1911 DebugLineTableRowRef::fromSMLoc(Loc: Instruction.getLoc());
1912 if (RowRef == DebugLineTableRowRef::NULL_ROW)
1913 return;
1914
1915 const DWARFDebugLine::LineTable *LineTable;
1916 if (Function && Function->getDWARFUnit() &&
1917 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1918 LineTable = Function->getDWARFLineTable();
1919 } else {
1920 LineTable = DwCtx->getLineTableForUnit(
1921 U: DwCtx->getCompileUnitForOffset(Offset: RowRef.DwCompileUnitIndex));
1922 }
1923 assert(LineTable && "line table expected for instruction with debug info");
1924
1925 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1926 StringRef FileName = "";
1927 if (std::optional<const char *> FName =
1928 dwarf::toString(V: LineTable->Prologue.FileNames[Row.File - 1].Name))
1929 FileName = *FName;
1930 OS << " # debug line " << FileName << ":" << Row.Line;
1931 if (Row.Column)
1932 OS << ":" << Row.Column;
1933 if (Row.Discriminator)
1934 OS << " discriminator:" << Row.Discriminator;
1935}
1936
1937ArrayRef<uint8_t> BinaryContext::extractData(uint64_t Address,
1938 uint64_t Size) const {
1939 ArrayRef<uint8_t> Res;
1940
1941 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
1942 if (!Section || Section->isVirtual())
1943 return Res;
1944
1945 if (!Section->containsRange(Address, Size))
1946 return Res;
1947
1948 auto *Bytes =
1949 reinterpret_cast<const uint8_t *>(Section->getContents().data());
1950 return ArrayRef<uint8_t>(Bytes + Address - Section->getAddress(), Size);
1951}
1952
1953void BinaryContext::printData(raw_ostream &OS, ArrayRef<uint8_t> Data,
1954 uint64_t Offset) const {
1955 DataExtractor DE(Data, AsmInfo->isLittleEndian(),
1956 AsmInfo->getCodePointerSize());
1957 uint64_t DataOffset = 0;
1958 while (DataOffset + 4 <= Data.size()) {
1959 OS << format(Fmt: " %08" PRIx64 ": \t.word\t0x", Vals: Offset + DataOffset);
1960 const auto Word = DE.getUnsigned(offset_ptr: &DataOffset, byte_size: 4);
1961 OS << Twine::utohexstr(Val: Word) << '\n';
1962 }
1963 if (DataOffset + 2 <= Data.size()) {
1964 OS << format(Fmt: " %08" PRIx64 ": \t.short\t0x", Vals: Offset + DataOffset);
1965 const auto Short = DE.getUnsigned(offset_ptr: &DataOffset, byte_size: 2);
1966 OS << Twine::utohexstr(Val: Short) << '\n';
1967 }
1968 if (DataOffset + 1 == Data.size()) {
1969 OS << format(Fmt: " %08" PRIx64 ": \t.byte\t0x%x\n", Vals: Offset + DataOffset,
1970 Vals: Data[DataOffset]);
1971 }
1972}
1973
1974void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1975 uint64_t Offset,
1976 const BinaryFunction *Function,
1977 bool PrintMCInst, bool PrintMemData,
1978 bool PrintRelocations,
1979 StringRef Endl) const {
1980 OS << format(Fmt: " %08" PRIx64 ": ", Vals: Offset);
1981 if (MIB->isCFI(Inst: Instruction)) {
1982 uint32_t Offset = Instruction.getOperand(i: 0).getImm();
1983 OS << "\t!CFI\t$" << Offset << "\t; ";
1984 if (Function)
1985 printCFI(OS, Inst: *Function->getCFIFor(Instr: Instruction));
1986 OS << Endl;
1987 return;
1988 }
1989 if (std::optional<uint32_t> DynamicID =
1990 MIB->getDynamicBranchID(Inst: Instruction)) {
1991 OS << "\tjit\t" << MIB->getTargetSymbol(Inst: Instruction)->getName()
1992 << " # ID: " << DynamicID;
1993 } else {
1994 // If there are annotations on the instruction, the MCInstPrinter will fail
1995 // to print the preferred alias as it only does so when the number of
1996 // operands is as expected. See
1997 // https://github.com/llvm/llvm-project/blob/782f1a0d895646c364a53f9dcdd6d4ec1f3e5ea0/llvm/lib/MC/MCInstPrinter.cpp#L142
1998 // Therefore, create a temporary copy of the Inst from which the annotations
1999 // are removed, and print that Inst.
2000 MCInst InstNoAnnot = Instruction;
2001 MIB->stripAnnotations(Inst&: InstNoAnnot);
2002 InstPrinter->printInst(MI: &InstNoAnnot, Address: 0, Annot: "", STI: *STI, OS);
2003 }
2004 if (MIB->isCall(Inst: Instruction)) {
2005 if (MIB->isTailCall(Inst: Instruction))
2006 OS << " # TAILCALL ";
2007 if (MIB->isInvoke(Inst: Instruction)) {
2008 const std::optional<MCPlus::MCLandingPad> EHInfo =
2009 MIB->getEHInfo(Inst: Instruction);
2010 OS << " # handler: ";
2011 if (EHInfo->first)
2012 OS << *EHInfo->first;
2013 else
2014 OS << '0';
2015 OS << "; action: " << EHInfo->second;
2016 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Inst: Instruction);
2017 if (GnuArgsSize >= 0)
2018 OS << "; GNU_args_size = " << GnuArgsSize;
2019 }
2020 } else if (MIB->isIndirectBranch(Inst: Instruction)) {
2021 if (uint64_t JTAddress = MIB->getJumpTable(Inst: Instruction)) {
2022 OS << " # JUMPTABLE @0x" << Twine::utohexstr(Val: JTAddress);
2023 } else {
2024 OS << " # UNKNOWN CONTROL FLOW";
2025 }
2026 }
2027 if (std::optional<uint32_t> Offset = MIB->getOffset(Inst: Instruction))
2028 OS << " # Offset: " << *Offset;
2029 if (std::optional<uint32_t> Size = MIB->getSize(Inst: Instruction))
2030 OS << " # Size: " << *Size;
2031 if (MCSymbol *Label = MIB->getInstLabel(Inst: Instruction))
2032 OS << " # Label: " << *Label;
2033
2034 MIB->printAnnotations(Inst: Instruction, OS);
2035
2036 if (opts::PrintDebugInfo)
2037 printDebugInfo(OS, Instruction, Function, DwCtx: DwCtx.get());
2038
2039 if ((opts::PrintRelocations || PrintRelocations) && Function) {
2040 const uint64_t Size = computeCodeSize(Beg: &Instruction, End: &Instruction + 1);
2041 Function->printRelocations(OS, Offset, Size);
2042 }
2043
2044 OS << Endl;
2045
2046 if (PrintMCInst) {
2047 Instruction.dump_pretty(OS, Printer: InstPrinter.get());
2048 OS << Endl;
2049 }
2050}
2051
2052std::optional<uint64_t>
2053BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
2054 uint64_t FileOffset) const {
2055 // Find a segment with a matching file offset.
2056 for (auto &KV : SegmentMapInfo) {
2057 const SegmentInfo &SegInfo = KV.second;
2058 // Only consider executable segments.
2059 if (!SegInfo.IsExecutable)
2060 continue;
2061 // FileOffset is got from perf event,
2062 // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
2063 // If the pagesize is not equal to SegInfo.Alignment.
2064 // FileOffset and SegInfo.FileOffset should be aligned first,
2065 // and then judge whether they are equal.
2066 if (alignDown(Value: SegInfo.FileOffset, Align: SegInfo.Alignment) ==
2067 alignDown(Value: FileOffset, Align: SegInfo.Alignment)) {
2068 // The function's offset from base address in VAS is aligned by pagesize
2069 // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2070 // However, The ELF document says that SegInfo.FileOffset should equal
2071 // to SegInfo.Address, modulo the pagesize.
2072 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2073
2074 // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2075 // alignDown(SegInfo.Address, pagesize)
2076 // = SegInfo.Address - (SegInfo.Address % pagesize)
2077 // = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2078 // = SegInfo.Address - SegInfo.FileOffset +
2079 // alignDown(SegInfo.FileOffset, pagesize)
2080 // = SegInfo.Address - SegInfo.FileOffset + FileOffset
2081 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
2082 }
2083 }
2084
2085 return std::nullopt;
2086}
2087
2088ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
2089 auto SI = AddressToSection.upper_bound(x: Address);
2090 if (SI != AddressToSection.begin()) {
2091 --SI;
2092 uint64_t UpperBound = SI->first + SI->second->getSize();
2093 if (!SI->second->getSize())
2094 UpperBound += 1;
2095 if (UpperBound > Address)
2096 return *SI->second;
2097 }
2098 return std::make_error_code(e: std::errc::bad_address);
2099}
2100
2101ErrorOr<StringRef>
2102BinaryContext::getSectionNameForAddress(uint64_t Address) const {
2103 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
2104 return Section->getName();
2105 return std::make_error_code(e: std::errc::bad_address);
2106}
2107
2108BinarySection &BinaryContext::registerSection(BinarySection *Section) {
2109 auto Res = Sections.insert(x: Section);
2110 (void)Res;
2111 assert(Res.second && "can't register the same section twice.");
2112
2113 // Only register allocatable sections in the AddressToSection map.
2114 if (Section->isAllocatable() && Section->getAddress())
2115 AddressToSection.insert(x: std::make_pair(x: Section->getAddress(), y&: Section));
2116 NameToSection.insert(
2117 x: std::make_pair(x: std::string(Section->getName()), y&: Section));
2118 if (Section->hasSectionRef())
2119 SectionRefToBinarySection.insert(
2120 KV: std::make_pair(x: Section->getSectionRef(), y&: Section));
2121
2122 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2123 return *Section;
2124}
2125
2126BinarySection &BinaryContext::registerSection(SectionRef Section) {
2127 return registerSection(Section: new BinarySection(*this, Section));
2128}
2129
2130BinarySection &
2131BinaryContext::registerSection(const Twine &SectionName,
2132 const BinarySection &OriginalSection) {
2133 return registerSection(
2134 Section: new BinarySection(*this, SectionName, OriginalSection));
2135}
2136
2137BinarySection &
2138BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2139 unsigned ELFFlags, uint8_t *Data,
2140 uint64_t Size, unsigned Alignment) {
2141 auto NamedSections = getSectionByName(Name);
2142 if (NamedSections.begin() != NamedSections.end()) {
2143 assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2144 "can only update unique sections");
2145 BinarySection *Section = NamedSections.begin()->second;
2146
2147 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2148 const bool Flag = Section->isAllocatable();
2149 (void)Flag;
2150 Section->update(NewData: Data, NewSize: Size, NewAlignment: Alignment, NewELFType: ELFType, NewELFFlags: ELFFlags);
2151 LLVM_DEBUG(dbgs() << *Section << "\n");
2152 // FIXME: Fix section flags/attributes for MachO.
2153 if (isELF())
2154 assert(Flag == Section->isAllocatable() &&
2155 "can't change section allocation status");
2156 return *Section;
2157 }
2158
2159 return registerSection(
2160 Section: new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2161}
2162
2163void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2164 auto NameRange = NameToSection.equal_range(x: Section.getName().str());
2165 while (NameRange.first != NameRange.second) {
2166 if (NameRange.first->second == &Section) {
2167 NameToSection.erase(position: NameRange.first);
2168 break;
2169 }
2170 ++NameRange.first;
2171 }
2172}
2173
2174void BinaryContext::deregisterUnusedSections() {
2175 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName(SectionName: "<absolute>");
2176 for (auto SI = Sections.begin(); SI != Sections.end();) {
2177 BinarySection *Section = *SI;
2178 // We check getOutputData() instead of getOutputSize() because sometimes
2179 // zero-sized .text.cold sections are allocated.
2180 if (Section->hasSectionRef() || Section->getOutputData() ||
2181 (AbsSection && Section == &AbsSection.get())) {
2182 ++SI;
2183 continue;
2184 }
2185
2186 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2187 << '\n';);
2188 deregisterSectionName(Section: *Section);
2189 SI = Sections.erase(position: SI);
2190 delete Section;
2191 }
2192}
2193
2194bool BinaryContext::deregisterSection(BinarySection &Section) {
2195 BinarySection *SectionPtr = &Section;
2196 auto Itr = Sections.find(x: SectionPtr);
2197 if (Itr != Sections.end()) {
2198 auto Range = AddressToSection.equal_range(x: SectionPtr->getAddress());
2199 while (Range.first != Range.second) {
2200 if (Range.first->second == SectionPtr) {
2201 AddressToSection.erase(position: Range.first);
2202 break;
2203 }
2204 ++Range.first;
2205 }
2206
2207 deregisterSectionName(Section: *SectionPtr);
2208 Sections.erase(position: Itr);
2209 delete SectionPtr;
2210 return true;
2211 }
2212 return false;
2213}
2214
2215void BinaryContext::renameSection(BinarySection &Section,
2216 const Twine &NewName) {
2217 auto Itr = Sections.find(x: &Section);
2218 assert(Itr != Sections.end() && "Section must exist to be renamed.");
2219 Sections.erase(position: Itr);
2220
2221 deregisterSectionName(Section);
2222
2223 Section.Name = NewName.str();
2224 Section.setOutputName(Section.Name);
2225
2226 NameToSection.insert(x: std::make_pair(x&: Section.Name, y: &Section));
2227
2228 // Reinsert with the new name.
2229 Sections.insert(x: &Section);
2230}
2231
2232void BinaryContext::printSections(raw_ostream &OS) const {
2233 for (BinarySection *const &Section : Sections)
2234 OS << "BOLT-INFO: " << *Section << "\n";
2235}
2236
2237BinarySection &BinaryContext::absoluteSection() {
2238 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName(SectionName: "<absolute>"))
2239 return *Section;
2240 return registerOrUpdateSection(Name: "<absolute>", ELFType: ELF::SHT_NULL, ELFFlags: 0u);
2241}
2242
2243ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2244 size_t Size) const {
2245 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2246 if (!Section)
2247 return std::make_error_code(e: std::errc::bad_address);
2248
2249 if (Section->isVirtual())
2250 return 0;
2251
2252 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2253 AsmInfo->getCodePointerSize());
2254 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2255 return DE.getUnsigned(offset_ptr: &ValueOffset, byte_size: Size);
2256}
2257
2258ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2259 size_t Size) const {
2260 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2261 if (!Section)
2262 return std::make_error_code(e: std::errc::bad_address);
2263
2264 if (Section->isVirtual())
2265 return 0;
2266
2267 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2268 AsmInfo->getCodePointerSize());
2269 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2270 return DE.getSigned(offset_ptr: &ValueOffset, size: Size);
2271}
2272
2273void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2274 uint32_t Type, uint64_t Addend,
2275 uint64_t Value) {
2276 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2277 assert(Section && "cannot find section for address");
2278 Section->addRelocation(Offset: Address - Section->getAddress(), Symbol, Type, Addend,
2279 Value);
2280}
2281
2282void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2283 uint32_t Type, uint64_t Addend,
2284 uint64_t Value) {
2285 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2286 assert(Section && "cannot find section for address");
2287 Section->addDynamicRelocation(Offset: Address - Section->getAddress(), Symbol, Type,
2288 Addend, Value);
2289}
2290
2291bool BinaryContext::removeRelocationAt(uint64_t Address) {
2292 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2293 assert(Section && "cannot find section for address");
2294 return Section->removeRelocationAt(Offset: Address - Section->getAddress());
2295}
2296
2297const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2298 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2299 if (!Section)
2300 return nullptr;
2301
2302 return Section->getRelocationAt(Offset: Address - Section->getAddress());
2303}
2304
2305const Relocation *
2306BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2307 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2308 if (!Section)
2309 return nullptr;
2310
2311 return Section->getDynamicRelocationAt(Offset: Address - Section->getAddress());
2312}
2313
2314void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2315 const uint64_t Address) {
2316 auto setImmovable = [&](BinaryData &BD) {
2317 BinaryData *Root = BD.getAtomicRoot();
2318 LLVM_DEBUG(if (Root->isMoveable()) {
2319 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2320 << "due to ambiguous relocation referencing 0x"
2321 << Twine::utohexstr(Address) << '\n';
2322 });
2323 Root->setIsMoveable(false);
2324 };
2325
2326 if (Address == BD.getAddress()) {
2327 setImmovable(BD);
2328
2329 // Set previous symbol as immovable
2330 BinaryData *Prev = getBinaryDataContainingAddress(Address: Address - 1);
2331 if (Prev && Prev->getEndAddress() == BD.getAddress())
2332 setImmovable(*Prev);
2333 }
2334
2335 if (Address == BD.getEndAddress()) {
2336 setImmovable(BD);
2337
2338 // Set next symbol as immovable
2339 BinaryData *Next = getBinaryDataContainingAddress(Address: BD.getEndAddress());
2340 if (Next && Next->getAddress() == BD.getEndAddress())
2341 setImmovable(*Next);
2342 }
2343}
2344
2345BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2346 uint64_t *EntryDesc) {
2347 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2348 auto BFI = SymbolToFunctionMap.find(x: Symbol);
2349 if (BFI == SymbolToFunctionMap.end())
2350 return nullptr;
2351
2352 BinaryFunction *BF = BFI->second;
2353 if (EntryDesc)
2354 *EntryDesc = BF->getEntryIDForSymbol(EntrySymbol: Symbol);
2355
2356 return BF;
2357}
2358
2359std::string
2360BinaryContext::generateBugReportMessage(StringRef Message,
2361 const BinaryFunction &Function) const {
2362 std::string Msg;
2363 raw_string_ostream SS(Msg);
2364 SS << "=======================================\n";
2365 SS << "BOLT is unable to proceed because it couldn't properly understand "
2366 "this function.\n";
2367 SS << "If you are running the most recent version of BOLT, you may "
2368 "want to "
2369 "report this and paste this dump.\nPlease check that there is no "
2370 "sensitive contents being shared in this dump.\n";
2371 SS << "\nOffending function: " << Function.getPrintName() << "\n\n";
2372 ScopedPrinter SP(SS);
2373 SP.printBinaryBlock(Label: "Function contents", Value: *Function.getData());
2374 SS << "\n";
2375 const_cast<BinaryFunction &>(Function).print(OS&: SS, Annotation: "");
2376 SS << "ERROR: " << Message;
2377 SS << "\n=======================================\n";
2378 return Msg;
2379}
2380
2381BinaryFunction *
2382BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2383 bool IsSimple) {
2384 InjectedBinaryFunctions.push_back(x: new BinaryFunction(Name, *this, IsSimple));
2385 BinaryFunction *BF = InjectedBinaryFunctions.back();
2386 setSymbolToFunctionMap(Sym: BF->getSymbol(), BF);
2387 BF->CurrentState = BinaryFunction::State::CFG;
2388 return BF;
2389}
2390
2391BinaryFunction *
2392BinaryContext::createInstructionPatch(uint64_t Address,
2393 const InstructionListType &Instructions,
2394 const Twine &Name) {
2395 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2396 assert(Section && "cannot get section for patching");
2397 assert(Section->hasSectionRef() && Section->isText() &&
2398 "can only patch input file code sections");
2399
2400 const uint64_t FileOffset =
2401 Section->getInputFileOffset() + Address - Section->getAddress();
2402
2403 std::string PatchName = Name.str();
2404 if (PatchName.empty()) {
2405 // Assign unique name to the patch.
2406 static uint64_t N = 0;
2407 PatchName = "__BP_" + std::to_string(val: N++);
2408 }
2409
2410 BinaryFunction *PBF = createInjectedBinaryFunction(Name: PatchName);
2411 PBF->setOutputAddress(Address);
2412 PBF->setFileOffset(FileOffset);
2413 PBF->setOriginSection(&Section.get());
2414 PBF->addBasicBlock()->addInstructions(R: Instructions);
2415 PBF->setIsPatch(true);
2416
2417 // Don't create symbol table entry if the name wasn't specified.
2418 if (Name.str().empty())
2419 PBF->setAnonymous(true);
2420
2421 return PBF;
2422}
2423
2424std::pair<size_t, size_t>
2425BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2426 // Use the original size for non-simple functions.
2427 if (!BF.isSimple() || BF.isIgnored())
2428 return std::make_pair(x: BF.getSize(), y: 0);
2429
2430 // Adjust branch instruction to match the current layout.
2431 if (FixBranches)
2432 BF.fixBranches();
2433
2434 // Create local MC context to isolate the effect of ephemeral code emission.
2435 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2436 MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2437 MCAsmBackend *MAB =
2438 TheTarget->createMCAsmBackend(STI: *STI, MRI: *MRI, Options: MCTargetOptions());
2439
2440 SmallString<256> Code;
2441 raw_svector_ostream VecOS(Code);
2442
2443 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS&: VecOS);
2444 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2445 T: *TheTriple, Ctx&: *LocalCtx, TAB: std::unique_ptr<MCAsmBackend>(MAB), OW: std::move(OW),
2446 Emitter: std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), STI: *STI));
2447
2448 Streamer->initSections(NoExecStack: false, STI: *STI);
2449
2450 MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2451 Section->setHasInstructions(true);
2452
2453 // Create symbols in the LocalCtx so that they get destroyed with it.
2454 MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2455 MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2456
2457 Streamer->switchSection(Section);
2458 Streamer->emitLabel(Symbol: StartLabel);
2459 emitFunctionBody(Streamer&: *Streamer, BF, FF&: BF.getLayout().getMainFragment(),
2460 /*EmitCodeOnly=*/true);
2461 Streamer->emitLabel(Symbol: EndLabel);
2462
2463 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2464 SmallVector<LabelRange> SplitLabels;
2465 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2466 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2467 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2468 SplitLabels.emplace_back(Args: SplitStartLabel, Args: SplitEndLabel);
2469
2470 MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2471 Section: BF.getCodeSectionName(Fragment: FF.getFragmentNum()), Type: ELF::SHT_PROGBITS,
2472 Flags: ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2473 SplitSection->setHasInstructions(true);
2474 Streamer->switchSection(Section: SplitSection);
2475
2476 Streamer->emitLabel(Symbol: SplitStartLabel);
2477 emitFunctionBody(Streamer&: *Streamer, BF, FF, /*EmitCodeOnly=*/true);
2478 Streamer->emitLabel(Symbol: SplitEndLabel);
2479 }
2480
2481 MCAssembler &Assembler =
2482 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2483 Assembler.layout();
2484
2485 // Obtain fragment sizes.
2486 std::vector<uint64_t> FragmentSizes;
2487 // Main fragment size.
2488 const uint64_t HotSize = Assembler.getSymbolOffset(S: *EndLabel) -
2489 Assembler.getSymbolOffset(S: *StartLabel);
2490 FragmentSizes.push_back(x: HotSize);
2491 // Split fragment sizes.
2492 uint64_t ColdSize = 0;
2493 for (const auto &Labels : SplitLabels) {
2494 uint64_t Size = Assembler.getSymbolOffset(S: *Labels.second) -
2495 Assembler.getSymbolOffset(S: *Labels.first);
2496 FragmentSizes.push_back(x: Size);
2497 ColdSize += Size;
2498 }
2499
2500 // Populate new start and end offsets of each basic block.
2501 uint64_t FragmentIndex = 0;
2502 for (FunctionFragment &FF : BF.getLayout().fragments()) {
2503 BinaryBasicBlock *PrevBB = nullptr;
2504 for (BinaryBasicBlock *BB : FF) {
2505 const uint64_t BBStartOffset =
2506 Assembler.getSymbolOffset(S: *(BB->getLabel()));
2507 BB->setOutputStartAddress(BBStartOffset);
2508 if (PrevBB)
2509 PrevBB->setOutputEndAddress(BBStartOffset);
2510 PrevBB = BB;
2511 }
2512 if (PrevBB)
2513 PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
2514 FragmentIndex++;
2515 }
2516
2517 // Clean-up the effect of the code emission.
2518 for (const MCSymbol &Symbol : Assembler.symbols()) {
2519 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2520 MutableSymbol->setUndefined();
2521 MutableSymbol->setIsRegistered(false);
2522 }
2523
2524 return std::make_pair(x: HotSize, y&: ColdSize);
2525}
2526
2527bool BinaryContext::validateInstructionEncoding(
2528 ArrayRef<uint8_t> InputSequence) const {
2529 MCInst Inst;
2530 uint64_t InstSize;
2531 DisAsm->getInstruction(Instr&: Inst, Size&: InstSize, Bytes: InputSequence, Address: 0, CStream&: nulls());
2532 assert(InstSize == InputSequence.size() &&
2533 "Disassembled instruction size does not match the sequence.");
2534
2535 SmallString<256> Code;
2536 SmallVector<MCFixup, 4> Fixups;
2537
2538 MCE->encodeInstruction(Inst, CB&: Code, Fixups, STI: *STI);
2539 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2540 if (InputSequence != OutputSequence) {
2541 if (opts::Verbosity > 1) {
2542 this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2543 << " input: " << InputSequence << '\n'
2544 << " output: " << OutputSequence << '\n';
2545 }
2546 return false;
2547 }
2548
2549 return true;
2550}
2551
2552uint64_t BinaryContext::getHotThreshold() const {
2553 static uint64_t Threshold = 0;
2554 if (Threshold == 0) {
2555 Threshold = std::max(
2556 a: (uint64_t)opts::ExecutionCountThreshold,
2557 b: NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2558 }
2559 return Threshold;
2560}
2561
2562BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2563 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2564 auto FI = BinaryFunctions.upper_bound(x: Address);
2565 if (FI == BinaryFunctions.begin())
2566 return nullptr;
2567 --FI;
2568
2569 const uint64_t UsedSize =
2570 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2571
2572 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2573 return nullptr;
2574
2575 return &FI->second;
2576}
2577
2578BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2579 // First, try to find a function starting at the given address. If the
2580 // function was folded, this will get us the original folded function if it
2581 // wasn't removed from the list, e.g. in non-relocation mode.
2582 auto BFI = BinaryFunctions.find(x: Address);
2583 if (BFI != BinaryFunctions.end())
2584 return &BFI->second;
2585
2586 // We might have folded the function matching the object at the given
2587 // address. In such case, we look for a function matching the symbol
2588 // registered at the original address. The new function (the one that the
2589 // original was folded into) will hold the symbol.
2590 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2591 uint64_t EntryID = 0;
2592 BinaryFunction *BF = getFunctionForSymbol(Symbol: BD->getSymbol(), EntryDesc: &EntryID);
2593 if (BF && EntryID == 0)
2594 return BF;
2595 }
2596 return nullptr;
2597}
2598
2599/// Deregister JumpTable registered at a given \p Address and delete it.
2600void BinaryContext::deleteJumpTable(uint64_t Address) {
2601 assert(JumpTables.count(Address) && "Must have a jump table at address");
2602 JumpTable *JT = JumpTables.at(k: Address);
2603 for (BinaryFunction *Parent : JT->Parents)
2604 Parent->JumpTables.erase(x: Address);
2605 JumpTables.erase(x: Address);
2606 delete JT;
2607}
2608
2609DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2610 const DWARFAddressRangesVector &InputRanges) const {
2611 DebugAddressRangesVector OutputRanges;
2612
2613 for (const DWARFAddressRange Range : InputRanges) {
2614 auto BFI = BinaryFunctions.lower_bound(x: Range.LowPC);
2615 while (BFI != BinaryFunctions.end()) {
2616 const BinaryFunction &Function = BFI->second;
2617 if (Function.getAddress() >= Range.HighPC)
2618 break;
2619 const DebugAddressRangesVector FunctionRanges =
2620 Function.getOutputAddressRanges();
2621 llvm::move(Range: FunctionRanges, Out: std::back_inserter(x&: OutputRanges));
2622 std::advance(i&: BFI, n: 1);
2623 }
2624 }
2625
2626 return OutputRanges;
2627}
2628
2629} // namespace bolt
2630} // namespace llvm
2631

// Provided by KDAB
//
// Privacy Policy
// Improve your Profiling and Debugging skills
// Find out more
//
// source code of bolt/lib/Core/BinaryContext.cpp