1//===- bolt/Core/BinaryContext.cpp - Low-level context --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the BinaryContext class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Core/BinaryContext.h"
14#include "bolt/Core/BinaryEmitter.h"
15#include "bolt/Core/BinaryFunction.h"
16#include "bolt/Utils/CommandLineOpts.h"
17#include "bolt/Utils/Utils.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/Twine.h"
20#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
21#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
22#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23#include "llvm/MC/MCAsmLayout.h"
24#include "llvm/MC/MCAssembler.h"
25#include "llvm/MC/MCContext.h"
26#include "llvm/MC/MCDisassembler/MCDisassembler.h"
27#include "llvm/MC/MCInstPrinter.h"
28#include "llvm/MC/MCObjectStreamer.h"
29#include "llvm/MC/MCObjectWriter.h"
30#include "llvm/MC/MCRegisterInfo.h"
31#include "llvm/MC/MCSectionELF.h"
32#include "llvm/MC/MCStreamer.h"
33#include "llvm/MC/MCSubtargetInfo.h"
34#include "llvm/MC/MCSymbol.h"
35#include "llvm/Support/CommandLine.h"
36#include "llvm/Support/Error.h"
37#include "llvm/Support/Regex.h"
38#include <algorithm>
39#include <functional>
40#include <iterator>
41#include <unordered_set>
42
43using namespace llvm;
44
45#undef DEBUG_TYPE
46#define DEBUG_TYPE "bolt"
47
48namespace opts {
49
50cl::opt<bool> NoHugePages("no-huge-pages",
51 cl::desc("use regular size pages for code alignment"),
52 cl::Hidden, cl::cat(BoltCategory));
53
54static cl::opt<bool>
55PrintDebugInfo("print-debug-info",
56 cl::desc("print debug info when printing functions"),
57 cl::Hidden,
58 cl::ZeroOrMore,
59 cl::cat(BoltCategory));
60
61cl::opt<bool> PrintRelocations(
62 "print-relocations",
63 cl::desc("print relocations when printing functions/objects"), cl::Hidden,
64 cl::cat(BoltCategory));
65
66static cl::opt<bool>
67PrintMemData("print-mem-data",
68 cl::desc("print memory data annotations when printing functions"),
69 cl::Hidden,
70 cl::ZeroOrMore,
71 cl::cat(BoltCategory));
72
73cl::opt<std::string> CompDirOverride(
74 "comp-dir-override",
75 cl::desc("overrides DW_AT_comp_dir, and provides an alterantive base "
76 "location, which is used with DW_AT_dwo_name to construct a path "
77 "to *.dwo files."),
78 cl::Hidden, cl::init(Val: ""), cl::cat(BoltCategory));
79} // namespace opts
80
81namespace llvm {
82namespace bolt {
83
84char BOLTError::ID = 0;
85
86BOLTError::BOLTError(bool IsFatal, const Twine &S)
87 : IsFatal(IsFatal), Msg(S.str()) {}
88
89void BOLTError::log(raw_ostream &OS) const {
90 if (IsFatal)
91 OS << "FATAL ";
92 StringRef ErrMsg = StringRef(Msg);
93 // Prepend our error prefix if it is missing
94 if (ErrMsg.empty()) {
95 OS << "BOLT-ERROR\n";
96 } else {
97 if (!ErrMsg.starts_with(Prefix: "BOLT-ERROR"))
98 OS << "BOLT-ERROR: ";
99 OS << ErrMsg << "\n";
100 }
101}
102
103std::error_code BOLTError::convertToErrorCode() const {
104 return inconvertibleErrorCode();
105}
106
107Error createNonFatalBOLTError(const Twine &S) {
108 return make_error<BOLTError>(/*IsFatal*/ Args: false, Args: S);
109}
110
111Error createFatalBOLTError(const Twine &S) {
112 return make_error<BOLTError>(/*IsFatal*/ Args: true, Args: S);
113}
114
115void BinaryContext::logBOLTErrorsAndQuitOnFatal(Error E) {
116 handleAllErrors(E: Error(std::move(E)), Handlers: [&](const BOLTError &E) {
117 if (!E.getMessage().empty())
118 E.log(OS&: this->errs());
119 if (E.isFatal())
120 exit(status: 1);
121 });
122}
123
124BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
125 std::unique_ptr<DWARFContext> DwCtx,
126 std::unique_ptr<Triple> TheTriple,
127 const Target *TheTarget, std::string TripleName,
128 std::unique_ptr<MCCodeEmitter> MCE,
129 std::unique_ptr<MCObjectFileInfo> MOFI,
130 std::unique_ptr<const MCAsmInfo> AsmInfo,
131 std::unique_ptr<const MCInstrInfo> MII,
132 std::unique_ptr<const MCSubtargetInfo> STI,
133 std::unique_ptr<MCInstPrinter> InstPrinter,
134 std::unique_ptr<const MCInstrAnalysis> MIA,
135 std::unique_ptr<MCPlusBuilder> MIB,
136 std::unique_ptr<const MCRegisterInfo> MRI,
137 std::unique_ptr<MCDisassembler> DisAsm,
138 JournalingStreams Logger)
139 : Ctx(std::move(Ctx)), DwCtx(std::move(DwCtx)),
140 TheTriple(std::move(TheTriple)), TheTarget(TheTarget),
141 TripleName(TripleName), MCE(std::move(MCE)), MOFI(std::move(MOFI)),
142 AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
143 InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
144 MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)),
145 Logger(Logger) {
146 Relocation::Arch = this->TheTriple->getArch();
147 RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
148 PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
149}
150
151BinaryContext::~BinaryContext() {
152 for (BinarySection *Section : Sections)
153 delete Section;
154 for (BinaryFunction *InjectedFunction : InjectedBinaryFunctions)
155 delete InjectedFunction;
156 for (std::pair<const uint64_t, JumpTable *> JTI : JumpTables)
157 delete JTI.second;
158 clearBinaryData();
159}
160
161/// Create BinaryContext for a given architecture \p ArchName and
162/// triple \p TripleName.
163Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
164 Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features,
165 bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
166 StringRef ArchName = "";
167 std::string FeaturesStr = "";
168 switch (TheTriple.getArch()) {
169 case llvm::Triple::x86_64:
170 if (Features)
171 return createFatalBOLTError(
172 S: "x86_64 target does not use SubtargetFeatures");
173 ArchName = "x86-64";
174 FeaturesStr = "+nopl";
175 break;
176 case llvm::Triple::aarch64:
177 if (Features)
178 return createFatalBOLTError(
179 S: "AArch64 target does not use SubtargetFeatures");
180 ArchName = "aarch64";
181 FeaturesStr = "+all";
182 break;
183 case llvm::Triple::riscv64: {
184 ArchName = "riscv64";
185 if (!Features)
186 return createFatalBOLTError(S: "RISCV target needs SubtargetFeatures");
187 // We rely on relaxation for some transformations (e.g., promoting all calls
188 // to PseudoCALL and then making JITLink relax them). Since the relax
189 // feature is not stored in the object file, we manually enable it.
190 Features->AddFeature(String: "relax");
191 FeaturesStr = Features->getString();
192 break;
193 }
194 default:
195 return createStringError(EC: std::errc::not_supported,
196 Fmt: "BOLT-ERROR: Unrecognized machine in ELF file");
197 }
198
199 const std::string TripleName = TheTriple.str();
200
201 std::string Error;
202 const Target *TheTarget =
203 TargetRegistry::lookupTarget(ArchName: std::string(ArchName), TheTriple, Error);
204 if (!TheTarget)
205 return createStringError(EC: make_error_code(e: std::errc::not_supported),
206 S: Twine("BOLT-ERROR: ", Error));
207
208 std::unique_ptr<const MCRegisterInfo> MRI(
209 TheTarget->createMCRegInfo(TT: TripleName));
210 if (!MRI)
211 return createStringError(
212 EC: make_error_code(e: std::errc::not_supported),
213 S: Twine("BOLT-ERROR: no register info for target ", TripleName));
214
215 // Set up disassembler.
216 std::unique_ptr<MCAsmInfo> AsmInfo(
217 TheTarget->createMCAsmInfo(MRI: *MRI, TheTriple: TripleName, Options: MCTargetOptions()));
218 if (!AsmInfo)
219 return createStringError(
220 EC: make_error_code(e: std::errc::not_supported),
221 S: Twine("BOLT-ERROR: no assembly info for target ", TripleName));
222 // BOLT creates "func@PLT" symbols for PLT entries. In function assembly dump
223 // we want to emit such names as using @PLT without double quotes to convey
224 // variant kind to the assembler. BOLT doesn't rely on the linker so we can
225 // override the default AsmInfo behavior to emit names the way we want.
226 AsmInfo->setAllowAtInName(true);
227
228 std::unique_ptr<const MCSubtargetInfo> STI(
229 TheTarget->createMCSubtargetInfo(TheTriple: TripleName, CPU: "", Features: FeaturesStr));
230 if (!STI)
231 return createStringError(
232 EC: make_error_code(e: std::errc::not_supported),
233 S: Twine("BOLT-ERROR: no subtarget info for target ", TripleName));
234
235 std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
236 if (!MII)
237 return createStringError(
238 EC: make_error_code(e: std::errc::not_supported),
239 S: Twine("BOLT-ERROR: no instruction info for target ", TripleName));
240
241 std::unique_ptr<MCContext> Ctx(
242 new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
243 std::unique_ptr<MCObjectFileInfo> MOFI(
244 TheTarget->createMCObjectFileInfo(Ctx&: *Ctx, PIC: IsPIC));
245 Ctx->setObjectFileInfo(MOFI.get());
246 // We do not support X86 Large code model. Change this in the future.
247 bool Large = false;
248 if (TheTriple.getArch() == llvm::Triple::aarch64)
249 Large = true;
250 unsigned LSDAEncoding =
251 Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
252 if (IsPIC) {
253 LSDAEncoding = dwarf::DW_EH_PE_pcrel |
254 (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
255 }
256
257 std::unique_ptr<MCDisassembler> DisAsm(
258 TheTarget->createMCDisassembler(STI: *STI, Ctx&: *Ctx));
259
260 if (!DisAsm)
261 return createStringError(
262 EC: make_error_code(e: std::errc::not_supported),
263 S: Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
264
265 std::unique_ptr<const MCInstrAnalysis> MIA(
266 TheTarget->createMCInstrAnalysis(Info: MII.get()));
267 if (!MIA)
268 return createStringError(
269 EC: make_error_code(e: std::errc::not_supported),
270 S: Twine("BOLT-ERROR: failed to create instruction analysis for target ",
271 TripleName));
272
273 int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
274 std::unique_ptr<MCInstPrinter> InstructionPrinter(
275 TheTarget->createMCInstPrinter(T: TheTriple, SyntaxVariant: AsmPrinterVariant, MAI: *AsmInfo,
276 MII: *MII, MRI: *MRI));
277 if (!InstructionPrinter)
278 return createStringError(
279 EC: make_error_code(e: std::errc::not_supported),
280 S: Twine("BOLT-ERROR: no instruction printer for target ", TripleName));
281 InstructionPrinter->setPrintImmHex(true);
282
283 std::unique_ptr<MCCodeEmitter> MCE(
284 TheTarget->createMCCodeEmitter(II: *MII, Ctx&: *Ctx));
285
286 auto BC = std::make_unique<BinaryContext>(
287 args: std::move(Ctx), args: std::move(DwCtx), args: std::make_unique<Triple>(args&: TheTriple),
288 args&: TheTarget, args: std::string(TripleName), args: std::move(MCE), args: std::move(MOFI),
289 args: std::move(AsmInfo), args: std::move(MII), args: std::move(STI),
290 args: std::move(InstructionPrinter), args: std::move(MIA), args: nullptr, args: std::move(MRI),
291 args: std::move(DisAsm), args&: Logger);
292
293 BC->LSDAEncoding = LSDAEncoding;
294
295 BC->MAB = std::unique_ptr<MCAsmBackend>(
296 BC->TheTarget->createMCAsmBackend(STI: *BC->STI, MRI: *BC->MRI, Options: MCTargetOptions()));
297
298 BC->setFilename(InputFileName);
299
300 BC->HasFixedLoadAddress = !IsPIC;
301
302 BC->SymbolicDisAsm = std::unique_ptr<MCDisassembler>(
303 BC->TheTarget->createMCDisassembler(STI: *BC->STI, Ctx&: *BC->Ctx));
304
305 if (!BC->SymbolicDisAsm)
306 return createStringError(
307 EC: make_error_code(e: std::errc::not_supported),
308 S: Twine("BOLT-ERROR: no disassembler info for target ", TripleName));
309
310 return std::move(BC);
311}
312
313bool BinaryContext::forceSymbolRelocations(StringRef SymbolName) const {
314 if (opts::HotText &&
315 (SymbolName == "__hot_start" || SymbolName == "__hot_end"))
316 return true;
317
318 if (opts::HotData &&
319 (SymbolName == "__hot_data_start" || SymbolName == "__hot_data_end"))
320 return true;
321
322 if (SymbolName == "_end")
323 return true;
324
325 return false;
326}
327
328std::unique_ptr<MCObjectWriter>
329BinaryContext::createObjectWriter(raw_pwrite_stream &OS) {
330 return MAB->createObjectWriter(OS);
331}
332
333bool BinaryContext::validateObjectNesting() const {
334 auto Itr = BinaryDataMap.begin();
335 auto End = BinaryDataMap.end();
336 bool Valid = true;
337 while (Itr != End) {
338 auto Next = std::next(x: Itr);
339 while (Next != End &&
340 Itr->second->getSection() == Next->second->getSection() &&
341 Itr->second->containsRange(Address: Next->second->getAddress(),
342 Size: Next->second->getSize())) {
343 if (Next->second->Parent != Itr->second) {
344 this->errs() << "BOLT-WARNING: object nesting incorrect for:\n"
345 << "BOLT-WARNING: " << *Itr->second << "\n"
346 << "BOLT-WARNING: " << *Next->second << "\n";
347 Valid = false;
348 }
349 ++Next;
350 }
351 Itr = Next;
352 }
353 return Valid;
354}
355
356bool BinaryContext::validateHoles() const {
357 bool Valid = true;
358 for (BinarySection &Section : sections()) {
359 for (const Relocation &Rel : Section.relocations()) {
360 uint64_t RelAddr = Rel.Offset + Section.getAddress();
361 const BinaryData *BD = getBinaryDataContainingAddress(Address: RelAddr);
362 if (!BD) {
363 this->errs()
364 << "BOLT-WARNING: no BinaryData found for relocation at address"
365 << " 0x" << Twine::utohexstr(Val: RelAddr) << " in " << Section.getName()
366 << "\n";
367 Valid = false;
368 } else if (!BD->getAtomicRoot()) {
369 this->errs()
370 << "BOLT-WARNING: no atomic BinaryData found for relocation at "
371 << "address 0x" << Twine::utohexstr(Val: RelAddr) << " in "
372 << Section.getName() << "\n";
373 Valid = false;
374 }
375 }
376 }
377 return Valid;
378}
379
380void BinaryContext::updateObjectNesting(BinaryDataMapType::iterator GAI) {
381 const uint64_t Address = GAI->second->getAddress();
382 const uint64_t Size = GAI->second->getSize();
383
384 auto fixParents = [&](BinaryDataMapType::iterator Itr,
385 BinaryData *NewParent) {
386 BinaryData *OldParent = Itr->second->Parent;
387 Itr->second->Parent = NewParent;
388 ++Itr;
389 while (Itr != BinaryDataMap.end() && OldParent &&
390 Itr->second->Parent == OldParent) {
391 Itr->second->Parent = NewParent;
392 ++Itr;
393 }
394 };
395
396 // Check if the previous symbol contains the newly added symbol.
397 if (GAI != BinaryDataMap.begin()) {
398 BinaryData *Prev = std::prev(x: GAI)->second;
399 while (Prev) {
400 if (Prev->getSection() == GAI->second->getSection() &&
401 Prev->containsRange(Address, Size)) {
402 fixParents(GAI, Prev);
403 } else {
404 fixParents(GAI, nullptr);
405 }
406 Prev = Prev->Parent;
407 }
408 }
409
410 // Check if the newly added symbol contains any subsequent symbols.
411 if (Size != 0) {
412 BinaryData *BD = GAI->second->Parent ? GAI->second->Parent : GAI->second;
413 auto Itr = std::next(x: GAI);
414 while (
415 Itr != BinaryDataMap.end() &&
416 BD->containsRange(Address: Itr->second->getAddress(), Size: Itr->second->getSize())) {
417 Itr->second->Parent = BD;
418 ++Itr;
419 }
420 }
421}
422
423iterator_range<BinaryContext::binary_data_iterator>
424BinaryContext::getSubBinaryData(BinaryData *BD) {
425 auto Start = std::next(x: BinaryDataMap.find(x: BD->getAddress()));
426 auto End = Start;
427 while (End != BinaryDataMap.end() && BD->isAncestorOf(BD: End->second))
428 ++End;
429 return make_range(x: Start, y: End);
430}
431
432std::pair<const MCSymbol *, uint64_t>
433BinaryContext::handleAddressRef(uint64_t Address, BinaryFunction &BF,
434 bool IsPCRel) {
435 if (isAArch64()) {
436 // Check if this is an access to a constant island and create bookkeeping
437 // to keep track of it and emit it later as part of this function.
438 if (MCSymbol *IslandSym = BF.getOrCreateIslandAccess(Address))
439 return std::make_pair(x&: IslandSym, y: 0);
440
441 // Detect custom code written in assembly that refers to arbitrary
442 // constant islands from other functions. Write this reference so we
443 // can pull this constant island and emit it as part of this function
444 // too.
445 auto IslandIter = AddressToConstantIslandMap.lower_bound(x: Address);
446
447 if (IslandIter != AddressToConstantIslandMap.begin() &&
448 (IslandIter == AddressToConstantIslandMap.end() ||
449 IslandIter->first > Address))
450 --IslandIter;
451
452 if (IslandIter != AddressToConstantIslandMap.end()) {
453 // Fall-back to referencing the original constant island in the presence
454 // of dynamic relocs, as we currently do not support cloning them.
455 // Notice: we might fail to link because of this, if the original constant
456 // island we are referring would be emitted too far away.
457 if (IslandIter->second->hasDynamicRelocationAtIsland()) {
458 MCSymbol *IslandSym =
459 IslandIter->second->getOrCreateIslandAccess(Address);
460 if (IslandSym)
461 return std::make_pair(x&: IslandSym, y: 0);
462 } else if (MCSymbol *IslandSym =
463 IslandIter->second->getOrCreateProxyIslandAccess(Address,
464 Referrer&: BF)) {
465 BF.createIslandDependency(Island: IslandSym, BF: IslandIter->second);
466 return std::make_pair(x&: IslandSym, y: 0);
467 }
468 }
469 }
470
471 // Note that the address does not necessarily have to reside inside
472 // a section, it could be an absolute address too.
473 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
474 if (Section && Section->isText()) {
475 if (BF.containsAddress(PC: Address, /*UseMaxSize=*/isAArch64())) {
476 if (Address != BF.getAddress()) {
477 // The address could potentially escape. Mark it as another entry
478 // point into the function.
479 if (opts::Verbosity >= 1) {
480 this->outs() << "BOLT-INFO: potentially escaped address 0x"
481 << Twine::utohexstr(Val: Address) << " in function " << BF
482 << '\n';
483 }
484 BF.HasInternalLabelReference = true;
485 return std::make_pair(
486 x: BF.addEntryPointAtOffset(Offset: Address - BF.getAddress()), y: 0);
487 }
488 } else {
489 addInterproceduralReference(Function: &BF, Address);
490 }
491 }
492
493 // With relocations, catch jump table references outside of the basic block
494 // containing the indirect jump.
495 if (HasRelocations) {
496 const MemoryContentsType MemType = analyzeMemoryAt(Address, BF);
497 if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE && IsPCRel) {
498 const MCSymbol *Symbol =
499 getOrCreateJumpTable(Function&: BF, Address, Type: JumpTable::JTT_PIC);
500
501 return std::make_pair(x&: Symbol, y: 0);
502 }
503 }
504
505 if (BinaryData *BD = getBinaryDataContainingAddress(Address))
506 return std::make_pair(x: BD->getSymbol(), y: Address - BD->getAddress());
507
508 // TODO: use DWARF info to get size/alignment here?
509 MCSymbol *TargetSymbol = getOrCreateGlobalSymbol(Address, Prefix: "DATAat");
510 LLVM_DEBUG(dbgs() << "Created symbol " << TargetSymbol->getName() << '\n');
511 return std::make_pair(x&: TargetSymbol, y: 0);
512}
513
514MemoryContentsType BinaryContext::analyzeMemoryAt(uint64_t Address,
515 BinaryFunction &BF) {
516 if (!isX86())
517 return MemoryContentsType::UNKNOWN;
518
519 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
520 if (!Section) {
521 // No section - possibly an absolute address. Since we don't allow
522 // internal function addresses to escape the function scope - we
523 // consider it a tail call.
524 if (opts::Verbosity > 1) {
525 this->errs() << "BOLT-WARNING: no section for address 0x"
526 << Twine::utohexstr(Val: Address) << " referenced from function "
527 << BF << '\n';
528 }
529 return MemoryContentsType::UNKNOWN;
530 }
531
532 if (Section->isVirtual()) {
533 // The contents are filled at runtime.
534 return MemoryContentsType::UNKNOWN;
535 }
536
537 // No support for jump tables in code yet.
538 if (Section->isText())
539 return MemoryContentsType::UNKNOWN;
540
541 // Start with checking for PIC jump table. We expect non-PIC jump tables
542 // to have high 32 bits set to 0.
543 if (analyzeJumpTable(Address, Type: JumpTable::JTT_PIC, BF))
544 return MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
545
546 if (analyzeJumpTable(Address, Type: JumpTable::JTT_NORMAL, BF))
547 return MemoryContentsType::POSSIBLE_JUMP_TABLE;
548
549 return MemoryContentsType::UNKNOWN;
550}
551
552bool BinaryContext::analyzeJumpTable(const uint64_t Address,
553 const JumpTable::JumpTableType Type,
554 const BinaryFunction &BF,
555 const uint64_t NextJTAddress,
556 JumpTable::AddressesType *EntriesAsAddress,
557 bool *HasEntryInFragment) const {
558 // Target address of __builtin_unreachable.
559 const uint64_t UnreachableAddress = BF.getAddress() + BF.getSize();
560
561 // Is one of the targets __builtin_unreachable?
562 bool HasUnreachable = false;
563
564 // Does one of the entries match function start address?
565 bool HasStartAsEntry = false;
566
567 // Number of targets other than __builtin_unreachable.
568 uint64_t NumRealEntries = 0;
569
570 // Size of the jump table without trailing __builtin_unreachable entries.
571 size_t TrimmedSize = 0;
572
573 auto addEntryAddress = [&](uint64_t EntryAddress, bool Unreachable = false) {
574 if (!EntriesAsAddress)
575 return;
576 EntriesAsAddress->emplace_back(args&: EntryAddress);
577 if (!Unreachable)
578 TrimmedSize = EntriesAsAddress->size();
579 };
580
581 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
582 if (!Section)
583 return false;
584
585 // The upper bound is defined by containing object, section limits, and
586 // the next jump table in memory.
587 uint64_t UpperBound = Section->getEndAddress();
588 const BinaryData *JumpTableBD = getBinaryDataAtAddress(Address);
589 if (JumpTableBD && JumpTableBD->getSize()) {
590 assert(JumpTableBD->getEndAddress() <= UpperBound &&
591 "data object cannot cross a section boundary");
592 UpperBound = JumpTableBD->getEndAddress();
593 }
594 if (NextJTAddress)
595 UpperBound = std::min(a: NextJTAddress, b: UpperBound);
596
597 LLVM_DEBUG({
598 using JTT = JumpTable::JumpTableType;
599 dbgs() << formatv("BOLT-DEBUG: analyzeJumpTable @{0:x} in {1}, JTT={2}\n",
600 Address, BF.getPrintName(),
601 Type == JTT::JTT_PIC ? "PIC" : "Normal");
602 });
603 const uint64_t EntrySize = getJumpTableEntrySize(Type);
604 for (uint64_t EntryAddress = Address; EntryAddress <= UpperBound - EntrySize;
605 EntryAddress += EntrySize) {
606 LLVM_DEBUG(dbgs() << " * Checking 0x" << Twine::utohexstr(EntryAddress)
607 << " -> ");
608 // Check if there's a proper relocation against the jump table entry.
609 if (HasRelocations) {
610 if (Type == JumpTable::JTT_PIC &&
611 !DataPCRelocations.count(x: EntryAddress)) {
612 LLVM_DEBUG(
613 dbgs() << "FAIL: JTT_PIC table, no relocation for this address\n");
614 break;
615 }
616 if (Type == JumpTable::JTT_NORMAL && !getRelocationAt(Address: EntryAddress)) {
617 LLVM_DEBUG(
618 dbgs()
619 << "FAIL: JTT_NORMAL table, no relocation for this address\n");
620 break;
621 }
622 }
623
624 const uint64_t Value =
625 (Type == JumpTable::JTT_PIC)
626 ? Address + *getSignedValueAtAddress(Address: EntryAddress, Size: EntrySize)
627 : *getPointerAtAddress(Address: EntryAddress);
628
629 // __builtin_unreachable() case.
630 if (Value == UnreachableAddress) {
631 addEntryAddress(Value, /*Unreachable*/ true);
632 HasUnreachable = true;
633 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} __builtin_unreachable\n", Value));
634 continue;
635 }
636
637 // Function start is another special case. It is allowed in the jump table,
638 // but we need at least one another regular entry to distinguish the table
639 // from, e.g. a function pointer array.
640 if (Value == BF.getAddress()) {
641 HasStartAsEntry = true;
642 addEntryAddress(Value);
643 continue;
644 }
645
646 // Function or one of its fragments.
647 const BinaryFunction *TargetBF = getBinaryFunctionContainingAddress(Address: Value);
648 const bool DoesBelongToFunction =
649 BF.containsAddress(PC: Value) ||
650 (TargetBF && TargetBF->isParentOrChildOf(Other: BF));
651 if (!DoesBelongToFunction) {
652 LLVM_DEBUG({
653 if (!BF.containsAddress(Value)) {
654 dbgs() << "FAIL: function doesn't contain this address\n";
655 if (TargetBF) {
656 dbgs() << " ! function containing this address: "
657 << TargetBF->getPrintName() << '\n';
658 if (TargetBF->isFragment()) {
659 dbgs() << " ! is a fragment";
660 for (BinaryFunction *Parent : TargetBF->ParentFragments)
661 dbgs() << ", parent: " << Parent->getPrintName();
662 dbgs() << '\n';
663 }
664 }
665 }
666 });
667 break;
668 }
669
670 // Check there's an instruction at this offset.
671 if (TargetBF->getState() == BinaryFunction::State::Disassembled &&
672 !TargetBF->getInstructionAtOffset(Offset: Value - TargetBF->getAddress())) {
673 LLVM_DEBUG(dbgs() << formatv("FAIL: no instruction at {0:x}\n", Value));
674 break;
675 }
676
677 ++NumRealEntries;
678 LLVM_DEBUG(dbgs() << formatv("OK: {0:x} real entry\n", Value));
679
680 if (TargetBF != &BF && HasEntryInFragment)
681 *HasEntryInFragment = true;
682 addEntryAddress(Value);
683 }
684
685 // Trim direct/normal jump table to exclude trailing unreachable entries that
686 // can collide with a function address.
687 if (Type == JumpTable::JTT_NORMAL && EntriesAsAddress &&
688 TrimmedSize != EntriesAsAddress->size() &&
689 getBinaryFunctionAtAddress(Address: UnreachableAddress))
690 EntriesAsAddress->resize(new_size: TrimmedSize);
691
692 // It's a jump table if the number of real entries is more than 1, or there's
693 // one real entry and one or more special targets. If there are only multiple
694 // special targets, then it's not a jump table.
695 return NumRealEntries + (HasUnreachable || HasStartAsEntry) >= 2;
696}
697
698void BinaryContext::populateJumpTables() {
699 LLVM_DEBUG(dbgs() << "DataPCRelocations: " << DataPCRelocations.size()
700 << '\n');
701 for (auto JTI = JumpTables.begin(), JTE = JumpTables.end(); JTI != JTE;
702 ++JTI) {
703 JumpTable *JT = JTI->second;
704
705 bool NonSimpleParent = false;
706 for (BinaryFunction *BF : JT->Parents)
707 NonSimpleParent |= !BF->isSimple();
708 if (NonSimpleParent)
709 continue;
710
711 uint64_t NextJTAddress = 0;
712 auto NextJTI = std::next(x: JTI);
713 if (NextJTI != JTE)
714 NextJTAddress = NextJTI->second->getAddress();
715
716 const bool Success =
717 analyzeJumpTable(Address: JT->getAddress(), Type: JT->Type, BF: *(JT->Parents[0]),
718 NextJTAddress, EntriesAsAddress: &JT->EntriesAsAddress, HasEntryInFragment: &JT->IsSplit);
719 if (!Success) {
720 LLVM_DEBUG({
721 dbgs() << "failed to analyze ";
722 JT->print(dbgs());
723 if (NextJTI != JTE) {
724 dbgs() << "next ";
725 NextJTI->second->print(dbgs());
726 }
727 });
728 llvm_unreachable("jump table heuristic failure");
729 }
730 for (BinaryFunction *Frag : JT->Parents) {
731 if (JT->IsSplit)
732 Frag->setHasIndirectTargetToSplitFragment(true);
733 for (uint64_t EntryAddress : JT->EntriesAsAddress)
734 // if target is builtin_unreachable
735 if (EntryAddress == Frag->getAddress() + Frag->getSize()) {
736 Frag->IgnoredBranches.emplace_back(Args: EntryAddress - Frag->getAddress(),
737 Args: Frag->getSize());
738 } else if (EntryAddress >= Frag->getAddress() &&
739 EntryAddress < Frag->getAddress() + Frag->getSize()) {
740 Frag->registerReferencedOffset(Offset: EntryAddress - Frag->getAddress());
741 }
742 }
743
744 // In strict mode, erase PC-relative relocation record. Later we check that
745 // all such records are erased and thus have been accounted for.
746 if (opts::StrictMode && JT->Type == JumpTable::JTT_PIC) {
747 for (uint64_t Address = JT->getAddress();
748 Address < JT->getAddress() + JT->getSize();
749 Address += JT->EntrySize) {
750 DataPCRelocations.erase(position: DataPCRelocations.find(x: Address));
751 }
752 }
753
754 // Mark to skip the function and all its fragments.
755 for (BinaryFunction *Frag : JT->Parents)
756 if (Frag->hasIndirectTargetToSplitFragment())
757 addFragmentsToSkip(Function: Frag);
758 }
759
760 if (opts::StrictMode && DataPCRelocations.size()) {
761 LLVM_DEBUG({
762 dbgs() << DataPCRelocations.size()
763 << " unclaimed PC-relative relocations left in data:\n";
764 for (uint64_t Reloc : DataPCRelocations)
765 dbgs() << Twine::utohexstr(Reloc) << '\n';
766 });
767 assert(0 && "unclaimed PC-relative relocations left in data\n");
768 }
769 clearList(List&: DataPCRelocations);
770}
771
772void BinaryContext::skipMarkedFragments() {
773 std::vector<BinaryFunction *> FragmentQueue;
774 // Copy the functions to FragmentQueue.
775 FragmentQueue.assign(first: FragmentsToSkip.begin(), last: FragmentsToSkip.end());
776 auto addToWorklist = [&](BinaryFunction *Function) -> void {
777 if (FragmentsToSkip.count(x: Function))
778 return;
779 FragmentQueue.push_back(x: Function);
780 addFragmentsToSkip(Function);
781 };
782 // Functions containing split jump tables need to be skipped with all
783 // fragments (transitively).
784 for (size_t I = 0; I != FragmentQueue.size(); I++) {
785 BinaryFunction *BF = FragmentQueue[I];
786 assert(FragmentsToSkip.count(BF) &&
787 "internal error in traversing function fragments");
788 if (opts::Verbosity >= 1)
789 this->errs() << "BOLT-WARNING: Ignoring " << BF->getPrintName() << '\n';
790 BF->setSimple(false);
791 BF->setHasIndirectTargetToSplitFragment(true);
792
793 llvm::for_each(Range&: BF->Fragments, F: addToWorklist);
794 llvm::for_each(Range&: BF->ParentFragments, F: addToWorklist);
795 }
796 if (!FragmentsToSkip.empty())
797 this->errs() << "BOLT-WARNING: skipped " << FragmentsToSkip.size()
798 << " function" << (FragmentsToSkip.size() == 1 ? "" : "s")
799 << " due to cold fragments\n";
800}
801
802MCSymbol *BinaryContext::getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
803 uint64_t Size,
804 uint16_t Alignment,
805 unsigned Flags) {
806 auto Itr = BinaryDataMap.find(x: Address);
807 if (Itr != BinaryDataMap.end()) {
808 assert(Itr->second->getSize() == Size || !Size);
809 return Itr->second->getSymbol();
810 }
811
812 std::string Name = (Prefix + "0x" + Twine::utohexstr(Val: Address)).str();
813 assert(!GlobalSymbols.count(Name) && "created name is not unique");
814 return registerNameAtAddress(Name, Address, Size, Alignment, Flags);
815}
816
817MCSymbol *BinaryContext::getOrCreateUndefinedGlobalSymbol(StringRef Name) {
818 return Ctx->getOrCreateSymbol(Name);
819}
820
821BinaryFunction *BinaryContext::createBinaryFunction(
822 const std::string &Name, BinarySection &Section, uint64_t Address,
823 uint64_t Size, uint64_t SymbolSize, uint16_t Alignment) {
824 auto Result = BinaryFunctions.emplace(
825 args&: Address, args: BinaryFunction(Name, Section, Address, Size, *this));
826 assert(Result.second == true && "unexpected duplicate function");
827 BinaryFunction *BF = &Result.first->second;
828 registerNameAtAddress(Name, Address, Size: SymbolSize ? SymbolSize : Size,
829 Alignment);
830 setSymbolToFunctionMap(Sym: BF->getSymbol(), BF);
831 return BF;
832}
833
834const MCSymbol *
835BinaryContext::getOrCreateJumpTable(BinaryFunction &Function, uint64_t Address,
836 JumpTable::JumpTableType Type) {
837 // Two fragments of same function access same jump table
838 if (JumpTable *JT = getJumpTableContainingAddress(Address)) {
839 assert(JT->Type == Type && "jump table types have to match");
840 assert(Address == JT->getAddress() && "unexpected non-empty jump table");
841
842 // Prevent associating a jump table to a specific fragment twice.
843 // This simple check arises from the assumption: no more than 2 fragments.
844 if (JT->Parents.size() == 1 && JT->Parents[0] != &Function) {
845 assert(JT->Parents[0]->isParentOrChildOf(Function) &&
846 "cannot re-use jump table of a different function");
847 // Duplicate the entry for the parent function for easy access
848 JT->Parents.push_back(Elt: &Function);
849 if (opts::Verbosity > 2) {
850 this->outs() << "BOLT-INFO: Multiple fragments access same jump table: "
851 << JT->Parents[0]->getPrintName() << "; "
852 << Function.getPrintName() << "\n";
853 JT->print(OS&: this->outs());
854 }
855 Function.JumpTables.emplace(args&: Address, args&: JT);
856 JT->Parents[0]->setHasIndirectTargetToSplitFragment(true);
857 JT->Parents[1]->setHasIndirectTargetToSplitFragment(true);
858 }
859
860 bool IsJumpTableParent = false;
861 (void)IsJumpTableParent;
862 for (BinaryFunction *Frag : JT->Parents)
863 if (Frag == &Function)
864 IsJumpTableParent = true;
865 assert(IsJumpTableParent &&
866 "cannot re-use jump table of a different function");
867 return JT->getFirstLabel();
868 }
869
870 // Re-use the existing symbol if possible.
871 MCSymbol *JTLabel = nullptr;
872 if (BinaryData *Object = getBinaryDataAtAddress(Address)) {
873 if (!isInternalSymbolName(Name: Object->getSymbol()->getName()))
874 JTLabel = Object->getSymbol();
875 }
876
877 const uint64_t EntrySize = getJumpTableEntrySize(Type);
878 if (!JTLabel) {
879 const std::string JumpTableName = generateJumpTableName(BF: Function, Address);
880 JTLabel = registerNameAtAddress(Name: JumpTableName, Address, Size: 0, Alignment: EntrySize);
881 }
882
883 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating jump table " << JTLabel->getName()
884 << " in function " << Function << '\n');
885
886 JumpTable *JT = new JumpTable(*JTLabel, Address, EntrySize, Type,
887 JumpTable::LabelMapType{{0, JTLabel}},
888 *getSectionForAddress(Address));
889 JT->Parents.push_back(Elt: &Function);
890 if (opts::Verbosity > 2)
891 JT->print(OS&: this->outs());
892 JumpTables.emplace(args&: Address, args&: JT);
893
894 // Duplicate the entry for the parent function for easy access.
895 Function.JumpTables.emplace(args&: Address, args&: JT);
896 return JTLabel;
897}
898
899std::pair<uint64_t, const MCSymbol *>
900BinaryContext::duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
901 const MCSymbol *OldLabel) {
902 auto L = scopeLock();
903 unsigned Offset = 0;
904 bool Found = false;
905 for (std::pair<const unsigned, MCSymbol *> Elmt : JT->Labels) {
906 if (Elmt.second != OldLabel)
907 continue;
908 Offset = Elmt.first;
909 Found = true;
910 break;
911 }
912 assert(Found && "Label not found");
913 (void)Found;
914 MCSymbol *NewLabel = Ctx->createNamedTempSymbol(Name: "duplicatedJT");
915 JumpTable *NewJT =
916 new JumpTable(*NewLabel, JT->getAddress(), JT->EntrySize, JT->Type,
917 JumpTable::LabelMapType{{Offset, NewLabel}},
918 *getSectionForAddress(Address: JT->getAddress()));
919 NewJT->Parents = JT->Parents;
920 NewJT->Entries = JT->Entries;
921 NewJT->Counts = JT->Counts;
922 uint64_t JumpTableID = ++DuplicatedJumpTables;
923 // Invert it to differentiate from regular jump tables whose IDs are their
924 // addresses in the input binary memory space
925 JumpTableID = ~JumpTableID;
926 JumpTables.emplace(args&: JumpTableID, args&: NewJT);
927 Function.JumpTables.emplace(args&: JumpTableID, args&: NewJT);
928 return std::make_pair(x&: JumpTableID, y&: NewLabel);
929}
930
931std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
932 uint64_t Address) {
933 size_t Id;
934 uint64_t Offset = 0;
935 if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
936 Offset = Address - JT->getAddress();
937 auto Itr = JT->Labels.find(x: Offset);
938 if (Itr != JT->Labels.end())
939 return std::string(Itr->second->getName());
940 Id = JumpTableIds.at(k: JT->getAddress());
941 } else {
942 Id = JumpTableIds[Address] = BF.JumpTables.size();
943 }
944 return ("JUMP_TABLE/" + BF.getOneName().str() + "." + std::to_string(val: Id) +
945 (Offset ? ("." + std::to_string(val: Offset)) : ""));
946}
947
948bool BinaryContext::hasValidCodePadding(const BinaryFunction &BF) {
949 // FIXME: aarch64 support is missing.
950 if (!isX86())
951 return true;
952
953 if (BF.getSize() == BF.getMaxSize())
954 return true;
955
956 ErrorOr<ArrayRef<unsigned char>> FunctionData = BF.getData();
957 assert(FunctionData && "cannot get function as data");
958
959 uint64_t Offset = BF.getSize();
960 MCInst Instr;
961 uint64_t InstrSize = 0;
962 uint64_t InstrAddress = BF.getAddress() + Offset;
963 using std::placeholders::_1;
964
965 // Skip instructions that satisfy the predicate condition.
966 auto skipInstructions = [&](std::function<bool(const MCInst &)> Predicate) {
967 const uint64_t StartOffset = Offset;
968 for (; Offset < BF.getMaxSize();
969 Offset += InstrSize, InstrAddress += InstrSize) {
970 if (!DisAsm->getInstruction(Instr, Size&: InstrSize, Bytes: FunctionData->slice(N: Offset),
971 Address: InstrAddress, CStream&: nulls()))
972 break;
973 if (!Predicate(Instr))
974 break;
975 }
976
977 return Offset - StartOffset;
978 };
979
980 // Skip a sequence of zero bytes.
981 auto skipZeros = [&]() {
982 const uint64_t StartOffset = Offset;
983 for (; Offset < BF.getMaxSize(); ++Offset)
984 if ((*FunctionData)[Offset] != 0)
985 break;
986
987 return Offset - StartOffset;
988 };
989
990 // Accept the whole padding area filled with breakpoints.
991 auto isBreakpoint = std::bind(f: &MCPlusBuilder::isBreakpoint, args: MIB.get(), args: _1);
992 if (skipInstructions(isBreakpoint) && Offset == BF.getMaxSize())
993 return true;
994
995 auto isNoop = std::bind(f: &MCPlusBuilder::isNoop, args: MIB.get(), args: _1);
996
997 // Some functions have a jump to the next function or to the padding area
998 // inserted after the body.
999 auto isSkipJump = [&](const MCInst &Instr) {
1000 uint64_t TargetAddress = 0;
1001 if (MIB->isUnconditionalBranch(Inst: Instr) &&
1002 MIB->evaluateBranch(Inst: Instr, Addr: InstrAddress, Size: InstrSize, Target&: TargetAddress)) {
1003 if (TargetAddress >= InstrAddress + InstrSize &&
1004 TargetAddress <= BF.getAddress() + BF.getMaxSize()) {
1005 return true;
1006 }
1007 }
1008 return false;
1009 };
1010
1011 // Skip over nops, jumps, and zero padding. Allow interleaving (this happens).
1012 while (skipInstructions(isNoop) || skipInstructions(isSkipJump) ||
1013 skipZeros())
1014 ;
1015
1016 if (Offset == BF.getMaxSize())
1017 return true;
1018
1019 if (opts::Verbosity >= 1) {
1020 this->errs() << "BOLT-WARNING: bad padding at address 0x"
1021 << Twine::utohexstr(Val: BF.getAddress() + BF.getSize())
1022 << " starting at offset " << (Offset - BF.getSize())
1023 << " in function " << BF << '\n'
1024 << FunctionData->slice(N: BF.getSize(),
1025 M: BF.getMaxSize() - BF.getSize())
1026 << '\n';
1027 }
1028
1029 return false;
1030}
1031
1032void BinaryContext::adjustCodePadding() {
1033 for (auto &BFI : BinaryFunctions) {
1034 BinaryFunction &BF = BFI.second;
1035 if (!shouldEmit(Function: BF))
1036 continue;
1037
1038 if (!hasValidCodePadding(BF)) {
1039 if (HasRelocations) {
1040 if (opts::Verbosity >= 1) {
1041 this->outs() << "BOLT-INFO: function " << BF
1042 << " has invalid padding. Ignoring the function.\n";
1043 }
1044 BF.setIgnored();
1045 } else {
1046 BF.setMaxSize(BF.getSize());
1047 }
1048 }
1049 }
1050}
1051
1052MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
1053 uint64_t Size,
1054 uint16_t Alignment,
1055 unsigned Flags) {
1056 // Register the name with MCContext.
1057 MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
1058
1059 auto GAI = BinaryDataMap.find(x: Address);
1060 BinaryData *BD;
1061 if (GAI == BinaryDataMap.end()) {
1062 ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
1063 BinarySection &Section =
1064 SectionOrErr ? SectionOrErr.get() : absoluteSection();
1065 BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
1066 Section, Flags);
1067 GAI = BinaryDataMap.emplace(args&: Address, args&: BD).first;
1068 GlobalSymbols[Name] = BD;
1069 updateObjectNesting(GAI);
1070 } else {
1071 BD = GAI->second;
1072 if (!BD->hasName(Name)) {
1073 GlobalSymbols[Name] = BD;
1074 BD->Symbols.push_back(x: Symbol);
1075 }
1076 }
1077
1078 return Symbol;
1079}
1080
1081const BinaryData *
1082BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
1083 auto NI = BinaryDataMap.lower_bound(x: Address);
1084 auto End = BinaryDataMap.end();
1085 if ((NI != End && Address == NI->first) ||
1086 ((NI != BinaryDataMap.begin()) && (NI-- != BinaryDataMap.begin()))) {
1087 if (NI->second->containsAddress(Address))
1088 return NI->second;
1089
1090 // If this is a sub-symbol, see if a parent data contains the address.
1091 const BinaryData *BD = NI->second->getParent();
1092 while (BD) {
1093 if (BD->containsAddress(Address))
1094 return BD;
1095 BD = BD->getParent();
1096 }
1097 }
1098 return nullptr;
1099}
1100
1101BinaryData *BinaryContext::getGOTSymbol() {
1102 // First tries to find a global symbol with that name
1103 BinaryData *GOTSymBD = getBinaryDataByName(Name: "_GLOBAL_OFFSET_TABLE_");
1104 if (GOTSymBD)
1105 return GOTSymBD;
1106
1107 // This symbol might be hidden from run-time link, so fetch the local
1108 // definition if available.
1109 GOTSymBD = getBinaryDataByName(Name: "_GLOBAL_OFFSET_TABLE_/1");
1110 if (!GOTSymBD)
1111 return nullptr;
1112
1113 // If the local symbol is not unique, fail
1114 unsigned Index = 2;
1115 SmallString<30> Storage;
1116 while (const BinaryData *BD =
1117 getBinaryDataByName(Name: Twine("_GLOBAL_OFFSET_TABLE_/")
1118 .concat(Suffix: Twine(Index++))
1119 .toStringRef(Out&: Storage)))
1120 if (BD->getAddress() != GOTSymBD->getAddress())
1121 return nullptr;
1122
1123 return GOTSymBD;
1124}
1125
1126bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
1127 auto NI = BinaryDataMap.find(x: Address);
1128 assert(NI != BinaryDataMap.end());
1129 if (NI == BinaryDataMap.end())
1130 return false;
1131 // TODO: it's possible that a jump table starts at the same address
1132 // as a larger blob of private data. When we set the size of the
1133 // jump table, it might be smaller than the total blob size. In this
1134 // case we just leave the original size since (currently) it won't really
1135 // affect anything.
1136 assert((!NI->second->Size || NI->second->Size == Size ||
1137 (NI->second->isJumpTable() && NI->second->Size > Size)) &&
1138 "can't change the size of a symbol that has already had its "
1139 "size set");
1140 if (!NI->second->Size) {
1141 NI->second->Size = Size;
1142 updateObjectNesting(GAI: NI);
1143 return true;
1144 }
1145 return false;
1146}
1147
1148void BinaryContext::generateSymbolHashes() {
1149 auto isPadding = [](const BinaryData &BD) {
1150 StringRef Contents = BD.getSection().getContents();
1151 StringRef SymData = Contents.substr(Start: BD.getOffset(), N: BD.getSize());
1152 return (BD.getName().starts_with(Prefix: "HOLEat") ||
1153 SymData.find_first_not_of(C: 0) == StringRef::npos);
1154 };
1155
1156 uint64_t NumCollisions = 0;
1157 for (auto &Entry : BinaryDataMap) {
1158 BinaryData &BD = *Entry.second;
1159 StringRef Name = BD.getName();
1160
1161 if (!isInternalSymbolName(Name))
1162 continue;
1163
1164 // First check if a non-anonymous alias exists and move it to the front.
1165 if (BD.getSymbols().size() > 1) {
1166 auto Itr = llvm::find_if(Range&: BD.getSymbols(), P: [&](const MCSymbol *Symbol) {
1167 return !isInternalSymbolName(Name: Symbol->getName());
1168 });
1169 if (Itr != BD.getSymbols().end()) {
1170 size_t Idx = std::distance(first: BD.getSymbols().begin(), last: Itr);
1171 std::swap(a&: BD.getSymbols()[0], b&: BD.getSymbols()[Idx]);
1172 continue;
1173 }
1174 }
1175
1176 // We have to skip 0 size symbols since they will all collide.
1177 if (BD.getSize() == 0) {
1178 continue;
1179 }
1180
1181 const uint64_t Hash = BD.getSection().hash(BD);
1182 const size_t Idx = Name.find(Str: "0x");
1183 std::string NewName =
1184 (Twine(Name.substr(Start: 0, N: Idx)) + "_" + Twine::utohexstr(Val: Hash)).str();
1185 if (getBinaryDataByName(Name: NewName)) {
1186 // Ignore collisions for symbols that appear to be padding
1187 // (i.e. all zeros or a "hole")
1188 if (!isPadding(BD)) {
1189 if (opts::Verbosity) {
1190 this->errs() << "BOLT-WARNING: collision detected when hashing " << BD
1191 << " with new name (" << NewName << "), skipping.\n";
1192 }
1193 ++NumCollisions;
1194 }
1195 continue;
1196 }
1197 BD.Symbols.insert(position: BD.Symbols.begin(), x: Ctx->getOrCreateSymbol(Name: NewName));
1198 GlobalSymbols[NewName] = &BD;
1199 }
1200 if (NumCollisions) {
1201 this->errs() << "BOLT-WARNING: " << NumCollisions
1202 << " collisions detected while hashing binary objects";
1203 if (!opts::Verbosity)
1204 this->errs() << ". Use -v=1 to see the list.";
1205 this->errs() << '\n';
1206 }
1207}
1208
1209bool BinaryContext::registerFragment(BinaryFunction &TargetFunction,
1210 BinaryFunction &Function) const {
1211 assert(TargetFunction.isFragment() && "TargetFunction must be a fragment");
1212 if (TargetFunction.isChildOf(Other: Function))
1213 return true;
1214 TargetFunction.addParentFragment(BF&: Function);
1215 Function.addFragment(BF&: TargetFunction);
1216 if (!HasRelocations) {
1217 TargetFunction.setSimple(false);
1218 Function.setSimple(false);
1219 }
1220 if (opts::Verbosity >= 1) {
1221 this->outs() << "BOLT-INFO: marking " << TargetFunction
1222 << " as a fragment of " << Function << '\n';
1223 }
1224 return true;
1225}
1226
1227void BinaryContext::addAdrpAddRelocAArch64(BinaryFunction &BF,
1228 MCInst &LoadLowBits,
1229 MCInst &LoadHiBits,
1230 uint64_t Target) {
1231 const MCSymbol *TargetSymbol;
1232 uint64_t Addend = 0;
1233 std::tie(args&: TargetSymbol, args&: Addend) = handleAddressRef(Address: Target, BF,
1234 /*IsPCRel*/ true);
1235 int64_t Val;
1236 MIB->replaceImmWithSymbolRef(Inst&: LoadHiBits, Symbol: TargetSymbol, Addend, Ctx: Ctx.get(), Value&: Val,
1237 RelType: ELF::R_AARCH64_ADR_PREL_PG_HI21);
1238 MIB->replaceImmWithSymbolRef(Inst&: LoadLowBits, Symbol: TargetSymbol, Addend, Ctx: Ctx.get(),
1239 Value&: Val, RelType: ELF::R_AARCH64_ADD_ABS_LO12_NC);
1240}
1241
1242bool BinaryContext::handleAArch64Veneer(uint64_t Address, bool MatchOnly) {
1243 BinaryFunction *TargetFunction = getBinaryFunctionContainingAddress(Address);
1244 if (TargetFunction)
1245 return false;
1246
1247 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1248 assert(Section && "cannot get section for referenced address");
1249 if (!Section->isText())
1250 return false;
1251
1252 bool Ret = false;
1253 StringRef SectionContents = Section->getContents();
1254 uint64_t Offset = Address - Section->getAddress();
1255 const uint64_t MaxSize = SectionContents.size() - Offset;
1256 const uint8_t *Bytes =
1257 reinterpret_cast<const uint8_t *>(SectionContents.data());
1258 ArrayRef<uint8_t> Data(Bytes + Offset, MaxSize);
1259
1260 auto matchVeneer = [&](BinaryFunction::InstrMapType &Instructions,
1261 MCInst &Instruction, uint64_t Offset,
1262 uint64_t AbsoluteInstrAddr,
1263 uint64_t TotalSize) -> bool {
1264 MCInst *TargetHiBits, *TargetLowBits;
1265 uint64_t TargetAddress, Count;
1266 Count = MIB->matchLinkerVeneer(Begin: Instructions.begin(), End: Instructions.end(),
1267 Address: AbsoluteInstrAddr, CurInst: Instruction, TargetHiBits,
1268 TargetLowBits, Target&: TargetAddress);
1269 if (!Count)
1270 return false;
1271
1272 if (MatchOnly)
1273 return true;
1274
1275 // NOTE The target symbol was created during disassemble's
1276 // handleExternalReference
1277 const MCSymbol *VeneerSymbol = getOrCreateGlobalSymbol(Address, Prefix: "FUNCat");
1278 BinaryFunction *Veneer = createBinaryFunction(Name: VeneerSymbol->getName().str(),
1279 Section&: *Section, Address, Size: TotalSize);
1280 addAdrpAddRelocAArch64(BF&: *Veneer, LoadLowBits&: *TargetLowBits, LoadHiBits&: *TargetHiBits,
1281 Target: TargetAddress);
1282 MIB->addAnnotation(Inst&: Instruction, Name: "AArch64Veneer", Val: true);
1283 Veneer->addInstruction(Offset, Instruction: std::move(Instruction));
1284 --Count;
1285 for (auto It = Instructions.rbegin(); Count != 0; ++It, --Count) {
1286 MIB->addAnnotation(Inst&: It->second, Name: "AArch64Veneer", Val: true);
1287 Veneer->addInstruction(Offset: It->first, Instruction: std::move(It->second));
1288 }
1289
1290 Veneer->getOrCreateLocalLabel(Address);
1291 Veneer->setMaxSize(TotalSize);
1292 Veneer->updateState(State: BinaryFunction::State::Disassembled);
1293 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: handling veneer function at 0x" << Address
1294 << "\n");
1295 return true;
1296 };
1297
1298 uint64_t Size = 0, TotalSize = 0;
1299 BinaryFunction::InstrMapType VeneerInstructions;
1300 for (Offset = 0; Offset < MaxSize; Offset += Size) {
1301 MCInst Instruction;
1302 const uint64_t AbsoluteInstrAddr = Address + Offset;
1303 if (!SymbolicDisAsm->getInstruction(Instr&: Instruction, Size, Bytes: Data.slice(N: Offset),
1304 Address: AbsoluteInstrAddr, CStream&: nulls()))
1305 break;
1306
1307 TotalSize += Size;
1308 if (MIB->isBranch(Inst: Instruction)) {
1309 Ret = matchVeneer(VeneerInstructions, Instruction, Offset,
1310 AbsoluteInstrAddr, TotalSize);
1311 break;
1312 }
1313
1314 VeneerInstructions.emplace(args&: Offset, args: std::move(Instruction));
1315 }
1316
1317 return Ret;
1318}
1319
1320void BinaryContext::processInterproceduralReferences() {
1321 for (const std::pair<BinaryFunction *, uint64_t> &It :
1322 InterproceduralReferences) {
1323 BinaryFunction &Function = *It.first;
1324 uint64_t Address = It.second;
1325 if (!Address || Function.isIgnored())
1326 continue;
1327
1328 BinaryFunction *TargetFunction =
1329 getBinaryFunctionContainingAddress(Address);
1330 if (&Function == TargetFunction)
1331 continue;
1332
1333 if (TargetFunction) {
1334 if (TargetFunction->isFragment() &&
1335 !TargetFunction->isChildOf(Other: Function)) {
1336 this->errs()
1337 << "BOLT-WARNING: interprocedural reference between unrelated "
1338 "fragments: "
1339 << Function.getPrintName() << " and "
1340 << TargetFunction->getPrintName() << '\n';
1341 }
1342 if (uint64_t Offset = Address - TargetFunction->getAddress())
1343 TargetFunction->addEntryPointAtOffset(Offset);
1344
1345 continue;
1346 }
1347
1348 // Check if address falls in function padding space - this could be
1349 // unmarked data in code. In this case adjust the padding space size.
1350 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
1351 assert(Section && "cannot get section for referenced address");
1352
1353 if (!Section->isText())
1354 continue;
1355
1356 // PLT requires special handling and could be ignored in this context.
1357 StringRef SectionName = Section->getName();
1358 if (SectionName == ".plt" || SectionName == ".plt.got")
1359 continue;
1360
1361 // Check if it is aarch64 veneer written at Address
1362 if (isAArch64() && handleAArch64Veneer(Address))
1363 continue;
1364
1365 if (opts::processAllFunctions()) {
1366 this->errs() << "BOLT-ERROR: cannot process binaries with unmarked "
1367 << "object in code at address 0x"
1368 << Twine::utohexstr(Val: Address) << " belonging to section "
1369 << SectionName << " in current mode\n";
1370 exit(status: 1);
1371 }
1372
1373 TargetFunction = getBinaryFunctionContainingAddress(Address,
1374 /*CheckPastEnd=*/false,
1375 /*UseMaxSize=*/true);
1376 // We are not going to overwrite non-simple functions, but for simple
1377 // ones - adjust the padding size.
1378 if (TargetFunction && TargetFunction->isSimple()) {
1379 this->errs()
1380 << "BOLT-WARNING: function " << *TargetFunction
1381 << " has an object detected in a padding region at address 0x"
1382 << Twine::utohexstr(Val: Address) << '\n';
1383 TargetFunction->setMaxSize(TargetFunction->getSize());
1384 }
1385 }
1386
1387 InterproceduralReferences.clear();
1388}
1389
1390void BinaryContext::postProcessSymbolTable() {
1391 fixBinaryDataHoles();
1392 bool Valid = true;
1393 for (auto &Entry : BinaryDataMap) {
1394 BinaryData *BD = Entry.second;
1395 if ((BD->getName().starts_with(Prefix: "SYMBOLat") ||
1396 BD->getName().starts_with(Prefix: "DATAat")) &&
1397 !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
1398 BD->getSection()) {
1399 this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
1400 << "\n";
1401 Valid = false;
1402 }
1403 }
1404 assert(Valid);
1405 (void)Valid;
1406 generateSymbolHashes();
1407}
1408
1409void BinaryContext::foldFunction(BinaryFunction &ChildBF,
1410 BinaryFunction &ParentBF) {
1411 assert(!ChildBF.isMultiEntry() && !ParentBF.isMultiEntry() &&
1412 "cannot merge functions with multiple entry points");
1413
1414 std::unique_lock<llvm::sys::RWMutex> WriteCtxLock(CtxMutex, std::defer_lock);
1415 std::unique_lock<llvm::sys::RWMutex> WriteSymbolMapLock(
1416 SymbolToFunctionMapMutex, std::defer_lock);
1417
1418 const StringRef ChildName = ChildBF.getOneName();
1419
1420 // Move symbols over and update bookkeeping info.
1421 for (MCSymbol *Symbol : ChildBF.getSymbols()) {
1422 ParentBF.getSymbols().push_back(Elt: Symbol);
1423 WriteSymbolMapLock.lock();
1424 SymbolToFunctionMap[Symbol] = &ParentBF;
1425 WriteSymbolMapLock.unlock();
1426 // NB: there's no need to update BinaryDataMap and GlobalSymbols.
1427 }
1428 ChildBF.getSymbols().clear();
1429
1430 // Move other names the child function is known under.
1431 llvm::move(Range&: ChildBF.Aliases, Out: std::back_inserter(x&: ParentBF.Aliases));
1432 ChildBF.Aliases.clear();
1433
1434 if (HasRelocations) {
1435 // Merge execution counts of ChildBF into those of ParentBF.
1436 // Without relocations, we cannot reliably merge profiles as both functions
1437 // continue to exist and either one can be executed.
1438 ChildBF.mergeProfileDataInto(BF&: ParentBF);
1439
1440 std::shared_lock<llvm::sys::RWMutex> ReadBfsLock(BinaryFunctionsMutex,
1441 std::defer_lock);
1442 std::unique_lock<llvm::sys::RWMutex> WriteBfsLock(BinaryFunctionsMutex,
1443 std::defer_lock);
1444 // Remove ChildBF from the global set of functions in relocs mode.
1445 ReadBfsLock.lock();
1446 auto FI = BinaryFunctions.find(x: ChildBF.getAddress());
1447 ReadBfsLock.unlock();
1448
1449 assert(FI != BinaryFunctions.end() && "function not found");
1450 assert(&ChildBF == &FI->second && "function mismatch");
1451
1452 WriteBfsLock.lock();
1453 ChildBF.clearDisasmState();
1454 FI = BinaryFunctions.erase(position: FI);
1455 WriteBfsLock.unlock();
1456
1457 } else {
1458 // In non-relocation mode we keep the function, but rename it.
1459 std::string NewName = "__ICF_" + ChildName.str();
1460
1461 WriteCtxLock.lock();
1462 ChildBF.getSymbols().push_back(Elt: Ctx->getOrCreateSymbol(Name: NewName));
1463 WriteCtxLock.unlock();
1464
1465 ChildBF.setFolded(&ParentBF);
1466 }
1467
1468 ParentBF.setHasFunctionsFoldedInto();
1469}
1470
1471void BinaryContext::fixBinaryDataHoles() {
1472 assert(validateObjectNesting() && "object nesting inconsistency detected");
1473
1474 for (BinarySection &Section : allocatableSections()) {
1475 std::vector<std::pair<uint64_t, uint64_t>> Holes;
1476
1477 auto isNotHole = [&Section](const binary_data_iterator &Itr) {
1478 BinaryData *BD = Itr->second;
1479 bool isHole = (!BD->getParent() && !BD->getSize() && BD->isObject() &&
1480 (BD->getName().starts_with(Prefix: "SYMBOLat0x") ||
1481 BD->getName().starts_with(Prefix: "DATAat0x") ||
1482 BD->getName().starts_with(Prefix: "ANONYMOUS")));
1483 return !isHole && BD->getSection() == Section && !BD->getParent();
1484 };
1485
1486 auto BDStart = BinaryDataMap.begin();
1487 auto BDEnd = BinaryDataMap.end();
1488 auto Itr = FilteredBinaryDataIterator(isNotHole, BDStart, BDEnd);
1489 auto End = FilteredBinaryDataIterator(isNotHole, BDEnd, BDEnd);
1490
1491 uint64_t EndAddress = Section.getAddress();
1492
1493 while (Itr != End) {
1494 if (Itr->second->getAddress() > EndAddress) {
1495 uint64_t Gap = Itr->second->getAddress() - EndAddress;
1496 Holes.emplace_back(args&: EndAddress, args&: Gap);
1497 }
1498 EndAddress = Itr->second->getEndAddress();
1499 ++Itr;
1500 }
1501
1502 if (EndAddress < Section.getEndAddress())
1503 Holes.emplace_back(args&: EndAddress, args: Section.getEndAddress() - EndAddress);
1504
1505 // If there is already a symbol at the start of the hole, grow that symbol
1506 // to cover the rest. Otherwise, create a new symbol to cover the hole.
1507 for (std::pair<uint64_t, uint64_t> &Hole : Holes) {
1508 BinaryData *BD = getBinaryDataAtAddress(Address: Hole.first);
1509 if (BD) {
1510 // BD->getSection() can be != Section if there are sections that
1511 // overlap. In this case it is probably safe to just skip the holes
1512 // since the overlapping section will not(?) have any symbols in it.
1513 if (BD->getSection() == Section)
1514 setBinaryDataSize(Address: Hole.first, Size: Hole.second);
1515 } else {
1516 getOrCreateGlobalSymbol(Address: Hole.first, Prefix: "HOLEat", Size: Hole.second, Alignment: 1);
1517 }
1518 }
1519 }
1520
1521 assert(validateObjectNesting() && "object nesting inconsistency detected");
1522 assert(validateHoles() && "top level hole detected in object map");
1523}
1524
1525void BinaryContext::printGlobalSymbols(raw_ostream &OS) const {
1526 const BinarySection *CurrentSection = nullptr;
1527 bool FirstSection = true;
1528
1529 for (auto &Entry : BinaryDataMap) {
1530 const BinaryData *BD = Entry.second;
1531 const BinarySection &Section = BD->getSection();
1532 if (FirstSection || Section != *CurrentSection) {
1533 uint64_t Address, Size;
1534 StringRef Name = Section.getName();
1535 if (Section) {
1536 Address = Section.getAddress();
1537 Size = Section.getSize();
1538 } else {
1539 Address = BD->getAddress();
1540 Size = BD->getSize();
1541 }
1542 OS << "BOLT-INFO: Section " << Name << ", "
1543 << "0x" + Twine::utohexstr(Val: Address) << ":"
1544 << "0x" + Twine::utohexstr(Val: Address + Size) << "/" << Size << "\n";
1545 CurrentSection = &Section;
1546 FirstSection = false;
1547 }
1548
1549 OS << "BOLT-INFO: ";
1550 const BinaryData *P = BD->getParent();
1551 while (P) {
1552 OS << " ";
1553 P = P->getParent();
1554 }
1555 OS << *BD << "\n";
1556 }
1557}
1558
1559Expected<unsigned> BinaryContext::getDwarfFile(
1560 StringRef Directory, StringRef FileName, unsigned FileNumber,
1561 std::optional<MD5::MD5Result> Checksum, std::optional<StringRef> Source,
1562 unsigned CUID, unsigned DWARFVersion) {
1563 DwarfLineTable &Table = DwarfLineTablesCUMap[CUID];
1564 return Table.tryGetFile(Directory, FileName, Checksum, Source, DwarfVersion: DWARFVersion,
1565 FileNumber);
1566}
1567
1568unsigned BinaryContext::addDebugFilenameToUnit(const uint32_t DestCUID,
1569 const uint32_t SrcCUID,
1570 unsigned FileIndex) {
1571 DWARFCompileUnit *SrcUnit = DwCtx->getCompileUnitForOffset(Offset: SrcCUID);
1572 const DWARFDebugLine::LineTable *LineTable =
1573 DwCtx->getLineTableForUnit(U: SrcUnit);
1574 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1575 LineTable->Prologue.FileNames;
1576 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1577 // means empty dir.
1578 assert(FileIndex > 0 && FileIndex <= FileNames.size() &&
1579 "FileIndex out of range for the compilation unit.");
1580 StringRef Dir = "";
1581 if (FileNames[FileIndex - 1].DirIdx != 0) {
1582 if (std::optional<const char *> DirName = dwarf::toString(
1583 V: LineTable->Prologue
1584 .IncludeDirectories[FileNames[FileIndex - 1].DirIdx - 1])) {
1585 Dir = *DirName;
1586 }
1587 }
1588 StringRef FileName = "";
1589 if (std::optional<const char *> FName =
1590 dwarf::toString(V: FileNames[FileIndex - 1].Name))
1591 FileName = *FName;
1592 assert(FileName != "");
1593 DWARFCompileUnit *DstUnit = DwCtx->getCompileUnitForOffset(Offset: DestCUID);
1594 return cantFail(ValOrErr: getDwarfFile(Directory: Dir, FileName, FileNumber: 0, Checksum: std::nullopt, Source: std::nullopt,
1595 CUID: DestCUID, DWARFVersion: DstUnit->getVersion()));
1596}
1597
1598std::vector<BinaryFunction *> BinaryContext::getSortedFunctions() {
1599 std::vector<BinaryFunction *> SortedFunctions(BinaryFunctions.size());
1600 llvm::transform(Range: llvm::make_second_range(c&: BinaryFunctions),
1601 d_first: SortedFunctions.begin(),
1602 F: [](BinaryFunction &BF) { return &BF; });
1603
1604 llvm::stable_sort(Range&: SortedFunctions,
1605 C: [](const BinaryFunction *A, const BinaryFunction *B) {
1606 if (A->hasValidIndex() && B->hasValidIndex()) {
1607 return A->getIndex() < B->getIndex();
1608 }
1609 return A->hasValidIndex();
1610 });
1611 return SortedFunctions;
1612}
1613
1614std::vector<BinaryFunction *> BinaryContext::getAllBinaryFunctions() {
1615 std::vector<BinaryFunction *> AllFunctions;
1616 AllFunctions.reserve(n: BinaryFunctions.size() + InjectedBinaryFunctions.size());
1617 llvm::transform(Range: llvm::make_second_range(c&: BinaryFunctions),
1618 d_first: std::back_inserter(x&: AllFunctions),
1619 F: [](BinaryFunction &BF) { return &BF; });
1620 llvm::copy(Range&: InjectedBinaryFunctions, Out: std::back_inserter(x&: AllFunctions));
1621
1622 return AllFunctions;
1623}
1624
1625std::optional<DWARFUnit *> BinaryContext::getDWOCU(uint64_t DWOId) {
1626 auto Iter = DWOCUs.find(x: DWOId);
1627 if (Iter == DWOCUs.end())
1628 return std::nullopt;
1629
1630 return Iter->second;
1631}
1632
1633DWARFContext *BinaryContext::getDWOContext() const {
1634 if (DWOCUs.empty())
1635 return nullptr;
1636 return &DWOCUs.begin()->second->getContext();
1637}
1638
1639/// Handles DWO sections that can either be in .o, .dwo or .dwp files.
1640void BinaryContext::preprocessDWODebugInfo() {
1641 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1642 DWARFUnit *const DwarfUnit = CU.get();
1643 if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
1644 std::string DWOName = dwarf::toString(
1645 V: DwarfUnit->getUnitDIE().find(
1646 Attrs: {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
1647 Default: "");
1648 SmallString<16> AbsolutePath;
1649 if (!opts::CompDirOverride.empty()) {
1650 sys::path::append(path&: AbsolutePath, a: opts::CompDirOverride);
1651 sys::path::append(path&: AbsolutePath, a: DWOName);
1652 }
1653 DWARFUnit *DWOCU =
1654 DwarfUnit->getNonSkeletonUnitDIE(ExtractUnitDIEOnly: false, DWOAlternativeLocation: AbsolutePath).getDwarfUnit();
1655 if (!DWOCU->isDWOUnit()) {
1656 this->outs()
1657 << "BOLT-WARNING: Debug Fission: DWO debug information for "
1658 << DWOName
1659 << " was not retrieved and won't be updated. Please check "
1660 "relative path.\n";
1661 continue;
1662 }
1663 DWOCUs[*DWOId] = DWOCU;
1664 }
1665 }
1666 if (!DWOCUs.empty())
1667 this->outs() << "BOLT-INFO: processing split DWARF\n";
1668}
1669
1670void BinaryContext::preprocessDebugInfo() {
1671 struct CURange {
1672 uint64_t LowPC;
1673 uint64_t HighPC;
1674 DWARFUnit *Unit;
1675
1676 bool operator<(const CURange &Other) const { return LowPC < Other.LowPC; }
1677 };
1678
1679 // Building a map of address ranges to CUs similar to .debug_aranges and use
1680 // it to assign CU to functions.
1681 std::vector<CURange> AllRanges;
1682 AllRanges.reserve(n: DwCtx->getNumCompileUnits());
1683 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1684 Expected<DWARFAddressRangesVector> RangesOrError =
1685 CU->getUnitDIE().getAddressRanges();
1686 if (!RangesOrError) {
1687 consumeError(Err: RangesOrError.takeError());
1688 continue;
1689 }
1690 for (DWARFAddressRange &Range : *RangesOrError) {
1691 // Parts of the debug info could be invalidated due to corresponding code
1692 // being removed from the binary by the linker. Hence we check if the
1693 // address is a valid one.
1694 if (containsAddress(Address: Range.LowPC))
1695 AllRanges.emplace_back(args: CURange{.LowPC: Range.LowPC, .HighPC: Range.HighPC, .Unit: CU.get()});
1696 }
1697
1698 ContainsDwarf5 |= CU->getVersion() >= 5;
1699 ContainsDwarfLegacy |= CU->getVersion() < 5;
1700 }
1701
1702 llvm::sort(C&: AllRanges);
1703 for (auto &KV : BinaryFunctions) {
1704 const uint64_t FunctionAddress = KV.first;
1705 BinaryFunction &Function = KV.second;
1706
1707 auto It = llvm::partition_point(
1708 Range&: AllRanges, P: [=](CURange R) { return R.HighPC <= FunctionAddress; });
1709 if (It != AllRanges.end() && It->LowPC <= FunctionAddress)
1710 Function.setDWARFUnit(It->Unit);
1711 }
1712
1713 // Discover units with debug info that needs to be updated.
1714 for (const auto &KV : BinaryFunctions) {
1715 const BinaryFunction &BF = KV.second;
1716 if (shouldEmit(Function: BF) && BF.getDWARFUnit())
1717 ProcessedCUs.insert(x: BF.getDWARFUnit());
1718 }
1719
1720 // Clear debug info for functions from units that we are not going to process.
1721 for (auto &KV : BinaryFunctions) {
1722 BinaryFunction &BF = KV.second;
1723 if (BF.getDWARFUnit() && !ProcessedCUs.count(x: BF.getDWARFUnit()))
1724 BF.setDWARFUnit(nullptr);
1725 }
1726
1727 if (opts::Verbosity >= 1) {
1728 this->outs() << "BOLT-INFO: " << ProcessedCUs.size() << " out of "
1729 << DwCtx->getNumCompileUnits() << " CUs will be updated\n";
1730 }
1731
1732 preprocessDWODebugInfo();
1733
1734 // Populate MCContext with DWARF files from all units.
1735 StringRef GlobalPrefix = AsmInfo->getPrivateGlobalPrefix();
1736 for (const std::unique_ptr<DWARFUnit> &CU : DwCtx->compile_units()) {
1737 const uint64_t CUID = CU->getOffset();
1738 DwarfLineTable &BinaryLineTable = getDwarfLineTable(CUID);
1739 BinaryLineTable.setLabel(Ctx->getOrCreateSymbol(
1740 Name: GlobalPrefix + "line_table_start" + Twine(CUID)));
1741
1742 if (!ProcessedCUs.count(x: CU.get()))
1743 continue;
1744
1745 const DWARFDebugLine::LineTable *LineTable =
1746 DwCtx->getLineTableForUnit(U: CU.get());
1747 const std::vector<DWARFDebugLine::FileNameEntry> &FileNames =
1748 LineTable->Prologue.FileNames;
1749
1750 uint16_t DwarfVersion = LineTable->Prologue.getVersion();
1751 if (DwarfVersion >= 5) {
1752 std::optional<MD5::MD5Result> Checksum;
1753 if (LineTable->Prologue.ContentTypes.HasMD5)
1754 Checksum = LineTable->Prologue.FileNames[0].Checksum;
1755 std::optional<const char *> Name =
1756 dwarf::toString(V: CU->getUnitDIE().find(Attr: dwarf::DW_AT_name), Default: nullptr);
1757 if (std::optional<uint64_t> DWOID = CU->getDWOId()) {
1758 auto Iter = DWOCUs.find(x: *DWOID);
1759 assert(Iter != DWOCUs.end() && "DWO CU was not found.");
1760 Name = dwarf::toString(
1761 V: Iter->second->getUnitDIE().find(Attr: dwarf::DW_AT_name), Default: nullptr);
1762 }
1763 BinaryLineTable.setRootFile(Directory: CU->getCompilationDir(), FileName: *Name, Checksum,
1764 Source: std::nullopt);
1765 }
1766
1767 BinaryLineTable.setDwarfVersion(DwarfVersion);
1768
1769 // Assign a unique label to every line table, one per CU.
1770 // Make sure empty debug line tables are registered too.
1771 if (FileNames.empty()) {
1772 cantFail(ValOrErr: getDwarfFile(Directory: "", FileName: "<unknown>", FileNumber: 0, Checksum: std::nullopt, Source: std::nullopt,
1773 CUID, DWARFVersion: DwarfVersion));
1774 continue;
1775 }
1776 const uint32_t Offset = DwarfVersion < 5 ? 1 : 0;
1777 for (size_t I = 0, Size = FileNames.size(); I != Size; ++I) {
1778 // Dir indexes start at 1, as DWARF file numbers, and a dir index 0
1779 // means empty dir.
1780 StringRef Dir = "";
1781 if (FileNames[I].DirIdx != 0 || DwarfVersion >= 5)
1782 if (std::optional<const char *> DirName = dwarf::toString(
1783 V: LineTable->Prologue
1784 .IncludeDirectories[FileNames[I].DirIdx - Offset]))
1785 Dir = *DirName;
1786 StringRef FileName = "";
1787 if (std::optional<const char *> FName =
1788 dwarf::toString(V: FileNames[I].Name))
1789 FileName = *FName;
1790 assert(FileName != "");
1791 std::optional<MD5::MD5Result> Checksum;
1792 if (DwarfVersion >= 5 && LineTable->Prologue.ContentTypes.HasMD5)
1793 Checksum = LineTable->Prologue.FileNames[I].Checksum;
1794 cantFail(ValOrErr: getDwarfFile(Directory: Dir, FileName, FileNumber: 0, Checksum, Source: std::nullopt, CUID,
1795 DWARFVersion: DwarfVersion));
1796 }
1797 }
1798}
1799
1800bool BinaryContext::shouldEmit(const BinaryFunction &Function) const {
1801 if (Function.isPseudo())
1802 return false;
1803
1804 if (opts::processAllFunctions())
1805 return true;
1806
1807 if (Function.isIgnored())
1808 return false;
1809
1810 // In relocation mode we will emit non-simple functions with CFG.
1811 // If the function does not have a CFG it should be marked as ignored.
1812 return HasRelocations || Function.isSimple();
1813}
1814
1815void BinaryContext::dump(const MCInst &Inst) const {
1816 if (LLVM_UNLIKELY(!InstPrinter)) {
1817 dbgs() << "Cannot dump for InstPrinter is not initialized.\n";
1818 return;
1819 }
1820 InstPrinter->printInst(MI: &Inst, Address: 0, Annot: "", STI: *STI, OS&: dbgs());
1821 dbgs() << "\n";
1822}
1823
1824void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) {
1825 uint32_t Operation = Inst.getOperation();
1826 switch (Operation) {
1827 case MCCFIInstruction::OpSameValue:
1828 OS << "OpSameValue Reg" << Inst.getRegister();
1829 break;
1830 case MCCFIInstruction::OpRememberState:
1831 OS << "OpRememberState";
1832 break;
1833 case MCCFIInstruction::OpRestoreState:
1834 OS << "OpRestoreState";
1835 break;
1836 case MCCFIInstruction::OpOffset:
1837 OS << "OpOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1838 break;
1839 case MCCFIInstruction::OpDefCfaRegister:
1840 OS << "OpDefCfaRegister Reg" << Inst.getRegister();
1841 break;
1842 case MCCFIInstruction::OpDefCfaOffset:
1843 OS << "OpDefCfaOffset " << Inst.getOffset();
1844 break;
1845 case MCCFIInstruction::OpDefCfa:
1846 OS << "OpDefCfa Reg" << Inst.getRegister() << " " << Inst.getOffset();
1847 break;
1848 case MCCFIInstruction::OpRelOffset:
1849 OS << "OpRelOffset Reg" << Inst.getRegister() << " " << Inst.getOffset();
1850 break;
1851 case MCCFIInstruction::OpAdjustCfaOffset:
1852 OS << "OfAdjustCfaOffset " << Inst.getOffset();
1853 break;
1854 case MCCFIInstruction::OpEscape:
1855 OS << "OpEscape";
1856 break;
1857 case MCCFIInstruction::OpRestore:
1858 OS << "OpRestore Reg" << Inst.getRegister();
1859 break;
1860 case MCCFIInstruction::OpUndefined:
1861 OS << "OpUndefined Reg" << Inst.getRegister();
1862 break;
1863 case MCCFIInstruction::OpRegister:
1864 OS << "OpRegister Reg" << Inst.getRegister() << " Reg"
1865 << Inst.getRegister2();
1866 break;
1867 case MCCFIInstruction::OpWindowSave:
1868 OS << "OpWindowSave";
1869 break;
1870 case MCCFIInstruction::OpGnuArgsSize:
1871 OS << "OpGnuArgsSize";
1872 break;
1873 default:
1874 OS << "Op#" << Operation;
1875 break;
1876 }
1877}
1878
1879MarkerSymType BinaryContext::getMarkerType(const SymbolRef &Symbol) const {
1880 // For aarch64 and riscv, the ABI defines mapping symbols so we identify data
1881 // in the code section (see IHI0056B). $x identifies a symbol starting code or
1882 // the end of a data chunk inside code, $d identifies start of data.
1883 if (isX86() || ELFSymbolRef(Symbol).getSize())
1884 return MarkerSymType::NONE;
1885
1886 Expected<StringRef> NameOrError = Symbol.getName();
1887 Expected<object::SymbolRef::Type> TypeOrError = Symbol.getType();
1888
1889 if (!TypeOrError || !NameOrError)
1890 return MarkerSymType::NONE;
1891
1892 if (*TypeOrError != SymbolRef::ST_Unknown)
1893 return MarkerSymType::NONE;
1894
1895 if (*NameOrError == "$x" || NameOrError->starts_with(Prefix: "$x."))
1896 return MarkerSymType::CODE;
1897
1898 // $x<ISA>
1899 if (isRISCV() && NameOrError->starts_with(Prefix: "$x"))
1900 return MarkerSymType::CODE;
1901
1902 if (*NameOrError == "$d" || NameOrError->starts_with(Prefix: "$d."))
1903 return MarkerSymType::DATA;
1904
1905 return MarkerSymType::NONE;
1906}
1907
1908bool BinaryContext::isMarker(const SymbolRef &Symbol) const {
1909 return getMarkerType(Symbol) != MarkerSymType::NONE;
1910}
1911
1912static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
1913 const BinaryFunction *Function,
1914 DWARFContext *DwCtx) {
1915 DebugLineTableRowRef RowRef =
1916 DebugLineTableRowRef::fromSMLoc(Loc: Instruction.getLoc());
1917 if (RowRef == DebugLineTableRowRef::NULL_ROW)
1918 return;
1919
1920 const DWARFDebugLine::LineTable *LineTable;
1921 if (Function && Function->getDWARFUnit() &&
1922 Function->getDWARFUnit()->getOffset() == RowRef.DwCompileUnitIndex) {
1923 LineTable = Function->getDWARFLineTable();
1924 } else {
1925 LineTable = DwCtx->getLineTableForUnit(
1926 U: DwCtx->getCompileUnitForOffset(Offset: RowRef.DwCompileUnitIndex));
1927 }
1928 assert(LineTable && "line table expected for instruction with debug info");
1929
1930 const DWARFDebugLine::Row &Row = LineTable->Rows[RowRef.RowIndex - 1];
1931 StringRef FileName = "";
1932 if (std::optional<const char *> FName =
1933 dwarf::toString(V: LineTable->Prologue.FileNames[Row.File - 1].Name))
1934 FileName = *FName;
1935 OS << " # debug line " << FileName << ":" << Row.Line;
1936 if (Row.Column)
1937 OS << ":" << Row.Column;
1938 if (Row.Discriminator)
1939 OS << " discriminator:" << Row.Discriminator;
1940}
1941
1942void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
1943 uint64_t Offset,
1944 const BinaryFunction *Function,
1945 bool PrintMCInst, bool PrintMemData,
1946 bool PrintRelocations,
1947 StringRef Endl) const {
1948 OS << format(Fmt: " %08" PRIx64 ": ", Vals: Offset);
1949 if (MIB->isCFI(Inst: Instruction)) {
1950 uint32_t Offset = Instruction.getOperand(i: 0).getImm();
1951 OS << "\t!CFI\t$" << Offset << "\t; ";
1952 if (Function)
1953 printCFI(OS, Inst: *Function->getCFIFor(Instr: Instruction));
1954 OS << Endl;
1955 return;
1956 }
1957 if (std::optional<uint32_t> DynamicID =
1958 MIB->getDynamicBranchID(Inst: Instruction)) {
1959 OS << "\tjit\t" << MIB->getTargetSymbol(Inst: Instruction)->getName()
1960 << " # ID: " << DynamicID;
1961 } else {
1962 InstPrinter->printInst(MI: &Instruction, Address: 0, Annot: "", STI: *STI, OS);
1963 }
1964 if (MIB->isCall(Inst: Instruction)) {
1965 if (MIB->isTailCall(Inst: Instruction))
1966 OS << " # TAILCALL ";
1967 if (MIB->isInvoke(Inst: Instruction)) {
1968 const std::optional<MCPlus::MCLandingPad> EHInfo =
1969 MIB->getEHInfo(Inst: Instruction);
1970 OS << " # handler: ";
1971 if (EHInfo->first)
1972 OS << *EHInfo->first;
1973 else
1974 OS << '0';
1975 OS << "; action: " << EHInfo->second;
1976 const int64_t GnuArgsSize = MIB->getGnuArgsSize(Inst: Instruction);
1977 if (GnuArgsSize >= 0)
1978 OS << "; GNU_args_size = " << GnuArgsSize;
1979 }
1980 } else if (MIB->isIndirectBranch(Inst: Instruction)) {
1981 if (uint64_t JTAddress = MIB->getJumpTable(Inst: Instruction)) {
1982 OS << " # JUMPTABLE @0x" << Twine::utohexstr(Val: JTAddress);
1983 } else {
1984 OS << " # UNKNOWN CONTROL FLOW";
1985 }
1986 }
1987 if (std::optional<uint32_t> Offset = MIB->getOffset(Inst: Instruction))
1988 OS << " # Offset: " << *Offset;
1989 if (std::optional<uint32_t> Size = MIB->getSize(Inst: Instruction))
1990 OS << " # Size: " << *Size;
1991 if (MCSymbol *Label = MIB->getInstLabel(Inst: Instruction))
1992 OS << " # Label: " << *Label;
1993
1994 MIB->printAnnotations(Inst: Instruction, OS);
1995
1996 if (opts::PrintDebugInfo)
1997 printDebugInfo(OS, Instruction, Function, DwCtx: DwCtx.get());
1998
1999 if ((opts::PrintRelocations || PrintRelocations) && Function) {
2000 const uint64_t Size = computeCodeSize(Beg: &Instruction, End: &Instruction + 1);
2001 Function->printRelocations(OS, Offset, Size);
2002 }
2003
2004 OS << Endl;
2005
2006 if (PrintMCInst) {
2007 Instruction.dump_pretty(OS, Printer: InstPrinter.get());
2008 OS << Endl;
2009 }
2010}
2011
2012std::optional<uint64_t>
2013BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
2014 uint64_t FileOffset) const {
2015 // Find a segment with a matching file offset.
2016 for (auto &KV : SegmentMapInfo) {
2017 const SegmentInfo &SegInfo = KV.second;
2018 // FileOffset is got from perf event,
2019 // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
2020 // If the pagesize is not equal to SegInfo.Alignment.
2021 // FileOffset and SegInfo.FileOffset should be aligned first,
2022 // and then judge whether they are equal.
2023 if (alignDown(Value: SegInfo.FileOffset, Align: SegInfo.Alignment) ==
2024 alignDown(Value: FileOffset, Align: SegInfo.Alignment)) {
2025 // The function's offset from base address in VAS is aligned by pagesize
2026 // instead of SegInfo.Alignment. Pagesize can't be got from perf events.
2027 // However, The ELF document says that SegInfo.FileOffset should equal
2028 // to SegInfo.Address, modulo the pagesize.
2029 // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
2030
2031 // So alignDown(SegInfo.Address, pagesize) can be calculated by:
2032 // alignDown(SegInfo.Address, pagesize)
2033 // = SegInfo.Address - (SegInfo.Address % pagesize)
2034 // = SegInfo.Address - (SegInfo.FileOffset % pagesize)
2035 // = SegInfo.Address - SegInfo.FileOffset +
2036 // alignDown(SegInfo.FileOffset, pagesize)
2037 // = SegInfo.Address - SegInfo.FileOffset + FileOffset
2038 return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
2039 }
2040 }
2041
2042 return std::nullopt;
2043}
2044
2045ErrorOr<BinarySection &> BinaryContext::getSectionForAddress(uint64_t Address) {
2046 auto SI = AddressToSection.upper_bound(x: Address);
2047 if (SI != AddressToSection.begin()) {
2048 --SI;
2049 uint64_t UpperBound = SI->first + SI->second->getSize();
2050 if (!SI->second->getSize())
2051 UpperBound += 1;
2052 if (UpperBound > Address)
2053 return *SI->second;
2054 }
2055 return std::make_error_code(e: std::errc::bad_address);
2056}
2057
2058ErrorOr<StringRef>
2059BinaryContext::getSectionNameForAddress(uint64_t Address) const {
2060 if (ErrorOr<const BinarySection &> Section = getSectionForAddress(Address))
2061 return Section->getName();
2062 return std::make_error_code(e: std::errc::bad_address);
2063}
2064
2065BinarySection &BinaryContext::registerSection(BinarySection *Section) {
2066 auto Res = Sections.insert(x: Section);
2067 (void)Res;
2068 assert(Res.second && "can't register the same section twice.");
2069
2070 // Only register allocatable sections in the AddressToSection map.
2071 if (Section->isAllocatable() && Section->getAddress())
2072 AddressToSection.insert(x: std::make_pair(x: Section->getAddress(), y&: Section));
2073 NameToSection.insert(
2074 x: std::make_pair(x: std::string(Section->getName()), y&: Section));
2075 if (Section->hasSectionRef())
2076 SectionRefToBinarySection.insert(
2077 KV: std::make_pair(x: Section->getSectionRef(), y&: Section));
2078
2079 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering " << *Section << "\n");
2080 return *Section;
2081}
2082
2083BinarySection &BinaryContext::registerSection(SectionRef Section) {
2084 return registerSection(Section: new BinarySection(*this, Section));
2085}
2086
2087BinarySection &
2088BinaryContext::registerSection(const Twine &SectionName,
2089 const BinarySection &OriginalSection) {
2090 return registerSection(
2091 Section: new BinarySection(*this, SectionName, OriginalSection));
2092}
2093
2094BinarySection &
2095BinaryContext::registerOrUpdateSection(const Twine &Name, unsigned ELFType,
2096 unsigned ELFFlags, uint8_t *Data,
2097 uint64_t Size, unsigned Alignment) {
2098 auto NamedSections = getSectionByName(Name);
2099 if (NamedSections.begin() != NamedSections.end()) {
2100 assert(std::next(NamedSections.begin()) == NamedSections.end() &&
2101 "can only update unique sections");
2102 BinarySection *Section = NamedSections.begin()->second;
2103
2104 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: updating " << *Section << " -> ");
2105 const bool Flag = Section->isAllocatable();
2106 (void)Flag;
2107 Section->update(NewData: Data, NewSize: Size, NewAlignment: Alignment, NewELFType: ELFType, NewELFFlags: ELFFlags);
2108 LLVM_DEBUG(dbgs() << *Section << "\n");
2109 // FIXME: Fix section flags/attributes for MachO.
2110 if (isELF())
2111 assert(Flag == Section->isAllocatable() &&
2112 "can't change section allocation status");
2113 return *Section;
2114 }
2115
2116 return registerSection(
2117 Section: new BinarySection(*this, Name, Data, Size, Alignment, ELFType, ELFFlags));
2118}
2119
2120void BinaryContext::deregisterSectionName(const BinarySection &Section) {
2121 auto NameRange = NameToSection.equal_range(x: Section.getName().str());
2122 while (NameRange.first != NameRange.second) {
2123 if (NameRange.first->second == &Section) {
2124 NameToSection.erase(position: NameRange.first);
2125 break;
2126 }
2127 ++NameRange.first;
2128 }
2129}
2130
2131void BinaryContext::deregisterUnusedSections() {
2132 ErrorOr<BinarySection &> AbsSection = getUniqueSectionByName(SectionName: "<absolute>");
2133 for (auto SI = Sections.begin(); SI != Sections.end();) {
2134 BinarySection *Section = *SI;
2135 // We check getOutputData() instead of getOutputSize() because sometimes
2136 // zero-sized .text.cold sections are allocated.
2137 if (Section->hasSectionRef() || Section->getOutputData() ||
2138 (AbsSection && Section == &AbsSection.get())) {
2139 ++SI;
2140 continue;
2141 }
2142
2143 LLVM_DEBUG(dbgs() << "LLVM-DEBUG: deregistering " << Section->getName()
2144 << '\n';);
2145 deregisterSectionName(Section: *Section);
2146 SI = Sections.erase(position: SI);
2147 delete Section;
2148 }
2149}
2150
2151bool BinaryContext::deregisterSection(BinarySection &Section) {
2152 BinarySection *SectionPtr = &Section;
2153 auto Itr = Sections.find(x: SectionPtr);
2154 if (Itr != Sections.end()) {
2155 auto Range = AddressToSection.equal_range(x: SectionPtr->getAddress());
2156 while (Range.first != Range.second) {
2157 if (Range.first->second == SectionPtr) {
2158 AddressToSection.erase(position: Range.first);
2159 break;
2160 }
2161 ++Range.first;
2162 }
2163
2164 deregisterSectionName(Section: *SectionPtr);
2165 Sections.erase(position: Itr);
2166 delete SectionPtr;
2167 return true;
2168 }
2169 return false;
2170}
2171
2172void BinaryContext::renameSection(BinarySection &Section,
2173 const Twine &NewName) {
2174 auto Itr = Sections.find(x: &Section);
2175 assert(Itr != Sections.end() && "Section must exist to be renamed.");
2176 Sections.erase(position: Itr);
2177
2178 deregisterSectionName(Section);
2179
2180 Section.Name = NewName.str();
2181 Section.setOutputName(Section.Name);
2182
2183 NameToSection.insert(x: std::make_pair(x&: Section.Name, y: &Section));
2184
2185 // Reinsert with the new name.
2186 Sections.insert(x: &Section);
2187}
2188
2189void BinaryContext::printSections(raw_ostream &OS) const {
2190 for (BinarySection *const &Section : Sections)
2191 OS << "BOLT-INFO: " << *Section << "\n";
2192}
2193
2194BinarySection &BinaryContext::absoluteSection() {
2195 if (ErrorOr<BinarySection &> Section = getUniqueSectionByName(SectionName: "<absolute>"))
2196 return *Section;
2197 return registerOrUpdateSection(Name: "<absolute>", ELFType: ELF::SHT_NULL, ELFFlags: 0u);
2198}
2199
2200ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
2201 size_t Size) const {
2202 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2203 if (!Section)
2204 return std::make_error_code(e: std::errc::bad_address);
2205
2206 if (Section->isVirtual())
2207 return 0;
2208
2209 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2210 AsmInfo->getCodePointerSize());
2211 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2212 return DE.getUnsigned(offset_ptr: &ValueOffset, byte_size: Size);
2213}
2214
2215ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
2216 size_t Size) const {
2217 const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2218 if (!Section)
2219 return std::make_error_code(e: std::errc::bad_address);
2220
2221 if (Section->isVirtual())
2222 return 0;
2223
2224 DataExtractor DE(Section->getContents(), AsmInfo->isLittleEndian(),
2225 AsmInfo->getCodePointerSize());
2226 auto ValueOffset = static_cast<uint64_t>(Address - Section->getAddress());
2227 return DE.getSigned(offset_ptr: &ValueOffset, size: Size);
2228}
2229
2230void BinaryContext::addRelocation(uint64_t Address, MCSymbol *Symbol,
2231 uint64_t Type, uint64_t Addend,
2232 uint64_t Value) {
2233 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2234 assert(Section && "cannot find section for address");
2235 Section->addRelocation(Offset: Address - Section->getAddress(), Symbol, Type, Addend,
2236 Value);
2237}
2238
2239void BinaryContext::addDynamicRelocation(uint64_t Address, MCSymbol *Symbol,
2240 uint64_t Type, uint64_t Addend,
2241 uint64_t Value) {
2242 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2243 assert(Section && "cannot find section for address");
2244 Section->addDynamicRelocation(Offset: Address - Section->getAddress(), Symbol, Type,
2245 Addend, Value);
2246}
2247
2248bool BinaryContext::removeRelocationAt(uint64_t Address) {
2249 ErrorOr<BinarySection &> Section = getSectionForAddress(Address);
2250 assert(Section && "cannot find section for address");
2251 return Section->removeRelocationAt(Offset: Address - Section->getAddress());
2252}
2253
2254const Relocation *BinaryContext::getRelocationAt(uint64_t Address) const {
2255 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2256 if (!Section)
2257 return nullptr;
2258
2259 return Section->getRelocationAt(Offset: Address - Section->getAddress());
2260}
2261
2262const Relocation *
2263BinaryContext::getDynamicRelocationAt(uint64_t Address) const {
2264 ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
2265 if (!Section)
2266 return nullptr;
2267
2268 return Section->getDynamicRelocationAt(Offset: Address - Section->getAddress());
2269}
2270
2271void BinaryContext::markAmbiguousRelocations(BinaryData &BD,
2272 const uint64_t Address) {
2273 auto setImmovable = [&](BinaryData &BD) {
2274 BinaryData *Root = BD.getAtomicRoot();
2275 LLVM_DEBUG(if (Root->isMoveable()) {
2276 dbgs() << "BOLT-DEBUG: setting " << *Root << " as immovable "
2277 << "due to ambiguous relocation referencing 0x"
2278 << Twine::utohexstr(Address) << '\n';
2279 });
2280 Root->setIsMoveable(false);
2281 };
2282
2283 if (Address == BD.getAddress()) {
2284 setImmovable(BD);
2285
2286 // Set previous symbol as immovable
2287 BinaryData *Prev = getBinaryDataContainingAddress(Address: Address - 1);
2288 if (Prev && Prev->getEndAddress() == BD.getAddress())
2289 setImmovable(*Prev);
2290 }
2291
2292 if (Address == BD.getEndAddress()) {
2293 setImmovable(BD);
2294
2295 // Set next symbol as immovable
2296 BinaryData *Next = getBinaryDataContainingAddress(Address: BD.getEndAddress());
2297 if (Next && Next->getAddress() == BD.getEndAddress())
2298 setImmovable(*Next);
2299 }
2300}
2301
2302BinaryFunction *BinaryContext::getFunctionForSymbol(const MCSymbol *Symbol,
2303 uint64_t *EntryDesc) {
2304 std::shared_lock<llvm::sys::RWMutex> Lock(SymbolToFunctionMapMutex);
2305 auto BFI = SymbolToFunctionMap.find(x: Symbol);
2306 if (BFI == SymbolToFunctionMap.end())
2307 return nullptr;
2308
2309 BinaryFunction *BF = BFI->second;
2310 if (EntryDesc)
2311 *EntryDesc = BF->getEntryIDForSymbol(EntrySymbol: Symbol);
2312
2313 return BF;
2314}
2315
2316std::string
2317BinaryContext::generateBugReportMessage(StringRef Message,
2318 const BinaryFunction &Function) const {
2319 std::string Msg;
2320 raw_string_ostream SS(Msg);
2321 SS << "=======================================\n";
2322 SS << "BOLT is unable to proceed because it couldn't properly understand "
2323 "this function.\n";
2324 SS << "If you are running the most recent version of BOLT, you may "
2325 "want to "
2326 "report this and paste this dump.\nPlease check that there is no "
2327 "sensitive contents being shared in this dump.\n";
2328 SS << "\nOffending function: " << Function.getPrintName() << "\n\n";
2329 ScopedPrinter SP(SS);
2330 SP.printBinaryBlock(Label: "Function contents", Value: *Function.getData());
2331 SS << "\n";
2332 const_cast<BinaryFunction &>(Function).print(OS&: SS, Annotation: "");
2333 SS << "ERROR: " << Message;
2334 SS << "\n=======================================\n";
2335 return Msg;
2336}
2337
2338BinaryFunction *
2339BinaryContext::createInjectedBinaryFunction(const std::string &Name,
2340 bool IsSimple) {
2341 InjectedBinaryFunctions.push_back(x: new BinaryFunction(Name, *this, IsSimple));
2342 BinaryFunction *BF = InjectedBinaryFunctions.back();
2343 setSymbolToFunctionMap(Sym: BF->getSymbol(), BF);
2344 BF->CurrentState = BinaryFunction::State::CFG;
2345 return BF;
2346}
2347
2348std::pair<size_t, size_t>
2349BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
2350 // Adjust branch instruction to match the current layout.
2351 if (FixBranches)
2352 BF.fixBranches();
2353
2354 // Create local MC context to isolate the effect of ephemeral code emission.
2355 IndependentCodeEmitter MCEInstance = createIndependentMCCodeEmitter();
2356 MCContext *LocalCtx = MCEInstance.LocalCtx.get();
2357 MCAsmBackend *MAB =
2358 TheTarget->createMCAsmBackend(STI: *STI, MRI: *MRI, Options: MCTargetOptions());
2359
2360 SmallString<256> Code;
2361 raw_svector_ostream VecOS(Code);
2362
2363 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS&: VecOS);
2364 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
2365 T: *TheTriple, Ctx&: *LocalCtx, TAB: std::unique_ptr<MCAsmBackend>(MAB), OW: std::move(OW),
2366 Emitter: std::unique_ptr<MCCodeEmitter>(MCEInstance.MCE.release()), STI: *STI,
2367 /*RelaxAll=*/false,
2368 /*IncrementalLinkerCompatible=*/false,
2369 /*DWARFMustBeAtTheEnd=*/false));
2370
2371 Streamer->initSections(NoExecStack: false, STI: *STI);
2372
2373 MCSection *Section = MCEInstance.LocalMOFI->getTextSection();
2374 Section->setHasInstructions(true);
2375
2376 // Create symbols in the LocalCtx so that they get destroyed with it.
2377 MCSymbol *StartLabel = LocalCtx->createTempSymbol();
2378 MCSymbol *EndLabel = LocalCtx->createTempSymbol();
2379
2380 Streamer->switchSection(Section);
2381 Streamer->emitLabel(Symbol: StartLabel);
2382 emitFunctionBody(Streamer&: *Streamer, BF, FF&: BF.getLayout().getMainFragment(),
2383 /*EmitCodeOnly=*/true);
2384 Streamer->emitLabel(Symbol: EndLabel);
2385
2386 using LabelRange = std::pair<const MCSymbol *, const MCSymbol *>;
2387 SmallVector<LabelRange> SplitLabels;
2388 for (FunctionFragment &FF : BF.getLayout().getSplitFragments()) {
2389 MCSymbol *const SplitStartLabel = LocalCtx->createTempSymbol();
2390 MCSymbol *const SplitEndLabel = LocalCtx->createTempSymbol();
2391 SplitLabels.emplace_back(Args: SplitStartLabel, Args: SplitEndLabel);
2392
2393 MCSectionELF *const SplitSection = LocalCtx->getELFSection(
2394 Section: BF.getCodeSectionName(Fragment: FF.getFragmentNum()), Type: ELF::SHT_PROGBITS,
2395 Flags: ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
2396 SplitSection->setHasInstructions(true);
2397 Streamer->switchSection(Section: SplitSection);
2398
2399 Streamer->emitLabel(Symbol: SplitStartLabel);
2400 emitFunctionBody(Streamer&: *Streamer, BF, FF, /*EmitCodeOnly=*/true);
2401 Streamer->emitLabel(Symbol: SplitEndLabel);
2402 // To avoid calling MCObjectStreamer::flushPendingLabels() which is
2403 // private
2404 Streamer->emitBytes(Data: StringRef(""));
2405 Streamer->switchSection(Section);
2406 }
2407
2408 // To avoid calling MCObjectStreamer::flushPendingLabels() which is private or
2409 // MCStreamer::Finish(), which does more than we want
2410 Streamer->emitBytes(Data: StringRef(""));
2411
2412 MCAssembler &Assembler =
2413 static_cast<MCObjectStreamer *>(Streamer.get())->getAssembler();
2414 MCAsmLayout Layout(Assembler);
2415 Assembler.layout(Layout);
2416
2417 // Obtain fragment sizes.
2418 std::vector<uint64_t> FragmentSizes;
2419 // Main fragment size.
2420 const uint64_t HotSize =
2421 Layout.getSymbolOffset(S: *EndLabel) - Layout.getSymbolOffset(S: *StartLabel);
2422 FragmentSizes.push_back(x: HotSize);
2423 // Split fragment sizes.
2424 uint64_t ColdSize = 0;
2425 for (const auto &Labels : SplitLabels) {
2426 uint64_t Size = Layout.getSymbolOffset(S: *Labels.second) -
2427 Layout.getSymbolOffset(S: *Labels.first);
2428 FragmentSizes.push_back(x: Size);
2429 ColdSize += Size;
2430 }
2431
2432 // Populate new start and end offsets of each basic block.
2433 uint64_t FragmentIndex = 0;
2434 for (FunctionFragment &FF : BF.getLayout().fragments()) {
2435 BinaryBasicBlock *PrevBB = nullptr;
2436 for (BinaryBasicBlock *BB : FF) {
2437 const uint64_t BBStartOffset = Layout.getSymbolOffset(S: *(BB->getLabel()));
2438 BB->setOutputStartAddress(BBStartOffset);
2439 if (PrevBB)
2440 PrevBB->setOutputEndAddress(BBStartOffset);
2441 PrevBB = BB;
2442 }
2443 if (PrevBB)
2444 PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
2445 FragmentIndex++;
2446 }
2447
2448 // Clean-up the effect of the code emission.
2449 for (const MCSymbol &Symbol : Assembler.symbols()) {
2450 MCSymbol *MutableSymbol = const_cast<MCSymbol *>(&Symbol);
2451 MutableSymbol->setUndefined();
2452 MutableSymbol->setIsRegistered(false);
2453 }
2454
2455 return std::make_pair(x: HotSize, y&: ColdSize);
2456}
2457
2458bool BinaryContext::validateInstructionEncoding(
2459 ArrayRef<uint8_t> InputSequence) const {
2460 MCInst Inst;
2461 uint64_t InstSize;
2462 DisAsm->getInstruction(Instr&: Inst, Size&: InstSize, Bytes: InputSequence, Address: 0, CStream&: nulls());
2463 assert(InstSize == InputSequence.size() &&
2464 "Disassembled instruction size does not match the sequence.");
2465
2466 SmallString<256> Code;
2467 SmallVector<MCFixup, 4> Fixups;
2468
2469 MCE->encodeInstruction(Inst, CB&: Code, Fixups, STI: *STI);
2470 auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
2471 if (InputSequence != OutputSequence) {
2472 if (opts::Verbosity > 1) {
2473 this->errs() << "BOLT-WARNING: mismatched encoding detected\n"
2474 << " input: " << InputSequence << '\n'
2475 << " output: " << OutputSequence << '\n';
2476 }
2477 return false;
2478 }
2479
2480 return true;
2481}
2482
2483uint64_t BinaryContext::getHotThreshold() const {
2484 static uint64_t Threshold = 0;
2485 if (Threshold == 0) {
2486 Threshold = std::max(
2487 a: (uint64_t)opts::ExecutionCountThreshold,
2488 b: NumProfiledFuncs ? SumExecutionCount / (2 * NumProfiledFuncs) : 1);
2489 }
2490 return Threshold;
2491}
2492
2493BinaryFunction *BinaryContext::getBinaryFunctionContainingAddress(
2494 uint64_t Address, bool CheckPastEnd, bool UseMaxSize) {
2495 auto FI = BinaryFunctions.upper_bound(x: Address);
2496 if (FI == BinaryFunctions.begin())
2497 return nullptr;
2498 --FI;
2499
2500 const uint64_t UsedSize =
2501 UseMaxSize ? FI->second.getMaxSize() : FI->second.getSize();
2502
2503 if (Address >= FI->first + UsedSize + (CheckPastEnd ? 1 : 0))
2504 return nullptr;
2505
2506 return &FI->second;
2507}
2508
2509BinaryFunction *BinaryContext::getBinaryFunctionAtAddress(uint64_t Address) {
2510 // First, try to find a function starting at the given address. If the
2511 // function was folded, this will get us the original folded function if it
2512 // wasn't removed from the list, e.g. in non-relocation mode.
2513 auto BFI = BinaryFunctions.find(x: Address);
2514 if (BFI != BinaryFunctions.end())
2515 return &BFI->second;
2516
2517 // We might have folded the function matching the object at the given
2518 // address. In such case, we look for a function matching the symbol
2519 // registered at the original address. The new function (the one that the
2520 // original was folded into) will hold the symbol.
2521 if (const BinaryData *BD = getBinaryDataAtAddress(Address)) {
2522 uint64_t EntryID = 0;
2523 BinaryFunction *BF = getFunctionForSymbol(Symbol: BD->getSymbol(), EntryDesc: &EntryID);
2524 if (BF && EntryID == 0)
2525 return BF;
2526 }
2527 return nullptr;
2528}
2529
2530DebugAddressRangesVector BinaryContext::translateModuleAddressRanges(
2531 const DWARFAddressRangesVector &InputRanges) const {
2532 DebugAddressRangesVector OutputRanges;
2533
2534 for (const DWARFAddressRange Range : InputRanges) {
2535 auto BFI = BinaryFunctions.lower_bound(x: Range.LowPC);
2536 while (BFI != BinaryFunctions.end()) {
2537 const BinaryFunction &Function = BFI->second;
2538 if (Function.getAddress() >= Range.HighPC)
2539 break;
2540 const DebugAddressRangesVector FunctionRanges =
2541 Function.getOutputAddressRanges();
2542 llvm::move(Range: FunctionRanges, Out: std::back_inserter(x&: OutputRanges));
2543 std::advance(i&: BFI, n: 1);
2544 }
2545 }
2546
2547 return OutputRanges;
2548}
2549
2550} // namespace bolt
2551} // namespace llvm
2552

source code of bolt/lib/Core/BinaryContext.cpp