1//===- bolt/Core/BinaryContext.h - Low-level context ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Context for processing binary executable/library files.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef BOLT_CORE_BINARY_CONTEXT_H
14#define BOLT_CORE_BINARY_CONTEXT_H
15
#include "bolt/Core/AddressMap.h"
#include "bolt/Core/BinaryData.h"
#include "bolt/Core/BinarySection.h"
#include "bolt/Core/DebugData.h"
#include "bolt/Core/JumpTable.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/RuntimeLibs/RuntimeLibrary.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/iterator.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/RWMutex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <set>
#include <string>
#include <system_error>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
52
53namespace llvm {
54class MCDisassembler;
55class MCInstPrinter;
56
57using namespace object;
58
59namespace bolt {
60
61class BinaryFunction;
62
63/// Information on loadable part of the file.
64struct SegmentInfo {
65 uint64_t Address; /// Address of the segment in memory.
66 uint64_t Size; /// Size of the segment in memory.
67 uint64_t FileOffset; /// Offset in the file.
68 uint64_t FileSize; /// Size in file.
69 uint64_t Alignment; /// Alignment of the segment.
70
71 void print(raw_ostream &OS) const {
72 OS << "SegmentInfo { Address: 0x"
73 << Twine::utohexstr(Val: Address) << ", Size: 0x"
74 << Twine::utohexstr(Val: Size) << ", FileOffset: 0x"
75 << Twine::utohexstr(Val: FileOffset) << ", FileSize: 0x"
76 << Twine::utohexstr(Val: FileSize) << ", Alignment: 0x"
77 << Twine::utohexstr(Val: Alignment) << "}";
78 };
79};
80
81inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) {
82 SegInfo.print(OS);
83 return OS;
84}
85
/// AArch64-specific symbol markers used to delimit code/data in .text.
enum class MarkerSymType : char {
  NONE = 0,
  CODE = 1,
  DATA = 2,
};
92
/// Classification of the contents found at a memory location.
enum class MemoryContentsType : char {
  UNKNOWN = 0,                 ///< Unknown contents.
  POSSIBLE_JUMP_TABLE = 1,     ///< Possibly a non-PIC jump table.
  POSSIBLE_PIC_JUMP_TABLE = 2, ///< Possibly a PIC jump table.
};
98
/// Helper function to truncate a \p Value to given size in \p Bytes.
///
/// Keeps the low \p Bytes * 8 bits of \p Value and clears the rest; the result
/// is not sign-extended. \p Bytes of 8 or more returns \p Value unchanged and
/// \p Bytes of 0 returns 0, so the shift below never uses a count outside
/// [0, 63].
inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
  if (Bytes == 0)
    return 0;
  if (Bytes >= sizeof(uint64_t))
    return Value;
  const uint64_t Mask = (uint64_t{1} << (Bytes * 8)) - 1;
  return static_cast<int64_t>(static_cast<uint64_t>(Value) & Mask);
}
103
/// Filter iterator.
///
/// Bidirectional adaptor over the range [Itr, End) that only stops at
/// positions accepted by a predicate. The predicate is called with the
/// underlying iterator itself, not with the element it refers to.
///
/// Construction immediately advances to the first accepted position (or to
/// End when there is none). Two filter iterators compare equal when their
/// underlying iterators are equal.
template <typename ItrType,
          typename PredType = std::function<bool(const ItrType &)>>
class FilterIterator {
  using inner_traits = std::iterator_traits<ItrType>;
  using Iterator = FilterIterator;

  PredType Pred;
  ItrType Itr, End;

  /// Move backwards to the nearest earlier position accepted by Pred.
  /// The start of the range is not stored, so the caller must make sure such
  /// a position exists.
  void prev() {
    while (!Pred(--Itr))
      ;
  }

  /// Step past the current position, then skip rejected positions.
  void next() {
    ++Itr;
    nextMatching();
  }

  /// Skip forward until Pred accepts the current position or End is reached.
  void nextMatching() {
    while (Itr != End && !Pred(Itr))
      ++Itr;
  }

public:
  using iterator_category = std::bidirectional_iterator_tag;
  using value_type = typename inner_traits::value_type;
  using difference_type = typename inner_traits::difference_type;
  using pointer = typename inner_traits::pointer;
  using reference = typename inner_traits::reference;

  Iterator &operator++() {
    next();
    return *this;
  }
  Iterator &operator--() {
    prev();
    return *this;
  }
  /// Post-increment: returns a copy of the whole filter iterator as it was
  /// before advancing (copying only the inner iterator would not convert to
  /// the declared return type).
  Iterator operator++(int) {
    Iterator Tmp(*this);
    next();
    return Tmp;
  }
  /// Post-decrement: returns a copy of the filter iterator as it was before
  /// stepping back.
  Iterator operator--(int) {
    Iterator Tmp(*this);
    prev();
    return Tmp;
  }
  bool operator==(const Iterator &Other) const { return Itr == Other.Itr; }
  bool operator!=(const Iterator &Other) const { return !operator==(Other); }
  reference operator*() { return *Itr; }
  pointer operator->() { return &operator*(); }

  /// \param Pred predicate deciding which positions are visible.
  /// \param Itr  first candidate position.
  /// \param End  one-past-the-last position of the underlying range.
  FilterIterator(PredType Pred, ItrType Itr, ItrType End)
      : Pred(Pred), Itr(Itr), End(End) {
    nextMatching();
  }
};
147
148/// BOLT-exclusive errors generated in core BOLT libraries, optionally holding a
149/// string message and whether it is fatal or not. In case it is fatal and if
150/// BOLT is running as a standalone process, the process might be killed as soon
151/// as the error is checked.
152class BOLTError : public ErrorInfo<BOLTError> {
153public:
154 static char ID;
155
156 BOLTError(bool IsFatal, const Twine &S = Twine());
157 void log(raw_ostream &OS) const override;
158 bool isFatal() const { return IsFatal; }
159
160 const std::string &getMessage() const { return Msg; }
161 std::error_code convertToErrorCode() const override;
162
163private:
164 bool IsFatal;
165 std::string Msg;
166};
167
168/// Streams used by BOLT to log regular or error events
169struct JournalingStreams {
170 raw_ostream &Out;
171 raw_ostream &Err;
172};
173
174Error createNonFatalBOLTError(const Twine &S);
175Error createFatalBOLTError(const Twine &S);
176
177class BinaryContext {
178 BinaryContext() = delete;
179
180 /// Name of the binary file the context originated from.
181 std::string Filename;
182
183 /// Unique build ID if available for the binary.
184 std::optional<std::string> FileBuildID;
185
186 /// Set of all sections.
187 struct CompareSections {
188 bool operator()(const BinarySection *A, const BinarySection *B) const {
189 return *A < *B;
190 }
191 };
192 using SectionSetType = std::set<BinarySection *, CompareSections>;
193 SectionSetType Sections;
194
195 using SectionIterator = pointee_iterator<SectionSetType::iterator>;
196 using SectionConstIterator = pointee_iterator<SectionSetType::const_iterator>;
197
198 using FilteredSectionIterator = FilterIterator<SectionIterator>;
199 using FilteredSectionConstIterator = FilterIterator<SectionConstIterator>;
200
201 /// Map virtual address to a section. It is possible to have more than one
202 /// section mapped to the same address, e.g. non-allocatable sections.
203 using AddressToSectionMapType = std::multimap<uint64_t, BinarySection *>;
204 AddressToSectionMapType AddressToSection;
205
206 /// multimap of section name to BinarySection object. Some binaries
207 /// have multiple sections with the same name.
208 using NameToSectionMapType = std::multimap<std::string, BinarySection *>;
209 NameToSectionMapType NameToSection;
210
211 /// Map section references to BinarySection for matching sections in the
212 /// input file to internal section representation.
213 DenseMap<SectionRef, BinarySection *> SectionRefToBinarySection;
214
215 /// Low level section registration.
216 BinarySection &registerSection(BinarySection *Section);
217
218 /// Store all functions in the binary, sorted by original address.
219 std::map<uint64_t, BinaryFunction> BinaryFunctions;
220
221 /// A mutex that is used to control parallel accesses to BinaryFunctions
222 mutable llvm::sys::RWMutex BinaryFunctionsMutex;
223
224 /// Functions injected by BOLT
225 std::vector<BinaryFunction *> InjectedBinaryFunctions;
226
227 /// Jump tables for all functions mapped by address.
228 std::map<uint64_t, JumpTable *> JumpTables;
229
230 /// Locations of PC-relative relocations in data objects.
231 std::unordered_set<uint64_t> DataPCRelocations;
232
233 /// Used in duplicateJumpTable() to uniquely identify a JT clone
234 /// Start our IDs with a high number so getJumpTableContainingAddress checks
235 /// with size won't overflow
236 uint32_t DuplicatedJumpTables{0x10000000};
237
238 /// Function fragments to skip.
239 std::unordered_set<BinaryFunction *> FragmentsToSkip;
240
241 /// The runtime library.
242 std::unique_ptr<RuntimeLibrary> RtLibrary;
243
244 /// DWP Context.
245 std::shared_ptr<DWARFContext> DWPContext;
246
247 /// A map of DWO Ids to CUs.
248 using DWOIdToCUMapType = std::unordered_map<uint64_t, DWARFUnit *>;
249 DWOIdToCUMapType DWOCUs;
250
251 bool ContainsDwarf5{false};
252 bool ContainsDwarfLegacy{false};
253
254 /// Mapping from input to output addresses.
255 std::optional<AddressMap> IOAddressMap;
256
257 /// Preprocess DWO debug information.
258 void preprocessDWODebugInfo();
259
260 /// DWARF line info for CUs.
261 std::map<unsigned, DwarfLineTable> DwarfLineTablesCUMap;
262
263 /// Internal helper for removing section name from a lookup table.
264 void deregisterSectionName(const BinarySection &Section);
265
266public:
267 static Expected<std::unique_ptr<BinaryContext>>
268 createBinaryContext(Triple TheTriple, StringRef InputFileName,
269 SubtargetFeatures *Features, bool IsPIC,
270 std::unique_ptr<DWARFContext> DwCtx,
271 JournalingStreams Logger);
272
273 /// Superset of compiler units that will contain overwritten code that needs
274 /// new debug info. In a few cases, functions may end up not being
275 /// overwritten, but it is okay to re-generate debug info for them.
276 std::set<const DWARFUnit *> ProcessedCUs;
277
278 // Setup MCPlus target builder
279 void initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder) {
280 MIB = std::move(TargetBuilder);
281 }
282
283 /// Return function fragments to skip.
284 const std::unordered_set<BinaryFunction *> &getFragmentsToSkip() {
285 return FragmentsToSkip;
286 }
287
288 /// Add function fragment to skip
289 void addFragmentsToSkip(BinaryFunction *Function) {
290 FragmentsToSkip.insert(x: Function);
291 }
292
293 void clearFragmentsToSkip() { FragmentsToSkip.clear(); }
294
295 /// Given DWOId returns CU if it exists in DWOCUs.
296 std::optional<DWARFUnit *> getDWOCU(uint64_t DWOId);
297
298 /// Returns DWOContext if it exists.
299 DWARFContext *getDWOContext() const;
300
301 /// Get Number of DWOCUs in a map.
302 uint32_t getNumDWOCUs() { return DWOCUs.size(); }
303
304 /// Returns true if DWARF5 is used.
305 bool isDWARF5Used() const { return ContainsDwarf5; }
306
307 /// Returns true if DWARF4 or lower is used.
308 bool isDWARFLegacyUsed() const { return ContainsDwarfLegacy; }
309
310 std::map<unsigned, DwarfLineTable> &getDwarfLineTables() {
311 return DwarfLineTablesCUMap;
312 }
313
314 DwarfLineTable &getDwarfLineTable(unsigned CUID) {
315 return DwarfLineTablesCUMap[CUID];
316 }
317
318 Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName,
319 unsigned FileNumber,
320 std::optional<MD5::MD5Result> Checksum,
321 std::optional<StringRef> Source,
322 unsigned CUID, unsigned DWARFVersion);
323
324 /// [start memory address] -> [segment info] mapping.
325 std::map<uint64_t, SegmentInfo> SegmentMapInfo;
326
327 /// Symbols that are expected to be undefined in MCContext during emission.
328 std::unordered_set<MCSymbol *> UndefinedSymbols;
329
330 /// [name] -> [BinaryData*] map used for global symbol resolution.
331 using SymbolMapType = StringMap<BinaryData *>;
332 SymbolMapType GlobalSymbols;
333
334 /// [address] -> [BinaryData], ...
335 /// Addresses never change.
336 /// Note: it is important that clients do not hold on to instances of
337 /// BinaryData* while the map is still being modified during BinaryFunction
338 /// disassembly. This is because of the possibility that a regular
339 /// BinaryData is later discovered to be a JumpTable.
340 using BinaryDataMapType = std::map<uint64_t, BinaryData *>;
341 using binary_data_iterator = BinaryDataMapType::iterator;
342 using binary_data_const_iterator = BinaryDataMapType::const_iterator;
343 BinaryDataMapType BinaryDataMap;
344
345 using FilteredBinaryDataConstIterator =
346 FilterIterator<binary_data_const_iterator>;
347 using FilteredBinaryDataIterator = FilterIterator<binary_data_iterator>;
348
349 StringRef getFilename() const { return Filename; }
350 void setFilename(StringRef Name) { Filename = std::string(Name); }
351
352 std::optional<StringRef> getFileBuildID() const {
353 if (FileBuildID)
354 return StringRef(*FileBuildID);
355
356 return std::nullopt;
357 }
358 void setFileBuildID(StringRef ID) { FileBuildID = std::string(ID); }
359
360 bool hasSymbolsWithFileName() const { return HasSymbolsWithFileName; }
361 void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = true; }
362
363 /// Return true if relocations against symbol with a given name
364 /// must be created.
365 bool forceSymbolRelocations(StringRef SymbolName) const;
366
367 uint64_t getNumUnusedProfiledObjects() const {
368 return NumUnusedProfiledObjects;
369 }
370 void setNumUnusedProfiledObjects(uint64_t N) { NumUnusedProfiledObjects = N; }
371
372 RuntimeLibrary *getRuntimeLibrary() { return RtLibrary.get(); }
373 void setRuntimeLibrary(std::unique_ptr<RuntimeLibrary> Lib) {
374 assert(!RtLibrary && "Cannot set runtime library twice.");
375 RtLibrary = std::move(Lib);
376 }
377
378 /// Return BinaryFunction containing a given \p Address or nullptr if
379 /// no registered function contains the \p Address.
380 ///
381 /// In a binary a function has somewhat vague boundaries. E.g. a function can
382 /// refer to the first byte past the end of the function, and it will still be
383 /// referring to this function, not the function following it in the address
384 /// space. Thus we have the following flags that allow to lookup for
385 /// a function where a caller has more context for the search.
386 ///
387 /// If \p CheckPastEnd is true and the \p Address falls on a byte
388 /// immediately following the last byte of some function and there's no other
389 /// function that starts there, then return the function as the one containing
390 /// the \p Address. This is useful when we need to locate functions for
391 /// references pointing immediately past a function body.
392 ///
393 /// If \p UseMaxSize is true, then include the space between this function
394 /// body and the next object in address ranges that we check.
395 BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
396 bool CheckPastEnd = false,
397 bool UseMaxSize = false);
398 const BinaryFunction *
399 getBinaryFunctionContainingAddress(uint64_t Address,
400 bool CheckPastEnd = false,
401 bool UseMaxSize = false) const {
402 return const_cast<BinaryContext *>(this)
403 ->getBinaryFunctionContainingAddress(Address, CheckPastEnd, UseMaxSize);
404 }
405
406 /// Return a BinaryFunction that starts at a given \p Address.
407 BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address);
408
409 const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const {
410 return const_cast<BinaryContext *>(this)->getBinaryFunctionAtAddress(
411 Address);
412 }
413
414 /// Return size of an entry for the given jump table \p Type.
415 uint64_t getJumpTableEntrySize(JumpTable::JumpTableType Type) const {
416 return Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
417 }
418
419 /// Return JumpTable containing a given \p Address.
420 JumpTable *getJumpTableContainingAddress(uint64_t Address) {
421 auto JTI = JumpTables.upper_bound(x: Address);
422 if (JTI == JumpTables.begin())
423 return nullptr;
424 --JTI;
425 if (JTI->first + JTI->second->getSize() > Address)
426 return JTI->second;
427 if (JTI->second->getSize() == 0 && JTI->first == Address)
428 return JTI->second;
429 return nullptr;
430 }
431
432 unsigned getDWARFEncodingSize(unsigned Encoding) {
433 if (Encoding == dwarf::DW_EH_PE_omit)
434 return 0;
435 switch (Encoding & 0x0f) {
436 default:
437 llvm_unreachable("unknown encoding");
438 case dwarf::DW_EH_PE_absptr:
439 case dwarf::DW_EH_PE_signed:
440 return AsmInfo->getCodePointerSize();
441 case dwarf::DW_EH_PE_udata2:
442 case dwarf::DW_EH_PE_sdata2:
443 return 2;
444 case dwarf::DW_EH_PE_udata4:
445 case dwarf::DW_EH_PE_sdata4:
446 return 4;
447 case dwarf::DW_EH_PE_udata8:
448 case dwarf::DW_EH_PE_sdata8:
449 return 8;
450 }
451 }
452
453 /// [MCSymbol] -> [BinaryFunction]
454 ///
455 /// As we fold identical functions, multiple symbols can point
456 /// to the same BinaryFunction.
457 std::unordered_map<const MCSymbol *, BinaryFunction *> SymbolToFunctionMap;
458
459 /// A mutex that is used to control parallel accesses to SymbolToFunctionMap
460 mutable llvm::sys::RWMutex SymbolToFunctionMapMutex;
461
462 /// Look up the symbol entry that contains the given \p Address (based on
463 /// the start address and size for each symbol). Returns a pointer to
464 /// the BinaryData for that symbol. If no data is found, nullptr is returned.
465 const BinaryData *getBinaryDataContainingAddressImpl(uint64_t Address) const;
466
467 /// Update the Parent fields in BinaryDatas after adding a new entry into
468 /// \p BinaryDataMap.
469 void updateObjectNesting(BinaryDataMapType::iterator GAI);
470
471 /// Validate that if object address ranges overlap that the object with
472 /// the larger range is a parent of the object with the smaller range.
473 bool validateObjectNesting() const;
474
475 /// Validate that there are no top level "holes" in each section
476 /// and that all relocations with a section are mapped to a valid
477 /// top level BinaryData.
478 bool validateHoles() const;
479
480 /// Produce output address ranges based on input ranges for some module.
481 DebugAddressRangesVector translateModuleAddressRanges(
482 const DWARFAddressRangesVector &InputRanges) const;
483
484 /// Get a bogus "absolute" section that will be associated with all
485 /// absolute BinaryDatas.
486 BinarySection &absoluteSection();
487
488 /// Process "holes" in between known BinaryData objects. For now,
489 /// symbols are padded with the space before the next BinaryData object.
490 void fixBinaryDataHoles();
491
492 /// Generate names based on data hashes for unknown symbols.
493 void generateSymbolHashes();
494
495 /// Construct BinaryFunction object and add it to internal maps.
496 BinaryFunction *createBinaryFunction(const std::string &Name,
497 BinarySection &Section, uint64_t Address,
498 uint64_t Size, uint64_t SymbolSize = 0,
499 uint16_t Alignment = 0);
500
501 /// Return all functions for this rewrite instance.
502 std::map<uint64_t, BinaryFunction> &getBinaryFunctions() {
503 return BinaryFunctions;
504 }
505
506 /// Return all functions for this rewrite instance.
507 const std::map<uint64_t, BinaryFunction> &getBinaryFunctions() const {
508 return BinaryFunctions;
509 }
510
511 /// Create BOLT-injected function
512 BinaryFunction *createInjectedBinaryFunction(const std::string &Name,
513 bool IsSimple = true);
514
515 std::vector<BinaryFunction *> &getInjectedBinaryFunctions() {
516 return InjectedBinaryFunctions;
517 }
518
519 /// Return vector with all functions, i.e. include functions from the input
520 /// binary and functions created by BOLT.
521 std::vector<BinaryFunction *> getAllBinaryFunctions();
522
523 /// Construct a jump table for \p Function at \p Address or return an existing
524 /// one at that location.
525 ///
  /// May create an embedded jump table. Returns the label of the jump table.
528 const MCSymbol *getOrCreateJumpTable(BinaryFunction &Function,
529 uint64_t Address,
530 JumpTable::JumpTableType Type);
531
532 /// Analyze a possible jump table of type \p Type at a given \p Address.
533 /// \p BF is a function referencing the jump table.
534 /// Return true if the jump table was detected at \p Address, and false
535 /// otherwise.
536 ///
537 /// If \p NextJTAddress is different from zero, it is used as an upper
538 /// bound for jump table memory layout.
539 ///
  /// Optionally, populate \p EntriesAsAddress from jump table entries. The
  /// entries could be partially populated if the jump table detection fails.
542 bool analyzeJumpTable(const uint64_t Address,
543 const JumpTable::JumpTableType Type,
544 const BinaryFunction &BF,
545 const uint64_t NextJTAddress = 0,
546 JumpTable::AddressesType *EntriesAsAddress = nullptr,
547 bool *HasEntryInFragment = nullptr) const;
548
549 /// After jump table locations are established, this function will populate
550 /// their EntriesAsAddress based on memory contents.
551 void populateJumpTables();
552
553 /// Returns a jump table ID and label pointing to the duplicated jump table.
554 /// Ordinarily, jump tables are identified by their address in the input
555 /// binary. We return an ID with the high bit set to differentiate it from
556 /// regular addresses, avoiding conflicts with standard jump tables.
557 std::pair<uint64_t, const MCSymbol *>
558 duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
559 const MCSymbol *OldLabel);
560
561 /// Generate a unique name for jump table at a given \p Address belonging
562 /// to function \p BF.
563 std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
564
565 /// Free memory used by JumpTable's EntriesAsAddress
566 void clearJumpTableTempData() {
567 for (auto &JTI : JumpTables) {
568 JumpTable &JT = *JTI.second;
569 JumpTable::AddressesType Temp;
570 Temp.swap(x&: JT.EntriesAsAddress);
571 }
572 }
573 /// Return true if the array of bytes represents a valid code padding.
574 bool hasValidCodePadding(const BinaryFunction &BF);
575
576 /// Verify padding area between functions, and adjust max function size
577 /// accordingly.
578 void adjustCodePadding();
579
580 /// Regular page size.
581 unsigned RegularPageSize{0x1000};
582 static constexpr unsigned RegularPageSizeX86 = 0x1000;
583 static constexpr unsigned RegularPageSizeAArch64 = 0x10000;
584
585 /// Huge page size to use.
586 static constexpr unsigned HugePageSize = 0x200000;
587
588 /// Addresses reserved for kernel on x86_64 start at this location.
589 static constexpr uint64_t KernelStartX86_64 = 0xFFFF'FFFF'8000'0000;
590
591 /// Map address to a constant island owner (constant data in code section)
592 std::map<uint64_t, BinaryFunction *> AddressToConstantIslandMap;
593
594 /// A map from jump table address to insertion order. Used for generating
595 /// jump table names.
596 std::map<uint64_t, size_t> JumpTableIds;
597
598 std::unique_ptr<MCContext> Ctx;
599
600 /// A mutex that is used to control parallel accesses to Ctx
601 mutable llvm::sys::RWMutex CtxMutex;
602 std::unique_lock<llvm::sys::RWMutex> scopeLock() const {
603 return std::unique_lock<llvm::sys::RWMutex>(CtxMutex);
604 }
605
606 std::unique_ptr<DWARFContext> DwCtx;
607
608 std::unique_ptr<Triple> TheTriple;
609
610 const Target *TheTarget;
611
612 std::string TripleName;
613
614 std::unique_ptr<MCCodeEmitter> MCE;
615
616 std::unique_ptr<MCObjectFileInfo> MOFI;
617
618 std::unique_ptr<const MCAsmInfo> AsmInfo;
619
620 std::unique_ptr<const MCInstrInfo> MII;
621
622 std::unique_ptr<const MCSubtargetInfo> STI;
623
624 std::unique_ptr<MCInstPrinter> InstPrinter;
625
626 std::unique_ptr<const MCInstrAnalysis> MIA;
627
628 std::unique_ptr<MCPlusBuilder> MIB;
629
630 std::unique_ptr<const MCRegisterInfo> MRI;
631
632 std::unique_ptr<MCDisassembler> DisAsm;
633
634 /// Symbolic disassembler.
635 std::unique_ptr<MCDisassembler> SymbolicDisAsm;
636
637 std::unique_ptr<MCAsmBackend> MAB;
638
639 /// Allows BOLT to print to log whenever it is necessary (with or without
640 /// const references)
641 mutable JournalingStreams Logger;
642
643 /// Indicates if the binary is Linux kernel.
644 bool IsLinuxKernel{false};
645
646 /// Indicates if relocations are available for usage.
647 bool HasRelocations{false};
648
649 /// Indicates if the binary is stripped
650 bool IsStripped{false};
651
652 /// Indicates if the binary contains split functions.
653 bool HasSplitFunctions{false};
654
655 /// Indicates if the function ordering of the binary is finalized.
656 bool HasFinalizedFunctionOrder{false};
657
658 /// Indicates if a separate .text.warm section is needed that contains
659 /// function fragments with
660 /// FunctionFragment::getFragmentNum() == FragmentNum::warm()
661 bool HasWarmSection{false};
662
663 /// Is the binary always loaded at a fixed address. Shared objects and
664 /// position-independent executables (PIEs) are examples of binaries that
665 /// will have HasFixedLoadAddress set to false.
666 bool HasFixedLoadAddress{true};
667
668 /// True if the binary has no dynamic dependencies, i.e., if it was statically
669 /// linked.
670 bool IsStaticExecutable{false};
671
672 /// Set to true if the binary contains PT_INTERP header.
673 bool HasInterpHeader{false};
674
675 /// Indicates if any of local symbols used for functions or data objects
676 /// have an origin file name available.
677 bool HasSymbolsWithFileName{false};
678
679 /// Sum of execution count of all functions
680 uint64_t SumExecutionCount{0};
681
682 /// Number of functions with profile information
683 uint64_t NumProfiledFuncs{0};
684
685 /// Number of functions with stale profile information
686 uint64_t NumStaleProfileFuncs{0};
687
688 /// Number of objects in profile whose profile was ignored.
689 uint64_t NumUnusedProfiledObjects{0};
690
691 /// Total hotness score according to profiling data for this binary.
692 uint64_t TotalScore{0};
693
694 /// Binary-wide aggregated stats.
695 struct BinaryStats {
696 /// Stats for macro-fusion.
697 uint64_t MissedMacroFusionPairs{0};
698 uint64_t MissedMacroFusionExecCount{0};
699
700 /// Stats for stale profile matching:
701 /// the total number of basic blocks in the profile
702 uint32_t NumStaleBlocks{0};
703 /// the number of matched basic blocks
704 uint32_t NumMatchedBlocks{0};
705 /// the total count of samples in the profile
706 uint64_t StaleSampleCount{0};
707 /// the count of matched samples
708 uint64_t MatchedSampleCount{0};
709 /// the number of stale functions that have matching number of blocks in
710 /// the profile
711 uint64_t NumStaleFuncsWithEqualBlockCount{0};
712 /// the number of blocks that have matching size but a differing hash
713 uint64_t NumStaleBlocksWithEqualIcount{0};
714 } Stats;
715
716 // Address of the first allocated segment.
717 uint64_t FirstAllocAddress{std::numeric_limits<uint64_t>::max()};
718
719 /// Track next available address for new allocatable sections. RewriteInstance
720 /// sets this prior to running BOLT passes, so layout passes are aware of the
721 /// final addresses functions will have.
722 uint64_t LayoutStartAddress{0};
723
724 /// Old .text info.
725 uint64_t OldTextSectionAddress{0};
726 uint64_t OldTextSectionOffset{0};
727 uint64_t OldTextSectionSize{0};
728
729 /// Address of the code/function that is executed before any other code in
730 /// the binary.
731 std::optional<uint64_t> StartFunctionAddress;
732
733 /// Address of the code/function that is going to be executed right before
734 /// the execution of the binary is completed.
735 std::optional<uint64_t> FiniFunctionAddress;
736
737 /// DT_FINI.
738 std::optional<uint64_t> FiniAddress;
739
740 /// DT_FINI_ARRAY. Only used when DT_FINI is not set.
741 std::optional<uint64_t> FiniArrayAddress;
742
743 /// DT_FINI_ARRAYSZ. Only used when DT_FINI is not set.
744 std::optional<uint64_t> FiniArraySize;
745
746 /// Page alignment used for code layout.
747 uint64_t PageAlign{HugePageSize};
748
749 /// True if the binary requires immediate relocation processing.
750 bool RequiresZNow{false};
751
752 /// List of functions that always trap.
753 std::vector<const BinaryFunction *> TrappedFunctions;
754
755 /// List of external addresses in the code that are not a function start
756 /// and are referenced from BinaryFunction.
757 std::list<std::pair<BinaryFunction *, uint64_t>> InterproceduralReferences;
758
759 /// DWARF encoding. Available encoding types defined in BinaryFormat/Dwarf.h
760 /// enum Constants, e.g. DW_EH_PE_omit.
761 unsigned LSDAEncoding = dwarf::DW_EH_PE_omit;
762
763 BinaryContext(std::unique_ptr<MCContext> Ctx,
764 std::unique_ptr<DWARFContext> DwCtx,
765 std::unique_ptr<Triple> TheTriple, const Target *TheTarget,
766 std::string TripleName, std::unique_ptr<MCCodeEmitter> MCE,
767 std::unique_ptr<MCObjectFileInfo> MOFI,
768 std::unique_ptr<const MCAsmInfo> AsmInfo,
769 std::unique_ptr<const MCInstrInfo> MII,
770 std::unique_ptr<const MCSubtargetInfo> STI,
771 std::unique_ptr<MCInstPrinter> InstPrinter,
772 std::unique_ptr<const MCInstrAnalysis> MIA,
773 std::unique_ptr<MCPlusBuilder> MIB,
774 std::unique_ptr<const MCRegisterInfo> MRI,
775 std::unique_ptr<MCDisassembler> DisAsm,
776 JournalingStreams Logger);
777
778 ~BinaryContext();
779
780 std::unique_ptr<MCObjectWriter> createObjectWriter(raw_pwrite_stream &OS);
781
782 bool isELF() const { return TheTriple->isOSBinFormatELF(); }
783
784 bool isMachO() const { return TheTriple->isOSBinFormatMachO(); }
785
786 bool isAArch64() const {
787 return TheTriple->getArch() == llvm::Triple::aarch64;
788 }
789
790 bool isX86() const {
791 return TheTriple->getArch() == llvm::Triple::x86 ||
792 TheTriple->getArch() == llvm::Triple::x86_64;
793 }
794
795 bool isRISCV() const { return TheTriple->getArch() == llvm::Triple::riscv64; }
796
797 // AArch64-specific functions to check if symbol is used to delimit
798 // code/data in .text. Code is marked by $x, data by $d.
799 MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
800 bool isMarker(const SymbolRef &Symbol) const;
801
802 /// Iterate over all BinaryData.
803 iterator_range<binary_data_const_iterator> getBinaryData() const {
804 return make_range(x: BinaryDataMap.begin(), y: BinaryDataMap.end());
805 }
806
807 /// Iterate over all BinaryData.
808 iterator_range<binary_data_iterator> getBinaryData() {
809 return make_range(x: BinaryDataMap.begin(), y: BinaryDataMap.end());
810 }
811
812 /// Iterate over all BinaryData associated with the given \p Section.
813 iterator_range<FilteredBinaryDataConstIterator>
814 getBinaryDataForSection(const BinarySection &Section) const {
815 auto Begin = BinaryDataMap.lower_bound(x: Section.getAddress());
816 if (Begin != BinaryDataMap.begin())
817 --Begin;
818 auto End = BinaryDataMap.upper_bound(x: Section.getEndAddress());
819 auto pred = [&Section](const binary_data_const_iterator &Itr) -> bool {
820 return Itr->second->getSection() == Section;
821 };
822 return make_range(x: FilteredBinaryDataConstIterator(pred, Begin, End),
823 y: FilteredBinaryDataConstIterator(pred, End, End));
824 }
825
826 /// Iterate over all BinaryData associated with the given \p Section.
827 iterator_range<FilteredBinaryDataIterator>
828 getBinaryDataForSection(BinarySection &Section) {
829 auto Begin = BinaryDataMap.lower_bound(x: Section.getAddress());
830 if (Begin != BinaryDataMap.begin())
831 --Begin;
832 auto End = BinaryDataMap.upper_bound(x: Section.getEndAddress());
833 auto pred = [&Section](const binary_data_iterator &Itr) -> bool {
834 return Itr->second->getSection() == Section;
835 };
836 return make_range(x: FilteredBinaryDataIterator(pred, Begin, End),
837 y: FilteredBinaryDataIterator(pred, End, End));
838 }
839
  /// Iterate over all the sub-symbols of \p BD (if any).
841 iterator_range<binary_data_iterator> getSubBinaryData(BinaryData *BD);
842
843 /// Clear the global symbol address -> name(s) map.
844 void clearBinaryData() {
845 GlobalSymbols.clear();
846 for (auto &Entry : BinaryDataMap)
847 delete Entry.second;
848 BinaryDataMap.clear();
849 }
850
  /// Process \p Address reference from code in function \p BF.
852 /// \p IsPCRel indicates if the reference is PC-relative.
853 /// Return <Symbol, Addend> pair corresponding to the \p Address.
854 std::pair<const MCSymbol *, uint64_t>
855 handleAddressRef(uint64_t Address, BinaryFunction &BF, bool IsPCRel);
856
/// Analyze memory contents at the given \p Address and return the type of
/// memory contents (such as a possible jump table).
/// NOTE(review): \p BF is presumably the function referencing \p Address;
/// confirm against the definition.
MemoryContentsType analyzeMemoryAt(uint64_t Address, BinaryFunction &BF);
860
861 /// Return a value of the global \p Symbol or an error if the value
862 /// was not set.
863 ErrorOr<uint64_t> getSymbolValue(const MCSymbol &Symbol) const {
864 const BinaryData *BD = getBinaryDataByName(Name: Symbol.getName());
865 if (!BD)
866 return std::make_error_code(e: std::errc::bad_address);
867 return BD->getAddress();
868 }
869
/// Return a global symbol registered at a given \p Address and \p Size.
/// If no symbol exists, create one with unique name using \p Prefix.
/// If there are multiple symbols registered at the \p Address, then
/// return the first one.
/// \p Size, \p Alignment and \p Flags all default to 0.
MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
                                  uint64_t Size = 0, uint16_t Alignment = 0,
                                  unsigned Flags = 0);
877
/// Create a global symbol named \p Name without registering an address
/// for it.
MCSymbol *getOrCreateUndefinedGlobalSymbol(StringRef Name);
880
/// Register a symbol with \p Name at a given \p Address using \p Size,
/// \p Alignment, and \p Flags. See llvm::SymbolRef::Flags for the definition
/// of \p Flags (defaults to 0).
MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
                                uint64_t Size, uint16_t Alignment,
                                unsigned Flags = 0);
887
888 /// Return BinaryData registered at a given \p Address or nullptr if no
889 /// global symbol was registered at the location.
890 const BinaryData *getBinaryDataAtAddress(uint64_t Address) const {
891 auto NI = BinaryDataMap.find(x: Address);
892 return NI != BinaryDataMap.end() ? NI->second : nullptr;
893 }
894
895 BinaryData *getBinaryDataAtAddress(uint64_t Address) {
896 auto NI = BinaryDataMap.find(x: Address);
897 return NI != BinaryDataMap.end() ? NI->second : nullptr;
898 }
899
900 /// Look up the symbol entry that contains the given \p Address (based on
901 /// the start address and size for each symbol). Returns a pointer to
902 /// the BinaryData for that symbol. If no data is found, nullptr is returned.
903 const BinaryData *getBinaryDataContainingAddress(uint64_t Address) const {
904 return getBinaryDataContainingAddressImpl(Address);
905 }
906
907 BinaryData *getBinaryDataContainingAddress(uint64_t Address) {
908 return const_cast<BinaryData *>(
909 getBinaryDataContainingAddressImpl(Address));
910 }
911
912 /// Return BinaryData for the given \p Name or nullptr if no
913 /// global symbol with that name exists.
914 const BinaryData *getBinaryDataByName(StringRef Name) const {
915 return GlobalSymbols.lookup(Key: Name);
916 }
917
918 BinaryData *getBinaryDataByName(StringRef Name) {
919 return GlobalSymbols.lookup(Key: Name);
920 }
921
922 /// Return registered PLT entry BinaryData with the given \p Name
923 /// or nullptr if no global PLT symbol with that name exists.
924 const BinaryData *getPLTBinaryDataByName(StringRef Name) const {
925 if (const BinaryData *Data = getBinaryDataByName(Name: Name.str() + "@PLT"))
926 return Data;
927
928 // The symbol name might contain versioning information e.g
929 // memcpy@@GLIBC_2.17. Remove it and try to locate binary data
930 // without it.
931 size_t At = Name.find(Str: "@");
932 if (At != std::string::npos)
933 return getBinaryDataByName(Name: Name.str().substr(pos: 0, n: At) + "@PLT");
934
935 return nullptr;
936 }
937
/// Retrieve ELF's _GLOBAL_OFFSET_TABLE_ symbol, which points at the GOT.
/// Return nullptr if it is not present in the input binary symtab.
BinaryData *getGOTSymbol();
941
/// Return true if \p SymName is exactly "_GLOBAL_OFFSET_TABLE_", the name of
/// ELF's global offset table symbol.
bool isGOTSymbol(StringRef SymName) const {
  return SymName == "_GLOBAL_OFFSET_TABLE_";
}
946
947 /// Return true if \p SymbolName was generated internally and was not present
948 /// in the input binary.
949 bool isInternalSymbolName(const StringRef Name) {
950 return Name.starts_with(Prefix: "SYMBOLat") || Name.starts_with(Prefix: "DATAat") ||
951 Name.starts_with(Prefix: "HOLEat");
952 }
953
954 MCSymbol *getHotTextStartSymbol() const {
955 return Ctx->getOrCreateSymbol(Name: "__hot_start");
956 }
957
958 MCSymbol *getHotTextEndSymbol() const {
959 return Ctx->getOrCreateSymbol(Name: "__hot_end");
960 }
961
/// Return the text section reported by the object file info (MOFI).
MCSection *getTextSection() const { return MOFI->getTextSection(); }
963
964 /// Return code section with a given name.
965 MCSection *getCodeSection(StringRef SectionName) const {
966 if (isELF())
967 return Ctx->getELFSection(Section: SectionName, Type: ELF::SHT_PROGBITS,
968 Flags: ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
969 else
970 return Ctx->getMachOSection(Segment: "__TEXT", Section: SectionName,
971 TypeAndAttributes: MachO::S_ATTR_PURE_INSTRUCTIONS,
972 K: SectionKind::getText());
973 }
974
975 /// Return data section with a given name.
976 MCSection *getDataSection(StringRef SectionName) const {
977 return Ctx->getELFSection(Section: SectionName, Type: ELF::SHT_PROGBITS, Flags: ELF::SHF_ALLOC);
978 }
979
980 /// \name Pre-assigned Section Names
981 /// @{
982
/// Name of the main code section: ".text".
const char *getMainCodeSectionName() const { return ".text"; }
984
/// Name of the warm code section: ".text.warm".
const char *getWarmCodeSectionName() const { return ".text.warm"; }
986
/// Name of the cold code section: ".text.cold".
const char *getColdCodeSectionName() const { return ".text.cold"; }
988
/// Name of the hot text mover section: ".text.mover".
const char *getHotTextMoverSectionName() const { return ".text.mover"; }
990
/// Name of the injected code section: ".text.injected".
const char *getInjectedCodeSectionName() const { return ".text.injected"; }
992
/// Name of the injected cold code section: ".text.injected.cold".
const char *getInjectedColdCodeSectionName() const {
  return ".text.injected.cold";
}
996
997 ErrorOr<BinarySection &> getGdbIndexSection() const {
998 return getUniqueSectionByName(SectionName: ".gdb_index");
999 }
1000
1001 ErrorOr<BinarySection &> getDebugNamesSection() const {
1002 return getUniqueSectionByName(SectionName: ".debug_names");
1003 }
1004
1005 /// @}
1006
/// Register \p TargetFunction as a fragment of \p Function if checks pass:
/// - if \p TargetFunction name matches \p Function name with a suffix:
///   fragment_name == parent_name.cold(.\d+)?
/// Return true if \p TargetFunction was registered, false if the check
/// failed.
bool registerFragment(BinaryFunction &TargetFunction,
                      BinaryFunction &Function) const;
1013
1014 /// Add interprocedural reference for \p Function to \p Address
1015 void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) {
1016 InterproceduralReferences.push_back(x: {Function, Address});
1017 }
1018
/// Used to fix the target of a linker-generated AArch64 adrp + add
/// sequence that has no relocation info.
/// NOTE(review): \p LoadLowBits / \p LoadHiBits are presumably the two
/// instructions of that sequence in \p BF and \p Target the address they
/// should resolve to — confirm against the definition.
void addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits,
                            MCInst &LoadHiBits, uint64_t Target);
1023
/// Return true if AArch64 veneer was successfully matched at a given
/// \p Address and register veneer binary function if \p MatchOnly
/// argument is false.
bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false);
1028
/// Resolve inter-procedural dependencies.
/// NOTE(review): presumably consumes the references recorded through
/// addInterproceduralReference() — confirm against the definition.
void processInterproceduralReferences();
1031
/// Skip functions together with all of their parent and child fragments,
/// transitively.
void skipMarkedFragments();
1034
/// Perform any necessary post-processing on the symbol table after
/// function disassembly is complete. This processing fixes top-level
/// data holes and makes sure the symbol table is valid.
/// It also assigns all memory profiling info to the appropriate
/// BinaryData objects.
void postProcessSymbolTable();
1041
/// Set the size of the global symbol located at \p Address to \p Size.
/// Return false if no symbol exists, true otherwise.
bool setBinaryDataSize(uint64_t Address, uint64_t Size);
1045
/// Print the global symbol table to \p OS.
void printGlobalSymbols(raw_ostream &OS) const;
1048
/// Register information about the given \p Section so we can look up
/// sections by address.
BinarySection &registerSection(SectionRef Section);
1052
/// Register a copy of \p OriginalSection under a different name,
/// \p SectionName.
BinarySection &registerSection(const Twine &SectionName,
                               const BinarySection &OriginalSection);
1056
/// Register or update the information for the section with the given
/// \p Name. If the section already exists, the information in the
/// section will be updated with the new data.
/// Defaults: \p Data = nullptr, \p Size = 0, \p Alignment = 1.
BinarySection &registerOrUpdateSection(const Twine &Name, unsigned ELFType,
                                       unsigned ELFFlags,
                                       uint8_t *Data = nullptr,
                                       uint64_t Size = 0,
                                       unsigned Alignment = 1);
1065
1066 /// Register the information for the note (non-allocatable) section
1067 /// with the given /p Name. If the section already exists, the
1068 /// information in the section will be updated with the new data.
1069 BinarySection &
1070 registerOrUpdateNoteSection(const Twine &Name, uint8_t *Data = nullptr,
1071 uint64_t Size = 0, unsigned Alignment = 1,
1072 bool IsReadOnly = true,
1073 unsigned ELFType = ELF::SHT_PROGBITS) {
1074 return registerOrUpdateSection(Name, ELFType,
1075 ELFFlags: BinarySection::getFlags(IsReadOnly), Data,
1076 Size, Alignment);
1077 }
1078
/// Remove sections that were pre-registered but never used.
void deregisterUnusedSections();
1081
/// Remove the given \p Section from the set of all sections. Return
/// true if the section was removed (and deleted), otherwise false.
bool deregisterSection(BinarySection &Section);
1085
/// Re-register \p Section under the name \p NewName.
void renameSection(BinarySection &Section, const Twine &NewName);
1088
1089 /// Iterate over all registered sections.
1090 iterator_range<FilteredSectionIterator> sections() {
1091 auto notNull = [](const SectionIterator &Itr) { return (bool)*Itr; };
1092 return make_range(
1093 x: FilteredSectionIterator(notNull, Sections.begin(), Sections.end()),
1094 y: FilteredSectionIterator(notNull, Sections.end(), Sections.end()));
1095 }
1096
1097 /// Iterate over all registered sections.
1098 iterator_range<FilteredSectionConstIterator> sections() const {
1099 return const_cast<BinaryContext *>(this)->sections();
1100 }
1101
1102 /// Iterate over all registered allocatable sections.
1103 iterator_range<FilteredSectionIterator> allocatableSections() {
1104 auto isAllocatable = [](const SectionIterator &Itr) {
1105 return *Itr && Itr->isAllocatable();
1106 };
1107 return make_range(
1108 x: FilteredSectionIterator(isAllocatable, Sections.begin(),
1109 Sections.end()),
1110 y: FilteredSectionIterator(isAllocatable, Sections.end(), Sections.end()));
1111 }
1112
1113 /// Iterate over all registered code sections.
1114 iterator_range<FilteredSectionIterator> textSections() {
1115 auto isText = [](const SectionIterator &Itr) {
1116 return *Itr && Itr->isAllocatable() && Itr->isText();
1117 };
1118 return make_range(
1119 x: FilteredSectionIterator(isText, Sections.begin(), Sections.end()),
1120 y: FilteredSectionIterator(isText, Sections.end(), Sections.end()));
1121 }
1122
1123 /// Iterate over all registered allocatable sections.
1124 iterator_range<FilteredSectionConstIterator> allocatableSections() const {
1125 return const_cast<BinaryContext *>(this)->allocatableSections();
1126 }
1127
1128 /// Iterate over all registered non-allocatable sections.
1129 iterator_range<FilteredSectionIterator> nonAllocatableSections() {
1130 auto notAllocated = [](const SectionIterator &Itr) {
1131 return *Itr && !Itr->isAllocatable();
1132 };
1133 return make_range(
1134 x: FilteredSectionIterator(notAllocated, Sections.begin(), Sections.end()),
1135 y: FilteredSectionIterator(notAllocated, Sections.end(), Sections.end()));
1136 }
1137
1138 /// Iterate over all registered non-allocatable sections.
1139 iterator_range<FilteredSectionConstIterator> nonAllocatableSections() const {
1140 return const_cast<BinaryContext *>(this)->nonAllocatableSections();
1141 }
1142
1143 /// Iterate over all allocatable relocation sections.
1144 iterator_range<FilteredSectionIterator> allocatableRelaSections() {
1145 auto isAllocatableRela = [](const SectionIterator &Itr) {
1146 return *Itr && Itr->isAllocatable() && Itr->isRela();
1147 };
1148 return make_range(x: FilteredSectionIterator(isAllocatableRela,
1149 Sections.begin(), Sections.end()),
1150 y: FilteredSectionIterator(isAllocatableRela, Sections.end(),
1151 Sections.end()));
1152 }
1153
/// Return base address for the shared object or PIE based on the segment
/// mapping information. \p MMapAddress is an address where one of the
/// segments was mapped. \p FileOffset is the offset in the file of the
/// mapping. Note that \p FileOffset should be page-aligned and could be
/// different from the file offset of the segment, which could be unaligned.
/// If no segment is found that matches \p FileOffset, return std::nullopt.
std::optional<uint64_t> getBaseAddressForMapping(uint64_t MMapAddress,
                                                 uint64_t FileOffset) const;
1162
1163 /// Check if the address belongs to this binary's static allocation space.
1164 bool containsAddress(uint64_t Address) const {
1165 return Address >= FirstAllocAddress && Address < LayoutStartAddress;
1166 }
1167
/// Return the name of the section containing the given \p Address, or an
/// error (the return type is ErrorOr).
ErrorOr<StringRef> getSectionNameForAddress(uint64_t Address) const;
1170
/// Print all sections to \p OS.
void printSections(raw_ostream &OS) const;
1173
/// Return largest section containing the given \p Address. These
/// functions only work for allocatable sections, i.e. ones with non-zero
/// addresses.
ErrorOr<BinarySection &> getSectionForAddress(uint64_t Address);
1178 ErrorOr<const BinarySection &> getSectionForAddress(uint64_t Address) const {
1179 return const_cast<BinaryContext *>(this)->getSectionForAddress(Address);
1180 }
1181
1182 /// Return internal section representation for a section in a file.
1183 BinarySection *getSectionForSectionRef(SectionRef Section) const {
1184 return SectionRefToBinarySection.lookup(Val: Section);
1185 }
1186
1187 /// Return section(s) associated with given \p Name.
1188 iterator_range<NameToSectionMapType::iterator>
1189 getSectionByName(const Twine &Name) {
1190 return make_range(p: NameToSection.equal_range(x: Name.str()));
1191 }
1192 iterator_range<NameToSectionMapType::const_iterator>
1193 getSectionByName(const Twine &Name) const {
1194 return make_range(p: NameToSection.equal_range(x: Name.str()));
1195 }
1196
1197 /// Return the unique section associated with given \p Name.
1198 /// If there is more than one section with the same name, return an error
1199 /// object.
1200 ErrorOr<BinarySection &>
1201 getUniqueSectionByName(const Twine &SectionName) const {
1202 auto Sections = getSectionByName(Name: SectionName);
1203 if (Sections.begin() != Sections.end() &&
1204 std::next(x: Sections.begin()) == Sections.end())
1205 return *Sections.begin()->second;
1206 return std::make_error_code(e: std::errc::bad_address);
1207 }
1208
/// Return an unsigned value of \p Size stored at \p Address. The address has
/// to be a valid statically allocated address for the binary.
ErrorOr<uint64_t> getUnsignedValueAtAddress(uint64_t Address,
                                            size_t Size) const;
1213
/// Return a signed value of \p Size stored at \p Address. The address has
/// to be a valid statically allocated address for the binary.
/// NOTE(review): the signed value is returned through ErrorOr<uint64_t>;
/// presumably callers reinterpret it as signed — confirm against the
/// definition and consider whether int64_t would be the clearer type.
ErrorOr<uint64_t> getSignedValueAtAddress(uint64_t Address,
                                          size_t Size) const;
1218
1219 /// Special case of getUnsignedValueAtAddress() that uses a pointer size.
1220 ErrorOr<uint64_t> getPointerAtAddress(uint64_t Address) const {
1221 return getUnsignedValueAtAddress(Address, Size: AsmInfo->getCodePointerSize());
1222 }
1223
/// Replace all references to \p ChildBF with \p ParentBF. \p ChildBF is then
/// removed from the list of functions \p BFs. The profile data of \p ChildBF
/// is merged into that of \p ParentBF. This function is thread safe.
void foldFunction(BinaryFunction &ChildBF, BinaryFunction &ParentBF);
1228
/// Add a section relocation at a given \p Address.
/// \p Addend and \p Value default to 0.
void addRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type,
                   uint64_t Addend = 0, uint64_t Value = 0);
1232
/// Return a relocation registered at a given \p Address, or nullptr if there
/// is no relocation at that address.
const Relocation *getRelocationAt(uint64_t Address) const;
1236
1237 /// Register a presence of PC-relative relocation at the given \p Address.
1238 void addPCRelativeDataRelocation(uint64_t Address) {
1239 DataPCRelocations.emplace(args&: Address);
1240 }
1241
/// Register dynamic relocation at \p Address. \p Value defaults to 0.
void addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, uint64_t Type,
                          uint64_t Addend, uint64_t Value = 0);
1245
/// Return a dynamic relocation registered at a given \p Address, or nullptr
/// if there is no dynamic relocation at that address.
const Relocation *getDynamicRelocationAt(uint64_t Address) const;
1249
/// Remove registered relocation at a given \p Address.
/// NOTE(review): the bool result presumably reports whether a relocation
/// was actually removed — confirm against the definition.
bool removeRelocationAt(uint64_t Address);
1252
/// Make sure that symbols referenced by ambiguous relocations are marked as
/// immovable. For now, if a section relocation points at the boundary
/// between two symbols, then those symbols are marked as immovable.
void markAmbiguousRelocations(BinaryData &BD, const uint64_t Address);
1257
/// Return BinaryFunction corresponding to \p Symbol. If \p EntryDesc is not
/// nullptr, set it to entry discriminator corresponding to \p Symbol
/// (0 for single-entry functions). This function is thread safe.
BinaryFunction *getFunctionForSymbol(const MCSymbol *Symbol,
                                     uint64_t *EntryDesc = nullptr);
1263
1264 const BinaryFunction *
1265 getFunctionForSymbol(const MCSymbol *Symbol,
1266 uint64_t *EntryDesc = nullptr) const {
1267 return const_cast<BinaryContext *>(this)->getFunctionForSymbol(Symbol,
1268 EntryDesc);
1269 }
1270
/// Associate the symbol \p Sym with the function \p BF for lookups with
/// getFunctionForSymbol(). An existing entry for \p Sym is overwritten.
void setSymbolToFunctionMap(const MCSymbol *Sym, BinaryFunction *BF) {
  SymbolToFunctionMap[Sym] = BF;
}
1276
/// Populate some internal data structures with debug info.
void preprocessDebugInfo();
1279
/// Add a filename entry from SrcCUID to DestCUID.
/// NOTE(review): \p FileIndex presumably selects the entry in the source
/// unit and the result is its index in the destination unit — confirm
/// against the definition.
unsigned addDebugFilenameToUnit(const uint32_t DestCUID,
                                const uint32_t SrcCUID, unsigned FileIndex);
1283
/// Return functions in output layout order.
std::vector<BinaryFunction *> getSortedFunctions();
1286
/// Make a best effort to calculate the size of the function by emitting
/// its code, and relaxing branch instructions. By default, branch
/// instructions are updated to match the layout. Pass \p FixBranches set to
/// false if the branches are known to be up to date with the code layout.
///
/// Return the pair where the first size is for the main part, and the second
/// size is for the cold one.
/// Modify BinaryBasicBlock::OutputAddressRange for each basic block in the
/// function in place so that BinaryBasicBlock::getOutputSize() gives the
/// emitted size of the basic block.
std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF,
                                               bool FixBranches = true);
1299
1300 /// Calculate the size of the instruction \p Inst optionally using a
1301 /// user-supplied emitter for lock-free multi-thread work. MCCodeEmitter is
1302 /// not thread safe and each thread should operate with its own copy of it.
1303 uint64_t
1304 computeInstructionSize(const MCInst &Inst,
1305 const MCCodeEmitter *Emitter = nullptr) const {
1306 if (std::optional<uint32_t> Size = MIB->getSize(Inst))
1307 return *Size;
1308
1309 if (!Emitter)
1310 Emitter = this->MCE.get();
1311 SmallString<256> Code;
1312 SmallVector<MCFixup, 4> Fixups;
1313 Emitter->encodeInstruction(Inst, CB&: Code, Fixups, STI: *STI);
1314 return Code.size();
1315 }
1316
1317 /// Compute the native code size for a range of instructions.
1318 /// Note: this can be imprecise wrt the final binary since happening prior to
1319 /// relaxation, as well as wrt the original binary because of opcode
1320 /// shortening.MCCodeEmitter is not thread safe and each thread should operate
1321 /// with its own copy of it.
1322 template <typename Itr>
1323 uint64_t computeCodeSize(Itr Beg, Itr End,
1324 const MCCodeEmitter *Emitter = nullptr) const {
1325 uint64_t Size = 0;
1326 while (Beg != End) {
1327 if (!MIB->isPseudo(Inst: *Beg))
1328 Size += computeInstructionSize(Inst: *Beg, Emitter);
1329 ++Beg;
1330 }
1331 return Size;
1332 }
1333
/// Validate that disassembling the \p Sequence of bytes into an instruction
/// and assembling the instruction again results in a byte sequence identical
/// to the original one.
bool validateInstructionEncoding(ArrayRef<uint8_t> Sequence) const;
1338
/// Return a function execution count threshold for determining whether
/// the function is 'hot'. Consider it hot if count is above the average exec
/// count of profiled functions.
uint64_t getHotThreshold() const;
1343
1344 /// Return true if instruction \p Inst requires an offset for further
1345 /// processing (e.g. assigning a profile).
1346 bool keepOffsetForInstruction(const MCInst &Inst) const {
1347 if (MIB->isCall(Inst) || MIB->isBranch(Inst) || MIB->isReturn(Inst) ||
1348 MIB->isPrefix(Inst) || MIB->isIndirectBranch(Inst)) {
1349 return true;
1350 }
1351 return false;
1352 }
1353
/// Return true if \p Function should be emitted to the output file.
bool shouldEmit(const BinaryFunction &Function) const;
1356
/// Dump the assembly representation of \p Inst to debug output.
void dump(const MCInst &Inst) const;
1359
/// Print the string name for the CFI operation \p Inst to \p OS.
static void printCFI(raw_ostream &OS, const MCCFIInstruction &Inst);
1362
/// Print a single MCInst in native format. If \p Function is non-null,
/// the instruction will be annotated with CFI and possibly DWARF line table
/// info.
/// If \p PrintMCInst is true, the instruction is also printed in the
/// architecture independent format.
/// NOTE(review): \p PrintMemData and \p PrintRelocations presumably enable
/// the corresponding extra annotations, and \p Endl is the line terminator
/// (default "\n") — confirm against the definition.
void printInstruction(raw_ostream &OS, const MCInst &Instruction,
                      uint64_t Offset = 0,
                      const BinaryFunction *Function = nullptr,
                      bool PrintMCInst = false, bool PrintMemData = false,
                      bool PrintRelocations = false,
                      StringRef Endl = "\n") const;
1374
1375 /// Print a range of instructions.
1376 template <typename Itr>
1377 uint64_t
1378 printInstructions(raw_ostream &OS, Itr Begin, Itr End, uint64_t Offset = 0,
1379 const BinaryFunction *Function = nullptr,
1380 bool PrintMCInst = false, bool PrintMemData = false,
1381 bool PrintRelocations = false,
1382 StringRef Endl = "\n") const {
1383 while (Begin != End) {
1384 printInstruction(OS, Instruction: *Begin, Offset, Function, PrintMCInst, PrintMemData,
1385 PrintRelocations, Endl);
1386 Offset += computeCodeSize(Begin, Begin + 1);
1387 ++Begin;
1388 }
1389 return Offset;
1390 }
1391
/// Log BOLT errors to journaling streams and quit the process with non-zero
/// error code 1 if the error is fatal.
void logBOLTErrorsAndQuitOnFatal(Error E);
1395
/// Build and return a bug report message string from \p Message and
/// \p Function.
/// NOTE(review): the exact contents of the report are defined out of line;
/// confirm against the definition.
std::string generateBugReportMessage(StringRef Message,
                                     const BinaryFunction &Function) const;
1398
/// Bundle of an MCCodeEmitter together with the MCObjectFileInfo and
/// MCContext objects it was created with; see
/// createIndependentMCCodeEmitter().
///
/// Members are destroyed in reverse declaration order, so MCE goes first,
/// then LocalCtx, then LocalMOFI.
/// NOTE(review): presumably this order is intentional (the emitter is built
/// from LocalCtx) — keep it when editing.
struct IndependentCodeEmitter {
  std::unique_ptr<MCObjectFileInfo> LocalMOFI;
  std::unique_ptr<MCContext> LocalCtx;
  std::unique_ptr<MCCodeEmitter> MCE;
};
1404
1405 /// Encapsulates an independent MCCodeEmitter that doesn't share resources
1406 /// with the main one available through BinaryContext::MCE, managed by
1407 /// BinaryContext.
1408 /// This is intended to create a lock-free environment for an auxiliary thread
1409 /// that needs to perform work with an MCCodeEmitter that can be transient or
1410 /// won't be used in the main code emitter.
1411 IndependentCodeEmitter createIndependentMCCodeEmitter() const {
1412 IndependentCodeEmitter MCEInstance;
1413 MCEInstance.LocalCtx.reset(
1414 p: new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
1415 MCEInstance.LocalMOFI.reset(
1416 p: TheTarget->createMCObjectFileInfo(Ctx&: *MCEInstance.LocalCtx.get(),
1417 /*PIC=*/PIC: !HasFixedLoadAddress));
1418 MCEInstance.LocalCtx->setObjectFileInfo(MCEInstance.LocalMOFI.get());
1419 MCEInstance.MCE.reset(
1420 p: TheTarget->createMCCodeEmitter(II: *MII, Ctx&: *MCEInstance.LocalCtx));
1421 return MCEInstance;
1422 }
1423
1424 /// Creating MCStreamer instance.
1425 std::unique_ptr<MCStreamer>
1426 createStreamer(llvm::raw_pwrite_stream &OS) const {
1427 MCCodeEmitter *MCE = TheTarget->createMCCodeEmitter(II: *MII, Ctx&: *Ctx);
1428 MCAsmBackend *MAB =
1429 TheTarget->createMCAsmBackend(STI: *STI, MRI: *MRI, Options: MCTargetOptions());
1430 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS);
1431 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
1432 T: *TheTriple, Ctx&: *Ctx, TAB: std::unique_ptr<MCAsmBackend>(MAB), OW: std::move(OW),
1433 Emitter: std::unique_ptr<MCCodeEmitter>(MCE), STI: *STI,
1434 /* RelaxAll */ RelaxAll: false,
1435 /* IncrementalLinkerCompatible */ IncrementalLinkerCompatible: false,
1436 /* DWARFMustBeAtTheEnd */ DWARFMustBeAtTheEnd: false));
1437 return Streamer;
1438 }
1439
/// Store \p Map as the IO address map (moved into IOAddressMap).
void setIOAddressMap(AddressMap Map) { IOAddressMap = std::move(Map); }
/// Return the IO address map. Asserts that it has been set, i.e. that
/// setIOAddressMap() was called first.
const AddressMap &getIOAddressMap() const {
  assert(IOAddressMap && "Address map not set yet");
  return *IOAddressMap;
}
1445
/// Return the logger's output stream (Logger.Out).
raw_ostream &outs() const { return Logger.Out; }
1447
/// Return the logger's error stream (Logger.Err).
raw_ostream &errs() const { return Logger.Err; }
1449};
1450
1451template <typename T, typename = std::enable_if_t<sizeof(T) == 1>>
1452inline raw_ostream &operator<<(raw_ostream &OS, const ArrayRef<T> &ByteArray) {
1453 const char *Sep = "";
1454 for (const auto Byte : ByteArray) {
1455 OS << Sep << format("%.2x", Byte);
1456 Sep = " ";
1457 }
1458 return OS;
1459}
1460
1461} // namespace bolt
1462} // namespace llvm
1463
1464#endif
1465

// Source: bolt/include/bolt/Core/BinaryContext.h