1//===- bolt/Core/BinaryContext.h - Low-level context ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Context for processing binary executable/library files.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef BOLT_CORE_BINARY_CONTEXT_H
14#define BOLT_CORE_BINARY_CONTEXT_H
15
16#include "bolt/Core/AddressMap.h"
17#include "bolt/Core/BinaryData.h"
18#include "bolt/Core/BinarySection.h"
19#include "bolt/Core/DebugData.h"
20#include "bolt/Core/DynoStats.h"
21#include "bolt/Core/JumpTable.h"
22#include "bolt/Core/MCPlusBuilder.h"
23#include "bolt/RuntimeLibs/RuntimeLibrary.h"
24#include "llvm/ADT/AddressRanges.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/EquivalenceClasses.h"
27#include "llvm/ADT/StringMap.h"
28#include "llvm/ADT/iterator.h"
29#include "llvm/BinaryFormat/Dwarf.h"
30#include "llvm/BinaryFormat/MachO.h"
31#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
32#include "llvm/MC/MCAsmInfo.h"
33#include "llvm/MC/MCCodeEmitter.h"
34#include "llvm/MC/MCContext.h"
35#include "llvm/MC/MCObjectFileInfo.h"
36#include "llvm/MC/MCObjectWriter.h"
37#include "llvm/MC/MCPseudoProbe.h"
38#include "llvm/MC/MCSectionELF.h"
39#include "llvm/MC/MCSectionMachO.h"
40#include "llvm/MC/MCStreamer.h"
41#include "llvm/MC/MCSymbol.h"
42#include "llvm/MC/TargetRegistry.h"
43#include "llvm/Support/ErrorOr.h"
44#include "llvm/Support/RWMutex.h"
45#include "llvm/Support/raw_ostream.h"
46#include "llvm/TargetParser/Triple.h"
47#include <functional>
48#include <list>
49#include <map>
50#include <optional>
51#include <set>
52#include <string>
53#include <system_error>
54#include <type_traits>
55#include <unordered_map>
56#include <vector>
57
58namespace llvm {
59class MCDisassembler;
60class MCInstPrinter;
61
62using namespace object;
63
64namespace bolt {
65
66class BinaryFunction;
67
68/// Information on loadable part of the file.
69struct SegmentInfo {
70 uint64_t Address; /// Address of the segment in memory.
71 uint64_t Size; /// Size of the segment in memory.
72 uint64_t FileOffset; /// Offset in the file.
73 uint64_t FileSize; /// Size in file.
74 uint64_t Alignment; /// Alignment of the segment.
75 bool IsExecutable; /// Is the executable bit set on the Segment?
76 bool IsWritable; /// Is the segment writable.
77
78 void print(raw_ostream &OS) const {
79 OS << "SegmentInfo { Address: 0x" << Twine::utohexstr(Val: Address)
80 << ", Size: 0x" << Twine::utohexstr(Val: Size) << ", FileOffset: 0x"
81 << Twine::utohexstr(Val: FileOffset) << ", FileSize: 0x"
82 << Twine::utohexstr(Val: FileSize) << ", Alignment: 0x"
83 << Twine::utohexstr(Val: Alignment) << ", " << (IsExecutable ? "x" : "")
84 << (IsWritable ? "w" : "") << " }";
85 };
86};
87
88inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) {
89 SegInfo.print(OS);
90 return OS;
91}
92
/// AArch64-specific symbol markers used to delimit code/data in .text.
enum class MarkerSymType : char {
  NONE = 0, ///< Not a marker symbol.
  CODE = 1, ///< Marker that introduces code.
  DATA = 2, ///< Marker that introduces data.
};
99
/// Classification of the contents found at a memory location.
enum class MemoryContentsType : char {
  UNKNOWN = 0,             ///< Unknown contents.
  POSSIBLE_JUMP_TABLE,     ///< Possibly a non-PIC jump table.
  POSSIBLE_PIC_JUMP_TABLE, ///< Possibly a PIC jump table.
};
105
/// Helper function to truncate a \p Value to given size in \p Bytes.
///
/// Keeps the low `Bytes * 8` bits of \p Value and clears the rest; the result
/// is not sign-extended.
///
/// \param Value value to truncate.
/// \param Bytes number of low-order bytes to keep. 0 yields 0; any value of 8
///        or more returns \p Value unchanged.
inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
  // Shifting a 64-bit value by 64 or more bits is undefined behavior, so the
  // two boundary cases are answered without shifting.
  if (Bytes == 0)
    return 0;
  if (Bytes >= sizeof(uint64_t))
    return Value;

  const uint64_t Mask = ~static_cast<uint64_t>(0) >> (64 - Bytes * 8);
  return static_cast<int64_t>(static_cast<uint64_t>(Value) & Mask);
}
110
/// Filter iterator.
///
/// Bidirectional iterator adaptor over the range [Itr, End) that only stops on
/// positions accepted by a predicate.
///
/// \tparam ItrType  underlying iterator type.
/// \tparam PredType callable taking a `const ItrType &` (the iterator itself,
///                  not the element it refers to) and returning true for the
///                  positions that should be visited.
template <typename ItrType,
          typename PredType = std::function<bool(const ItrType &)>>
class FilterIterator {
  using inner_traits = std::iterator_traits<ItrType>;
  using Iterator = FilterIterator;

  PredType Pred;
  ItrType Itr, End;

  /// Step back to the closest earlier position accepted by Pred.
  /// No lower bound is stored, so the caller must guarantee that such a
  /// position exists before the current one.
  void prev() {
    while (!Pred(--Itr))
      ;
  }

  /// Advance by one position, then skip forward to the next accepted one.
  void next() {
    ++Itr;
    nextMatching();
  }

  /// Skip forward until Pred accepts the current position or End is reached.
  /// Does nothing when the current position is already accepted.
  void nextMatching() {
    while (Itr != End && !Pred(Itr))
      ++Itr;
  }

public:
  using iterator_category = std::bidirectional_iterator_tag;
  using value_type = typename inner_traits::value_type;
  using difference_type = typename inner_traits::difference_type;
  using pointer = typename inner_traits::pointer;
  using reference = typename inner_traits::reference;

  Iterator &operator++() {
    next();
    return *this;
  }

  Iterator &operator--() {
    prev();
    return *this;
  }

  /// Post-increment: returns the iterator as it was before advancing.
  Iterator operator++(int) {
    // Copy the whole adaptor (predicate, position and end). Copying only the
    // inner iterator cannot be converted to the declared return type.
    Iterator Tmp(*this);
    next();
    return Tmp;
  }

  /// Post-decrement: returns the iterator as it was before stepping back.
  Iterator operator--(int) {
    Iterator Tmp(*this);
    prev();
    return Tmp;
  }

  /// Two filter iterators are equal when their current positions are equal;
  /// the predicate and the end position are not compared.
  bool operator==(const Iterator &Other) const { return Itr == Other.Itr; }
  bool operator!=(const Iterator &Other) const { return !operator==(Other); }

  reference operator*() { return *Itr; }
  pointer operator->() { return &operator*(); }

  /// Build an iterator positioned at the first accepted position in
  /// [\p Itr, \p End), or at \p End when there is none.
  FilterIterator(PredType Pred, ItrType Itr, ItrType End)
      : Pred(Pred), Itr(Itr), End(End) {
    nextMatching();
  }
};
154
155/// BOLT-exclusive errors generated in core BOLT libraries, optionally holding a
156/// string message and whether it is fatal or not. In case it is fatal and if
157/// BOLT is running as a standalone process, the process might be killed as soon
158/// as the error is checked.
159class BOLTError : public ErrorInfo<BOLTError> {
160public:
161 static char ID;
162
163 BOLTError(bool IsFatal, const Twine &S = Twine());
164 void log(raw_ostream &OS) const override;
165 bool isFatal() const { return IsFatal; }
166
167 const std::string &getMessage() const { return Msg; }
168 std::error_code convertToErrorCode() const override;
169
170private:
171 bool IsFatal;
172 std::string Msg;
173};
174
175/// Streams used by BOLT to log regular or error events
176struct JournalingStreams {
177 raw_ostream &Out;
178 raw_ostream &Err;
179};
180
181Error createNonFatalBOLTError(const Twine &S);
182Error createFatalBOLTError(const Twine &S);
183
184class BinaryContext {
185 BinaryContext() = delete;
186
187 /// Name of the binary file the context originated from.
188 std::string Filename;
189
190 /// Unique build ID if available for the binary.
191 std::optional<std::string> FileBuildID;
192
193 /// Set of all sections.
194 struct CompareSections {
195 bool operator()(const BinarySection *A, const BinarySection *B) const {
196 return *A < *B;
197 }
198 };
199 using SectionSetType = std::set<BinarySection *, CompareSections>;
200 SectionSetType Sections;
201
202 using SectionIterator = pointee_iterator<SectionSetType::iterator>;
203 using SectionConstIterator = pointee_iterator<SectionSetType::const_iterator>;
204
205 using FilteredSectionIterator = FilterIterator<SectionIterator>;
206 using FilteredSectionConstIterator = FilterIterator<SectionConstIterator>;
207
208 /// Map virtual address to a section. It is possible to have more than one
209 /// section mapped to the same address, e.g. non-allocatable sections.
210 using AddressToSectionMapType = std::multimap<uint64_t, BinarySection *>;
211 AddressToSectionMapType AddressToSection;
212
213 /// multimap of section name to BinarySection object. Some binaries
214 /// have multiple sections with the same name.
215 using NameToSectionMapType = std::multimap<std::string, BinarySection *>;
216 NameToSectionMapType NameToSection;
217
218 /// Map section references to BinarySection for matching sections in the
219 /// input file to internal section representation.
220 DenseMap<SectionRef, BinarySection *> SectionRefToBinarySection;
221
222 /// Low level section registration.
223 BinarySection &registerSection(BinarySection *Section);
224
225 /// Store all functions in the binary, sorted by original address.
226 std::map<uint64_t, BinaryFunction> BinaryFunctions;
227
228 /// A mutex that is used to control parallel accesses to BinaryFunctions
229 mutable llvm::sys::RWMutex BinaryFunctionsMutex;
230
231 /// Functions injected by BOLT
232 std::vector<BinaryFunction *> InjectedBinaryFunctions;
233
234 /// Jump tables for all functions mapped by address.
235 std::map<uint64_t, JumpTable *> JumpTables;
236
237 /// Locations of PC-relative relocations in data objects.
238 std::unordered_set<uint64_t> DataPCRelocations;
239
240 /// Used in duplicateJumpTable() to uniquely identify a JT clone
241 /// Start our IDs with a high number so getJumpTableContainingAddress checks
242 /// with size won't overflow
243 uint32_t DuplicatedJumpTables{0x10000000};
244
245 /// Function fragments to skip.
246 std::unordered_set<BinaryFunction *> FragmentsToSkip;
247
248 /// Fragment equivalence classes to query belonging to the same "family" in
249 /// presence of multiple fragments/multiple parents.
250 EquivalenceClasses<const BinaryFunction *> FragmentClasses;
251
252 /// The runtime library.
253 std::unique_ptr<RuntimeLibrary> RtLibrary;
254
255 /// DWP Context.
256 std::shared_ptr<DWARFContext> DWPContext;
257
258 /// Decoded pseudo probes.
259 std::shared_ptr<MCPseudoProbeDecoder> PseudoProbeDecoder;
260
261 /// A map of DWO Ids to CUs.
262 using DWOIdToCUMapType = std::unordered_map<uint64_t, DWARFUnit *>;
263 DWOIdToCUMapType DWOCUs;
264
265 bool ContainsDwarf5{false};
266 bool ContainsDwarfLegacy{false};
267
268 /// Mapping from input to output addresses.
269 std::optional<AddressMap> IOAddressMap;
270
271 /// Preprocess DWO debug information.
272 void preprocessDWODebugInfo();
273
274 /// DWARF line info for CUs.
275 std::map<unsigned, DwarfLineTable> DwarfLineTablesCUMap;
276
277 /// Internal helper for removing section name from a lookup table.
278 void deregisterSectionName(const BinarySection &Section);
279
280public:
281 static Expected<std::unique_ptr<BinaryContext>> createBinaryContext(
282 Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP,
283 StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC,
284 std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger);
285
286 /// Superset of compiler units that will contain overwritten code that needs
287 /// new debug info. In a few cases, functions may end up not being
288 /// overwritten, but it is okay to re-generate debug info for them.
289 std::set<const DWARFUnit *> ProcessedCUs;
290
291 // Setup MCPlus target builder
292 void initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder) {
293 MIB = std::move(TargetBuilder);
294 }
295
296 /// Return function fragments to skip.
297 const std::unordered_set<BinaryFunction *> &getFragmentsToSkip() {
298 return FragmentsToSkip;
299 }
300
301 /// Add function fragment to skip
302 void addFragmentsToSkip(BinaryFunction *Function) {
303 FragmentsToSkip.insert(x: Function);
304 }
305
306 void clearFragmentsToSkip() { FragmentsToSkip.clear(); }
307
308 /// Given DWOId returns CU if it exists in DWOCUs.
309 std::optional<DWARFUnit *> getDWOCU(uint64_t DWOId);
310
311 /// Returns DWOContext if it exists.
312 DWARFContext *getDWOContext() const;
313
314 /// Get Number of DWOCUs in a map.
315 uint32_t getNumDWOCUs() { return DWOCUs.size(); }
316
317 /// Returns true if DWARF5 is used.
318 bool isDWARF5Used() const { return ContainsDwarf5; }
319
320 /// Returns true if DWARF4 or lower is used.
321 bool isDWARFLegacyUsed() const { return ContainsDwarfLegacy; }
322
323 std::map<unsigned, DwarfLineTable> &getDwarfLineTables() {
324 return DwarfLineTablesCUMap;
325 }
326
327 DwarfLineTable &getDwarfLineTable(unsigned CUID) {
328 return DwarfLineTablesCUMap[CUID];
329 }
330
331 Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName,
332 unsigned FileNumber,
333 std::optional<MD5::MD5Result> Checksum,
334 std::optional<StringRef> Source,
335 unsigned CUID, unsigned DWARFVersion);
336
337 /// Input file segment info
338 ///
339 /// [start memory address] -> [segment info] mapping.
340 std::map<uint64_t, SegmentInfo> SegmentMapInfo;
341
342 /// Newly created segments.
343 std::vector<SegmentInfo> NewSegments;
344
345 /// Symbols that are expected to be undefined in MCContext during emission.
346 std::unordered_set<MCSymbol *> UndefinedSymbols;
347
348 /// [name] -> [BinaryData*] map used for global symbol resolution.
349 using SymbolMapType = StringMap<BinaryData *>;
350 SymbolMapType GlobalSymbols;
351
352 /// [address] -> [BinaryData], ...
353 /// Addresses never change.
354 /// Note: it is important that clients do not hold on to instances of
355 /// BinaryData* while the map is still being modified during BinaryFunction
356 /// disassembly. This is because of the possibility that a regular
357 /// BinaryData is later discovered to be a JumpTable.
358 using BinaryDataMapType = std::map<uint64_t, BinaryData *>;
359 using binary_data_iterator = BinaryDataMapType::iterator;
360 using binary_data_const_iterator = BinaryDataMapType::const_iterator;
361 BinaryDataMapType BinaryDataMap;
362
363 using FilteredBinaryDataConstIterator =
364 FilterIterator<binary_data_const_iterator>;
365 using FilteredBinaryDataIterator = FilterIterator<binary_data_iterator>;
366
367 StringRef getFilename() const { return Filename; }
368 void setFilename(StringRef Name) { Filename = std::string(Name); }
369
370 std::optional<StringRef> getFileBuildID() const {
371 if (FileBuildID)
372 return StringRef(*FileBuildID);
373
374 return std::nullopt;
375 }
376 void setFileBuildID(StringRef ID) { FileBuildID = std::string(ID); }
377
378 bool hasSymbolsWithFileName() const { return HasSymbolsWithFileName; }
379 void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = Value; }
380
381 std::shared_ptr<orc::SymbolStringPool> getSymbolStringPool() { return SSP; }
382 /// Return true if relocations against symbol with a given name
383 /// must be created.
384 bool forceSymbolRelocations(StringRef SymbolName) const;
385
386 uint64_t getNumUnusedProfiledObjects() const {
387 return NumUnusedProfiledObjects;
388 }
389 void setNumUnusedProfiledObjects(uint64_t N) { NumUnusedProfiledObjects = N; }
390
391 RuntimeLibrary *getRuntimeLibrary() { return RtLibrary.get(); }
392 void setRuntimeLibrary(std::unique_ptr<RuntimeLibrary> Lib) {
393 assert(!RtLibrary && "Cannot set runtime library twice.");
394 RtLibrary = std::move(Lib);
395 }
396
397 const MCPseudoProbeDecoder *getPseudoProbeDecoder() const {
398 return PseudoProbeDecoder.get();
399 }
400
401 void setPseudoProbeDecoder(std::shared_ptr<MCPseudoProbeDecoder> Decoder) {
402 assert(!PseudoProbeDecoder && "Cannot set pseudo probe decoder twice.");
403 PseudoProbeDecoder = Decoder;
404 }
405
406 /// Return BinaryFunction containing a given \p Address or nullptr if
407 /// no registered function contains the \p Address.
408 ///
409 /// In a binary a function has somewhat vague boundaries. E.g. a function can
410 /// refer to the first byte past the end of the function, and it will still be
411 /// referring to this function, not the function following it in the address
/// space. Thus we provide the following flags, which let a caller that has
/// more context for the search refine the function lookup.
414 ///
415 /// If \p CheckPastEnd is true and the \p Address falls on a byte
416 /// immediately following the last byte of some function and there's no other
417 /// function that starts there, then return the function as the one containing
418 /// the \p Address. This is useful when we need to locate functions for
419 /// references pointing immediately past a function body.
420 ///
421 /// If \p UseMaxSize is true, then include the space between this function
422 /// body and the next object in address ranges that we check.
423 BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
424 bool CheckPastEnd = false,
425 bool UseMaxSize = false);
426 const BinaryFunction *
427 getBinaryFunctionContainingAddress(uint64_t Address,
428 bool CheckPastEnd = false,
429 bool UseMaxSize = false) const {
430 return const_cast<BinaryContext *>(this)
431 ->getBinaryFunctionContainingAddress(Address, CheckPastEnd, UseMaxSize);
432 }
433
434 /// Return a BinaryFunction that starts at a given \p Address.
435 BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address);
436
437 const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const {
438 return const_cast<BinaryContext *>(this)->getBinaryFunctionAtAddress(
439 Address);
440 }
441
442 /// Return size of an entry for the given jump table \p Type.
443 uint64_t getJumpTableEntrySize(JumpTable::JumpTableType Type) const {
444 return Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
445 }
446
447 /// Return JumpTable containing a given \p Address.
448 JumpTable *getJumpTableContainingAddress(uint64_t Address) {
449 auto JTI = JumpTables.upper_bound(x: Address);
450 if (JTI == JumpTables.begin())
451 return nullptr;
452 --JTI;
453 if (JTI->first + JTI->second->getSize() > Address)
454 return JTI->second;
455 if (JTI->second->getSize() == 0 && JTI->first == Address)
456 return JTI->second;
457 return nullptr;
458 }
459
460 /// Deregister JumpTable registered at a given \p Address and delete it.
461 void deleteJumpTable(uint64_t Address);
462
463 unsigned getDWARFEncodingSize(unsigned Encoding) {
464 if (Encoding == dwarf::DW_EH_PE_omit)
465 return 0;
466 switch (Encoding & 0x0f) {
467 default:
468 llvm_unreachable("unknown encoding");
469 case dwarf::DW_EH_PE_absptr:
470 case dwarf::DW_EH_PE_signed:
471 return AsmInfo->getCodePointerSize();
472 case dwarf::DW_EH_PE_udata2:
473 case dwarf::DW_EH_PE_sdata2:
474 return 2;
475 case dwarf::DW_EH_PE_udata4:
476 case dwarf::DW_EH_PE_sdata4:
477 return 4;
478 case dwarf::DW_EH_PE_udata8:
479 case dwarf::DW_EH_PE_sdata8:
480 return 8;
481 }
482 }
483
484 /// [MCSymbol] -> [BinaryFunction]
485 ///
486 /// As we fold identical functions, multiple symbols can point
487 /// to the same BinaryFunction.
488 std::unordered_map<const MCSymbol *, BinaryFunction *> SymbolToFunctionMap;
489
490 /// A mutex that is used to control parallel accesses to SymbolToFunctionMap
491 mutable llvm::sys::RWMutex SymbolToFunctionMapMutex;
492
493 /// Look up the symbol entry that contains the given \p Address (based on
494 /// the start address and size for each symbol). Returns a pointer to
495 /// the BinaryData for that symbol. If no data is found, nullptr is returned.
496 const BinaryData *getBinaryDataContainingAddressImpl(uint64_t Address) const;
497
498 /// Update the Parent fields in BinaryDatas after adding a new entry into
499 /// \p BinaryDataMap.
500 void updateObjectNesting(BinaryDataMapType::iterator GAI);
501
502 /// Validate that if object address ranges overlap that the object with
503 /// the larger range is a parent of the object with the smaller range.
504 bool validateObjectNesting() const;
505
506 /// Validate that there are no top level "holes" in each section
507 /// and that all relocations with a section are mapped to a valid
508 /// top level BinaryData.
509 bool validateHoles() const;
510
511 /// Produce output address ranges based on input ranges for some module.
512 DebugAddressRangesVector translateModuleAddressRanges(
513 const DWARFAddressRangesVector &InputRanges) const;
514
515 /// Get a bogus "absolute" section that will be associated with all
516 /// absolute BinaryDatas.
517 BinarySection &absoluteSection();
518
519 /// Process "holes" in between known BinaryData objects. For now,
520 /// symbols are padded with the space before the next BinaryData object.
521 void fixBinaryDataHoles();
522
523 /// Generate names based on data hashes for unknown symbols.
524 void generateSymbolHashes();
525
526 /// Construct BinaryFunction object and add it to internal maps.
527 BinaryFunction *createBinaryFunction(const std::string &Name,
528 BinarySection &Section, uint64_t Address,
529 uint64_t Size, uint64_t SymbolSize = 0,
530 uint16_t Alignment = 0);
531
532 /// Return all functions for this rewrite instance.
533 std::map<uint64_t, BinaryFunction> &getBinaryFunctions() {
534 return BinaryFunctions;
535 }
536
537 /// Return all functions for this rewrite instance.
538 const std::map<uint64_t, BinaryFunction> &getBinaryFunctions() const {
539 return BinaryFunctions;
540 }
541
542 /// Create BOLT-injected function
543 BinaryFunction *createInjectedBinaryFunction(const std::string &Name,
544 bool IsSimple = true);
545
546 /// Patch the original binary contents at address \p Address with a sequence
547 /// of instructions from the \p Instructions list. The callee is responsible
548 /// for checking that the sequence doesn't cross any function or section
549 /// boundaries.
550 ///
551 /// Optional \p Name can be assigned to the patch. The name will be emitted to
552 /// the symbol table at \p Address.
553 BinaryFunction *
554 createInstructionPatch(uint64_t Address,
555 const InstructionListType &Instructions,
556 const Twine &Name = "");
557
558 std::vector<BinaryFunction *> &getInjectedBinaryFunctions() {
559 return InjectedBinaryFunctions;
560 }
561
562 /// Return vector with all functions, i.e. include functions from the input
563 /// binary and functions created by BOLT.
564 std::vector<BinaryFunction *> getAllBinaryFunctions();
565
566 /// Construct a jump table for \p Function at \p Address or return an existing
567 /// one at that location.
568 ///
/// May create an embedded jump table and return its label.
571 const MCSymbol *getOrCreateJumpTable(BinaryFunction &Function,
572 uint64_t Address,
573 JumpTable::JumpTableType Type);
574
575 /// Analyze a possible jump table of type \p Type at a given \p Address.
576 /// \p BF is a function referencing the jump table.
577 /// Return true if the jump table was detected at \p Address, and false
578 /// otherwise.
579 ///
580 /// If \p NextJTAddress is different from zero, it is used as an upper
581 /// bound for jump table memory layout.
582 ///
/// Optionally, populate \p EntriesAsAddress from jump table entries. The
/// entries could be partially populated if the jump table detection fails.
585 bool analyzeJumpTable(const uint64_t Address,
586 const JumpTable::JumpTableType Type,
587 const BinaryFunction &BF,
588 const uint64_t NextJTAddress = 0,
589 JumpTable::AddressesType *EntriesAsAddress = nullptr,
590 bool *HasEntryInFragment = nullptr) const;
591
592 /// After jump table locations are established, this function will populate
593 /// their EntriesAsAddress based on memory contents.
594 void populateJumpTables();
595
596 /// Returns a jump table ID and label pointing to the duplicated jump table.
597 /// Ordinarily, jump tables are identified by their address in the input
598 /// binary. We return an ID with the high bit set to differentiate it from
599 /// regular addresses, avoiding conflicts with standard jump tables.
600 std::pair<uint64_t, const MCSymbol *>
601 duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
602 const MCSymbol *OldLabel);
603
604 /// Generate a unique name for jump table at a given \p Address belonging
605 /// to function \p BF.
606 std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
607
608 /// Free memory used by JumpTable's EntriesAsAddress
609 void clearJumpTableTempData() {
610 for (auto &JTI : JumpTables) {
611 JumpTable &JT = *JTI.second;
612 JumpTable::AddressesType Temp;
613 Temp.swap(x&: JT.EntriesAsAddress);
614 }
615 }
616 /// Return true if the array of bytes represents a valid code padding.
617 bool hasValidCodePadding(const BinaryFunction &BF);
618
619 /// Verify padding area between functions, and adjust max function size
620 /// accordingly.
621 void adjustCodePadding();
622
623 /// Regular page size.
624 unsigned RegularPageSize{0x1000};
625 static constexpr unsigned RegularPageSizeX86 = 0x1000;
626 static constexpr unsigned RegularPageSizeAArch64 = 0x10000;
627
628 /// Huge page size to use.
629 static constexpr unsigned HugePageSize = 0x200000;
630
631 /// Addresses reserved for kernel on x86_64 start at this location.
632 static constexpr uint64_t KernelStartX86_64 = 0xFFFF'FFFF'8000'0000;
633
634 /// Map address to a constant island owner (constant data in code section)
635 std::map<uint64_t, BinaryFunction *> AddressToConstantIslandMap;
636
637 /// A map from jump table address to insertion order. Used for generating
638 /// jump table names.
639 std::map<uint64_t, size_t> JumpTableIds;
640
641 std::unique_ptr<MCContext> Ctx;
642
643 /// A mutex that is used to control parallel accesses to Ctx
644 mutable llvm::sys::RWMutex CtxMutex;
645 std::unique_lock<llvm::sys::RWMutex> scopeLock() const {
646 return std::unique_lock<llvm::sys::RWMutex>(CtxMutex);
647 }
648
649 std::unique_ptr<DWARFContext> DwCtx;
650
651 std::unique_ptr<Triple> TheTriple;
652
653 std::shared_ptr<orc::SymbolStringPool> SSP;
654
655 const Target *TheTarget;
656
657 std::string TripleName;
658
659 std::unique_ptr<MCCodeEmitter> MCE;
660
661 std::unique_ptr<MCObjectFileInfo> MOFI;
662
663 std::unique_ptr<const MCAsmInfo> AsmInfo;
664
665 std::unique_ptr<const MCInstrInfo> MII;
666
667 std::unique_ptr<const MCSubtargetInfo> STI;
668
669 std::unique_ptr<MCInstPrinter> InstPrinter;
670
671 std::unique_ptr<const MCInstrAnalysis> MIA;
672
673 std::unique_ptr<MCPlusBuilder> MIB;
674
675 std::unique_ptr<const MCRegisterInfo> MRI;
676
677 std::unique_ptr<MCDisassembler> DisAsm;
678
679 /// Symbolic disassembler.
680 std::unique_ptr<MCDisassembler> SymbolicDisAsm;
681
682 std::unique_ptr<MCAsmBackend> MAB;
683
684 /// Allows BOLT to print to log whenever it is necessary (with or without
685 /// const references)
686 mutable JournalingStreams Logger;
687
688 /// Indicates if the binary is Linux kernel.
689 bool IsLinuxKernel{false};
690
691 /// Indicates if relocations are available for usage.
692 bool HasRelocations{false};
693
694 /// Indicates if the binary is stripped
695 bool IsStripped{false};
696
697 /// Indicates if the binary contains split functions.
698 bool HasSplitFunctions{false};
699
700 /// Indicates if the function ordering of the binary is finalized.
701 bool HasFinalizedFunctionOrder{false};
702
703 /// Indicates if a separate .text.warm section is needed that contains
704 /// function fragments with
705 /// FunctionFragment::getFragmentNum() == FragmentNum::warm()
706 bool HasWarmSection{false};
707
708 /// Is the binary always loaded at a fixed address. Shared objects and
709 /// position-independent executables (PIEs) are examples of binaries that
710 /// will have HasFixedLoadAddress set to false.
711 bool HasFixedLoadAddress{true};
712
713 /// True if the binary has no dynamic dependencies, i.e., if it was statically
714 /// linked.
715 bool IsStaticExecutable{false};
716
717 /// Set to true if the binary contains PT_INTERP header.
718 bool HasInterpHeader{false};
719
720 /// Indicates if any of local symbols used for functions or data objects
721 /// have an origin file name available.
722 bool HasSymbolsWithFileName{false};
723
724 /// Does the binary have BAT section.
725 bool HasBATSection{false};
726
727 /// Sum of execution count of all functions
728 uint64_t SumExecutionCount{0};
729
730 /// Number of functions with profile information
731 uint64_t NumProfiledFuncs{0};
732
733 /// Number of functions with stale profile information
734 uint64_t NumStaleProfileFuncs{0};
735
736 /// Number of objects in profile whose profile was ignored.
737 uint64_t NumUnusedProfiledObjects{0};
738
739 /// Total hotness score according to profiling data for this binary.
740 uint64_t TotalScore{0};
741
/// Binary-wide aggregated stats. All counters start at zero.
struct BinaryStats {
  // --- Stale profile matching: basic block counts ---

  /// Total number of basic blocks in the profile.
  uint32_t NumStaleBlocks = 0;
  /// Number of exactly matched basic blocks.
  uint32_t NumExactMatchedBlocks = 0;
  /// Number of loosely matched basic blocks.
  uint32_t NumLooseMatchedBlocks = 0;
  /// Number of exactly pseudo probe matched basic blocks.
  uint32_t NumPseudoProbeExactMatchedBlocks = 0;
  /// Number of loosely pseudo probe matched basic blocks.
  uint32_t NumPseudoProbeLooseMatchedBlocks = 0;
  /// Number of call matched basic blocks.
  uint32_t NumCallMatchedBlocks = 0;

  // --- Stale profile matching: sample counts ---

  /// Total count of samples in the profile.
  uint64_t StaleSampleCount = 0;
  /// Count of exactly matched samples.
  uint64_t ExactMatchedSampleCount = 0;
  /// Count of loosely matched samples.
  uint64_t LooseMatchedSampleCount = 0;
  /// Count of exactly pseudo probe matched samples.
  uint64_t PseudoProbeExactMatchedSampleCount = 0;
  /// Count of loosely pseudo probe matched samples.
  uint64_t PseudoProbeLooseMatchedSampleCount = 0;
  /// Count of call matched samples.
  uint64_t CallMatchedSampleCount = 0;

  /// Number of stale functions that have matching number of blocks in the
  /// profile.
  uint64_t NumStaleFuncsWithEqualBlockCount = 0;
  /// Number of blocks that have matching size but a differing hash.
  uint64_t NumStaleBlocksWithEqualIcount = 0;
} Stats;
775
776 // Original binary execution count stats.
777 DynoStats InitialDynoStats;
778
779 // Address of the first allocated segment.
780 uint64_t FirstAllocAddress{std::numeric_limits<uint64_t>::max()};
781
782 /// Track next available address for new allocatable sections. RewriteInstance
783 /// sets this prior to running BOLT passes, so layout passes are aware of the
784 /// final addresses functions will have.
785 uint64_t LayoutStartAddress{0};
786
787 /// Old .text info.
788 uint64_t OldTextSectionAddress{0};
789 uint64_t OldTextSectionOffset{0};
790 uint64_t OldTextSectionSize{0};
791
792 /// Area in the input binary reserved for BOLT.
793 AddressRange BOLTReserved;
794
795 /// Address of the code/function that is executed before any other code in
796 /// the binary.
797 std::optional<uint64_t> StartFunctionAddress;
798
799 /// Address of the code/function that is going to be executed right before
800 /// the execution of the binary is completed.
801 std::optional<uint64_t> FiniFunctionAddress;
802
803 /// DT_FINI.
804 std::optional<uint64_t> FiniAddress;
805
806 /// DT_FINI_ARRAY. Only used when DT_FINI is not set.
807 std::optional<uint64_t> FiniArrayAddress;
808
809 /// DT_FINI_ARRAYSZ. Only used when DT_FINI is not set.
810 std::optional<uint64_t> FiniArraySize;
811
812 /// Page alignment used for code layout.
813 uint64_t PageAlign{HugePageSize};
814
815 /// True if the binary requires immediate relocation processing.
816 bool RequiresZNow{false};
817
818 /// List of functions that always trap.
819 std::vector<const BinaryFunction *> TrappedFunctions;
820
821 /// List of external addresses in the code that are not a function start
822 /// and are referenced from BinaryFunction.
823 std::list<std::pair<BinaryFunction *, uint64_t>> InterproceduralReferences;
824
825 /// DWARF encoding. Available encoding types defined in BinaryFormat/Dwarf.h
826 /// enum Constants, e.g. DW_EH_PE_omit.
827 unsigned LSDAEncoding = dwarf::DW_EH_PE_omit;
828
829 BinaryContext(std::unique_ptr<MCContext> Ctx,
830 std::unique_ptr<DWARFContext> DwCtx,
831 std::unique_ptr<Triple> TheTriple,
832 std::shared_ptr<orc::SymbolStringPool> SSP,
833 const Target *TheTarget, std::string TripleName,
834 std::unique_ptr<MCCodeEmitter> MCE,
835 std::unique_ptr<MCObjectFileInfo> MOFI,
836 std::unique_ptr<const MCAsmInfo> AsmInfo,
837 std::unique_ptr<const MCInstrInfo> MII,
838 std::unique_ptr<const MCSubtargetInfo> STI,
839 std::unique_ptr<MCInstPrinter> InstPrinter,
840 std::unique_ptr<const MCInstrAnalysis> MIA,
841 std::unique_ptr<MCPlusBuilder> MIB,
842 std::unique_ptr<const MCRegisterInfo> MRI,
843 std::unique_ptr<MCDisassembler> DisAsm,
844 JournalingStreams Logger);
845
846 ~BinaryContext();
847
848 std::unique_ptr<MCObjectWriter> createObjectWriter(raw_pwrite_stream &OS);
849
850 bool isELF() const { return TheTriple->isOSBinFormatELF(); }
851
852 bool isMachO() const { return TheTriple->isOSBinFormatMachO(); }
853
854 bool isAArch64() const {
855 return TheTriple->getArch() == llvm::Triple::aarch64;
856 }
857
858 bool isX86() const {
859 return TheTriple->getArch() == llvm::Triple::x86 ||
860 TheTriple->getArch() == llvm::Triple::x86_64;
861 }
862
863 bool isRISCV() const { return TheTriple->getArch() == llvm::Triple::riscv64; }
864
/// AArch64-specific functions to check if symbol is used to delimit
/// code/data in .text. Code is marked by $x, data by $d.
///
/// getMarkerType() classifies \p Symbol; isMarker() reports whether
/// \p Symbol is such a marker at all.
MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
bool isMarker(const SymbolRef &Symbol) const;
869
870 /// Iterate over all BinaryData.
871 iterator_range<binary_data_const_iterator> getBinaryData() const {
872 return make_range(x: BinaryDataMap.begin(), y: BinaryDataMap.end());
873 }
874
875 /// Iterate over all BinaryData.
876 iterator_range<binary_data_iterator> getBinaryData() {
877 return make_range(x: BinaryDataMap.begin(), y: BinaryDataMap.end());
878 }
879
880 /// Iterate over all BinaryData associated with the given \p Section.
881 iterator_range<FilteredBinaryDataConstIterator>
882 getBinaryDataForSection(const BinarySection &Section) const {
883 auto Begin = BinaryDataMap.lower_bound(x: Section.getAddress());
884 if (Begin != BinaryDataMap.begin())
885 --Begin;
886 auto End = BinaryDataMap.upper_bound(x: Section.getEndAddress());
887 auto pred = [&Section](const binary_data_const_iterator &Itr) -> bool {
888 return Itr->second->getSection() == Section;
889 };
890 return make_range(x: FilteredBinaryDataConstIterator(pred, Begin, End),
891 y: FilteredBinaryDataConstIterator(pred, End, End));
892 }
893
894 /// Iterate over all BinaryData associated with the given \p Section.
895 iterator_range<FilteredBinaryDataIterator>
896 getBinaryDataForSection(BinarySection &Section) {
897 auto Begin = BinaryDataMap.lower_bound(x: Section.getAddress());
898 if (Begin != BinaryDataMap.begin())
899 --Begin;
900 auto End = BinaryDataMap.upper_bound(x: Section.getEndAddress());
901 auto pred = [&Section](const binary_data_iterator &Itr) -> bool {
902 return Itr->second->getSection() == Section;
903 };
904 return make_range(x: FilteredBinaryDataIterator(pred, Begin, End),
905 y: FilteredBinaryDataIterator(pred, End, End));
906 }
907
/// Iterate over all the sub-symbols of \p BD (if any).
iterator_range<binary_data_iterator> getSubBinaryData(BinaryData *BD);
910
911 /// Clear the global symbol address -> name(s) map.
912 void clearBinaryData() {
913 GlobalSymbols.clear();
914 for (auto &Entry : BinaryDataMap)
915 delete Entry.second;
916 BinaryDataMap.clear();
917 }
918
/// Process \p Address reference from code in function \p BF.
/// \p IsPCRel indicates if the reference is PC-relative.
/// Return <Symbol, Addend> pair corresponding to the \p Address.
std::pair<const MCSymbol *, uint64_t>
handleAddressRef(uint64_t Address, BinaryFunction &BF, bool IsPCRel);

/// Analyze memory contents at the given \p Address and return the type of
/// memory contents (such as a possible jump table). \p BF is the function
/// on whose behalf the analysis is performed.
MemoryContentsType analyzeMemoryAt(uint64_t Address, BinaryFunction &BF);
928
929 /// Return a value of the global \p Symbol or an error if the value
930 /// was not set.
931 ErrorOr<uint64_t> getSymbolValue(const MCSymbol &Symbol) const {
932 const BinaryData *BD = getBinaryDataByName(Name: Symbol.getName());
933 if (!BD)
934 return std::make_error_code(e: std::errc::bad_address);
935 return BD->getAddress();
936 }
937
/// Return a global symbol registered at a given \p Address and \p Size.
/// If no symbol exists, create one with unique name using \p Prefix.
/// If there are multiple symbols registered at the \p Address, then
/// return the first one.
/// \p Size, \p Alignment and \p Flags all default to 0.
MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
                                  uint64_t Size = 0, uint16_t Alignment = 0,
                                  unsigned Flags = 0);

/// Create a global symbol named \p Name without registering an address.
MCSymbol *getOrCreateUndefinedGlobalSymbol(StringRef Name);

/// Register a symbol with \p Name at a given \p Address using \p Size,
/// \p Alignment, and \p Flags. See llvm::SymbolRef::Flags for the definition
/// of \p Flags. \p Flags defaults to 0.
MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
                                uint64_t Size, uint16_t Alignment,
                                unsigned Flags = 0);
955
956 /// Return BinaryData registered at a given \p Address or nullptr if no
957 /// global symbol was registered at the location.
958 const BinaryData *getBinaryDataAtAddress(uint64_t Address) const {
959 auto NI = BinaryDataMap.find(x: Address);
960 return NI != BinaryDataMap.end() ? NI->second : nullptr;
961 }
962
963 BinaryData *getBinaryDataAtAddress(uint64_t Address) {
964 auto NI = BinaryDataMap.find(x: Address);
965 return NI != BinaryDataMap.end() ? NI->second : nullptr;
966 }
967
968 /// Look up the symbol entry that contains the given \p Address (based on
969 /// the start address and size for each symbol). Returns a pointer to
970 /// the BinaryData for that symbol. If no data is found, nullptr is returned.
971 const BinaryData *getBinaryDataContainingAddress(uint64_t Address) const {
972 return getBinaryDataContainingAddressImpl(Address);
973 }
974
975 BinaryData *getBinaryDataContainingAddress(uint64_t Address) {
976 return const_cast<BinaryData *>(
977 getBinaryDataContainingAddressImpl(Address));
978 }
979
980 /// Return BinaryData for the given \p Name or nullptr if no
981 /// global symbol with that name exists.
982 const BinaryData *getBinaryDataByName(StringRef Name) const {
983 return GlobalSymbols.lookup(Key: Name);
984 }
985
986 BinaryData *getBinaryDataByName(StringRef Name) {
987 return GlobalSymbols.lookup(Key: Name);
988 }
989
990 /// Return registered PLT entry BinaryData with the given \p Name
991 /// or nullptr if no global PLT symbol with that name exists.
992 const BinaryData *getPLTBinaryDataByName(StringRef Name) const {
993 if (const BinaryData *Data = getBinaryDataByName(Name: Name.str() + "@PLT"))
994 return Data;
995
996 // The symbol name might contain versioning information e.g
997 // memcpy@@GLIBC_2.17. Remove it and try to locate binary data
998 // without it.
999 size_t At = Name.find(Str: "@");
1000 if (At != std::string::npos)
1001 return getBinaryDataByName(Name: Name.str().substr(pos: 0, n: At) + "@PLT");
1002
1003 return nullptr;
1004 }
1005
/// Retrieves a reference to ELF's _GLOBAL_OFFSET_TABLE_ symbol, which points
/// at GOT, or null if it is not present in the input binary symtab.
/// The result is returned as a pointer to the symbol's BinaryData.
BinaryData *getGOTSymbol();
1009
1010 /// Checks if symbol name refers to ELF's _GLOBAL_OFFSET_TABLE_ symbol
1011 bool isGOTSymbol(StringRef SymName) const {
1012 return SymName == "_GLOBAL_OFFSET_TABLE_";
1013 }
1014
1015 /// Return true if \p SymbolName was generated internally and was not present
1016 /// in the input binary.
1017 bool isInternalSymbolName(const StringRef Name) {
1018 return Name.starts_with(Prefix: "SYMBOLat") || Name.starts_with(Prefix: "DATAat") ||
1019 Name.starts_with(Prefix: "HOLEat");
1020 }
1021
1022 MCSymbol *getHotTextStartSymbol() const {
1023 return Ctx->getOrCreateSymbol(Name: "__hot_start");
1024 }
1025
1026 MCSymbol *getHotTextEndSymbol() const {
1027 return Ctx->getOrCreateSymbol(Name: "__hot_end");
1028 }
1029
1030 MCSection *getTextSection() const { return MOFI->getTextSection(); }
1031
1032 /// Return code section with a given name.
1033 MCSection *getCodeSection(StringRef SectionName) const {
1034 if (isELF())
1035 return Ctx->getELFSection(Section: SectionName, Type: ELF::SHT_PROGBITS,
1036 Flags: ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
1037 else
1038 return Ctx->getMachOSection(Segment: "__TEXT", Section: SectionName,
1039 TypeAndAttributes: MachO::S_ATTR_PURE_INSTRUCTIONS,
1040 K: SectionKind::getText());
1041 }
1042
1043 /// Return data section with a given name.
1044 MCSection *getDataSection(StringRef SectionName) const {
1045 return Ctx->getELFSection(Section: SectionName, Type: ELF::SHT_PROGBITS, Flags: ELF::SHF_ALLOC);
1046 }
1047
1048 /// \name Pre-assigned Section Names
1049 /// @{
1050
1051 const char *getMainCodeSectionName() const { return ".text"; }
1052
1053 const char *getWarmCodeSectionName() const { return ".text.warm"; }
1054
1055 const char *getColdCodeSectionName() const { return ".text.cold"; }
1056
1057 const char *getHotTextMoverSectionName() const { return ".text.mover"; }
1058
1059 const char *getInjectedCodeSectionName() const { return ".text.injected"; }
1060
1061 const char *getInjectedColdCodeSectionName() const {
1062 return ".text.injected.cold";
1063 }
1064
1065 ErrorOr<BinarySection &> getGdbIndexSection() const {
1066 return getUniqueSectionByName(SectionName: ".gdb_index");
1067 }
1068
1069 ErrorOr<BinarySection &> getDebugNamesSection() const {
1070 return getUniqueSectionByName(SectionName: ".debug_names");
1071 }
1072
1073 /// @}
1074
/// Register \p TargetFunction as a fragment of \p Function if checks pass:
/// - if \p TargetFunction name matches \p Function name with a suffix:
///   fragment_name == parent_name.cold(.\d+)?
/// Return true if the fragment is registered, false if the check failed.
bool registerFragment(BinaryFunction &TargetFunction,
                      BinaryFunction &Function);
1081
1082 /// Return true if two functions belong to the same "family": are fragments
1083 /// of one another, or fragments of the same parent, or transitively fragment-
1084 /// related.
1085 bool areRelatedFragments(const BinaryFunction *LHS,
1086 const BinaryFunction *RHS) const {
1087 return FragmentClasses.isEquivalent(V1: LHS, V2: RHS);
1088 }
1089
1090 /// Add interprocedural reference for \p Function to \p Address
1091 void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) {
1092 InterproceduralReferences.push_back(x: {Function, Address});
1093 }
1094
/// Used to fix the target of linker-generated AArch64 adrp + add
/// sequence with no relocation info.
void addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits,
                            MCInst &LoadHiBits, uint64_t Target);

/// Return true if AArch64 veneer was successfully matched at a given
/// \p Address and register veneer binary function if \p MatchOnly
/// argument is false. \p MatchOnly defaults to false.
bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false);

/// Resolve inter-procedural dependencies.
/// NOTE(review): the previous comment was cut off after "from"; presumably
/// the input is the list kept in InterproceduralReferences - confirm against
/// the definition.
void processInterproceduralReferences();

/// Skip functions with all parent and child fragments transitively.
void skipMarkedFragments();

/// Perform any necessary post processing on the symbol table after
/// function disassembly is complete. This processing fixes top
/// level data holes and makes sure the symbol table is valid.
/// It also assigns all memory profiling info to the appropriate
/// BinaryData objects.
void postProcessSymbolTable();

/// Set the size of the global symbol located at \p Address. Return
/// false if no symbol exists, true otherwise.
bool setBinaryDataSize(uint64_t Address, uint64_t Size);

/// Print the global symbol table to \p OS.
void printGlobalSymbols(raw_ostream &OS) const;

/// Register information about the given \p Section so we can look up
/// sections by address.
BinarySection &registerSection(SectionRef Section);

/// Register a copy of \p OriginalSection under a different name,
/// \p SectionName.
BinarySection &registerSection(const Twine &SectionName,
                               const BinarySection &OriginalSection);

/// Register or update the information for the section with the given
/// \p Name. If the section already exists, the information in the
/// section will be updated with the new data.
/// Defaults: \p Data is nullptr, \p Size is 0, \p Alignment is 1.
BinarySection &registerOrUpdateSection(const Twine &Name, unsigned ELFType,
                                       unsigned ELFFlags,
                                       uint8_t *Data = nullptr,
                                       uint64_t Size = 0,
                                       unsigned Alignment = 1);
1141
1142 /// Register the information for the note (non-allocatable) section
1143 /// with the given /p Name. If the section already exists, the
1144 /// information in the section will be updated with the new data.
1145 BinarySection &
1146 registerOrUpdateNoteSection(const Twine &Name, uint8_t *Data = nullptr,
1147 uint64_t Size = 0, unsigned Alignment = 1,
1148 bool IsReadOnly = true,
1149 unsigned ELFType = ELF::SHT_PROGBITS) {
1150 return registerOrUpdateSection(Name, ELFType,
1151 ELFFlags: BinarySection::getFlags(IsReadOnly), Data,
1152 Size, Alignment);
1153 }
1154
/// Remove sections that were preregistered but never used.
void deregisterUnusedSections();

/// Remove the given \p Section from the set of all sections. Return
/// true if the section was removed (and deleted), otherwise false.
bool deregisterSection(BinarySection &Section);

/// Re-register \p Section under the \p NewName.
void renameSection(BinarySection &Section, const Twine &NewName);
1164
1165 /// Iterate over all registered sections.
1166 iterator_range<FilteredSectionIterator> sections() {
1167 auto notNull = [](const SectionIterator &Itr) { return (bool)*Itr; };
1168 return make_range(
1169 x: FilteredSectionIterator(notNull, Sections.begin(), Sections.end()),
1170 y: FilteredSectionIterator(notNull, Sections.end(), Sections.end()));
1171 }
1172
1173 /// Iterate over all registered sections.
1174 iterator_range<FilteredSectionConstIterator> sections() const {
1175 return const_cast<BinaryContext *>(this)->sections();
1176 }
1177
1178 /// Iterate over all registered allocatable sections.
1179 iterator_range<FilteredSectionIterator> allocatableSections() {
1180 auto isAllocatable = [](const SectionIterator &Itr) {
1181 return *Itr && Itr->isAllocatable();
1182 };
1183 return make_range(
1184 x: FilteredSectionIterator(isAllocatable, Sections.begin(),
1185 Sections.end()),
1186 y: FilteredSectionIterator(isAllocatable, Sections.end(), Sections.end()));
1187 }
1188
1189 /// Iterate over all registered code sections.
1190 iterator_range<FilteredSectionIterator> textSections() {
1191 auto isText = [](const SectionIterator &Itr) {
1192 return *Itr && Itr->isAllocatable() && Itr->isText();
1193 };
1194 return make_range(
1195 x: FilteredSectionIterator(isText, Sections.begin(), Sections.end()),
1196 y: FilteredSectionIterator(isText, Sections.end(), Sections.end()));
1197 }
1198
1199 /// Iterate over all registered allocatable sections.
1200 iterator_range<FilteredSectionConstIterator> allocatableSections() const {
1201 return const_cast<BinaryContext *>(this)->allocatableSections();
1202 }
1203
1204 /// Iterate over all registered non-allocatable sections.
1205 iterator_range<FilteredSectionIterator> nonAllocatableSections() {
1206 auto notAllocated = [](const SectionIterator &Itr) {
1207 return *Itr && !Itr->isAllocatable();
1208 };
1209 return make_range(
1210 x: FilteredSectionIterator(notAllocated, Sections.begin(), Sections.end()),
1211 y: FilteredSectionIterator(notAllocated, Sections.end(), Sections.end()));
1212 }
1213
1214 /// Iterate over all registered non-allocatable sections.
1215 iterator_range<FilteredSectionConstIterator> nonAllocatableSections() const {
1216 return const_cast<BinaryContext *>(this)->nonAllocatableSections();
1217 }
1218
1219 /// Iterate over all allocatable relocation sections.
1220 iterator_range<FilteredSectionIterator> allocatableRelaSections() {
1221 auto isAllocatableRela = [](const SectionIterator &Itr) {
1222 return *Itr && Itr->isAllocatable() && Itr->isRela();
1223 };
1224 return make_range(x: FilteredSectionIterator(isAllocatableRela,
1225 Sections.begin(), Sections.end()),
1226 y: FilteredSectionIterator(isAllocatableRela, Sections.end(),
1227 Sections.end()));
1228 }
1229
/// Return base address for the shared object or PIE based on the segment
/// mapping information. \p MMapAddress is an address where one of the
/// segments was mapped. \p FileOffset is the offset in the file of the
/// mapping. Note that \p FileOffset should be page-aligned and could be
/// different from the file offset of the segment which could be unaligned.
/// If no segment is found that matches \p FileOffset, return std::nullopt.
std::optional<uint64_t> getBaseAddressForMapping(uint64_t MMapAddress,
                                                 uint64_t FileOffset) const;
1238
1239 /// Check if the address belongs to this binary's static allocation space.
1240 bool containsAddress(uint64_t Address) const {
1241 return Address >= FirstAllocAddress && Address < LayoutStartAddress;
1242 }
1243
/// Return section name containing the given \p Address, or an error.
ErrorOr<StringRef> getSectionNameForAddress(uint64_t Address) const;

/// Print all sections to \p OS.
void printSections(raw_ostream &OS) const;
1249
1250 /// Return largest section containing the given \p Address. These
1251 /// functions only work for allocatable sections, i.e. ones with non-zero
1252 /// addresses.
1253 ErrorOr<BinarySection &> getSectionForAddress(uint64_t Address);
1254 ErrorOr<const BinarySection &> getSectionForAddress(uint64_t Address) const {
1255 return const_cast<BinaryContext *>(this)->getSectionForAddress(Address);
1256 }
1257
1258 /// Return internal section representation for a section in a file.
1259 BinarySection *getSectionForSectionRef(SectionRef Section) const {
1260 return SectionRefToBinarySection.lookup(Val: Section);
1261 }
1262
1263 /// Return section(s) associated with given \p Name.
1264 iterator_range<NameToSectionMapType::iterator>
1265 getSectionByName(const Twine &Name) {
1266 return make_range(p: NameToSection.equal_range(x: Name.str()));
1267 }
1268 iterator_range<NameToSectionMapType::const_iterator>
1269 getSectionByName(const Twine &Name) const {
1270 return make_range(p: NameToSection.equal_range(x: Name.str()));
1271 }
1272
1273 /// Return the unique section associated with given \p Name.
1274 /// If there is more than one section with the same name, return an error
1275 /// object.
1276 ErrorOr<BinarySection &>
1277 getUniqueSectionByName(const Twine &SectionName) const {
1278 auto Sections = getSectionByName(Name: SectionName);
1279 if (Sections.begin() != Sections.end() &&
1280 std::next(x: Sections.begin()) == Sections.end())
1281 return *Sections.begin()->second;
1282 return std::make_error_code(e: std::errc::bad_address);
1283 }
1284
/// Return an unsigned value of \p Size stored at \p Address. The address has
/// to be a valid statically allocated address for the binary.
/// The result is wrapped in ErrorOr, so a failure is reported as an error.
ErrorOr<uint64_t> getUnsignedValueAtAddress(uint64_t Address,
                                            size_t Size) const;

/// Return a signed value of \p Size stored at \p Address. The address has
/// to be a valid statically allocated address for the binary.
/// The result is wrapped in ErrorOr, so a failure is reported as an error.
ErrorOr<int64_t> getSignedValueAtAddress(uint64_t Address, size_t Size) const;
1293
1294 /// Special case of getUnsignedValueAtAddress() that uses a pointer size.
1295 ErrorOr<uint64_t> getPointerAtAddress(uint64_t Address) const {
1296 return getUnsignedValueAtAddress(Address, Size: AsmInfo->getCodePointerSize());
1297 }
1298
/// Replaces all references to \p ChildBF with \p ParentBF. \p ChildBF is then
/// removed from the list of functions \p BFs. The profile data of \p ChildBF
/// is merged into that of \p ParentBF. This function is thread safe.
void foldFunction(BinaryFunction &ChildBF, BinaryFunction &ParentBF);

/// Add a Section relocation at a given \p Address.
/// \p Addend and \p Value default to 0.
void addRelocation(uint64_t Address, MCSymbol *Symbol, uint32_t Type,
                   uint64_t Addend = 0, uint64_t Value = 0);

/// Return a relocation registered at a given \p Address, or nullptr if there
/// is no relocation at such address.
const Relocation *getRelocationAt(uint64_t Address) const;
1311
1312 /// Register a presence of PC-relative relocation at the given \p Address.
1313 void addPCRelativeDataRelocation(uint64_t Address) {
1314 DataPCRelocations.emplace(args&: Address);
1315 }
1316
/// Register dynamic relocation at \p Address. \p Value defaults to 0.
void addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, uint32_t Type,
                          uint64_t Addend, uint64_t Value = 0);

/// Return a dynamic relocation registered at a given \p Address, or nullptr
/// if there is no dynamic relocation at such address.
const Relocation *getDynamicRelocationAt(uint64_t Address) const;

/// Remove registered relocation at a given \p Address.
/// NOTE(review): the meaning of the bool result is not visible here;
/// presumably it reports whether a relocation was removed - confirm.
bool removeRelocationAt(uint64_t Address);

/// This function makes sure that symbols referenced by ambiguous relocations
/// are marked as immovable. For now, if a section relocation points at the
/// boundary between two symbols then those symbols are marked as immovable.
void markAmbiguousRelocations(BinaryData &BD, const uint64_t Address);
1332
1333 /// Return BinaryFunction corresponding to \p Symbol. If \p EntryDesc is not
1334 /// nullptr, set it to entry descriminator corresponding to \p Symbol
1335 /// (0 for single-entry functions). This function is thread safe.
1336 BinaryFunction *getFunctionForSymbol(const MCSymbol *Symbol,
1337 uint64_t *EntryDesc = nullptr);
1338
1339 const BinaryFunction *
1340 getFunctionForSymbol(const MCSymbol *Symbol,
1341 uint64_t *EntryDesc = nullptr) const {
1342 return const_cast<BinaryContext *>(this)->getFunctionForSymbol(Symbol,
1343 EntryDesc);
1344 }
1345
1346 /// Associate the symbol \p Sym with the function \p BF for lookups with
1347 /// getFunctionForSymbol().
1348 void setSymbolToFunctionMap(const MCSymbol *Sym, BinaryFunction *BF) {
1349 SymbolToFunctionMap[Sym] = BF;
1350 }
1351
/// Populate some internal data structures with debug info.
void preprocessDebugInfo();

/// Add a filename entry from SrcCUID to DestCUID.
/// \p FileIndex selects the entry.
/// NOTE(review): presumably the index refers to the source unit's file table
/// and the return value is the index in the destination unit - confirm.
unsigned addDebugFilenameToUnit(const uint32_t DestCUID,
                                const uint32_t SrcCUID, unsigned FileIndex);

/// Return functions in output layout order.
std::vector<BinaryFunction *> getSortedFunctions();

/// Do the best effort to calculate the size of the function by emitting
/// its code, and relaxing branch instructions. By default, branch
/// instructions are updated to match the layout. Pass \p FixBranches set to
/// false if the branches are known to be up to date with the code layout.
///
/// Return the pair where the first size is for the main part, and the second
/// size is for the cold one.
/// Modify BinaryBasicBlock::OutputAddressRange for each basic block in the
/// function in place so that BinaryBasicBlock::getOutputSize() gives the
/// emitted size of the basic block.
std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF,
                                               bool FixBranches = true);
1374
1375 /// Calculate the size of the instruction \p Inst optionally using a
1376 /// user-supplied emitter for lock-free multi-thread work. MCCodeEmitter is
1377 /// not thread safe and each thread should operate with its own copy of it.
1378 uint64_t
1379 computeInstructionSize(const MCInst &Inst,
1380 const MCCodeEmitter *Emitter = nullptr) const {
1381 if (std::optional<uint32_t> Size = MIB->getSize(Inst))
1382 return *Size;
1383
1384 if (MIB->isPseudo(Inst))
1385 return 0;
1386
1387 if (std::optional<uint32_t> Size = MIB->getInstructionSize(Inst))
1388 return *Size;
1389
1390 if (!Emitter)
1391 Emitter = this->MCE.get();
1392 SmallString<256> Code;
1393 SmallVector<MCFixup, 4> Fixups;
1394 Emitter->encodeInstruction(Inst, CB&: Code, Fixups, STI: *STI);
1395 return Code.size();
1396 }
1397
1398 /// Compute the native code size for a range of instructions.
1399 /// Note: this can be imprecise wrt the final binary since happening prior to
1400 /// relaxation, as well as wrt the original binary because of opcode
1401 /// shortening.MCCodeEmitter is not thread safe and each thread should operate
1402 /// with its own copy of it.
1403 template <typename Itr>
1404 uint64_t computeCodeSize(Itr Beg, Itr End,
1405 const MCCodeEmitter *Emitter = nullptr) const {
1406 uint64_t Size = 0;
1407 while (Beg != End) {
1408 if (!MIB->isPseudo(Inst: *Beg))
1409 Size += computeInstructionSize(Inst: *Beg, Emitter);
1410 ++Beg;
1411 }
1412 return Size;
1413 }
1414
/// Validate that disassembling the \p Sequence of bytes into an instruction
/// and assembling the instruction again, results in a byte sequence identical
/// to the original one. Return true if the two sequences match.
bool validateInstructionEncoding(ArrayRef<uint8_t> Sequence) const;

/// Return a function execution count threshold for determining whether
/// the function is 'hot'. Consider it hot if count is above the average exec
/// count of profiled functions.
uint64_t getHotThreshold() const;
1424
1425 /// Return true if instruction \p Inst requires an offset for further
1426 /// processing (e.g. assigning a profile).
1427 bool keepOffsetForInstruction(const MCInst &Inst) const {
1428 if (MIB->isCall(Inst) || MIB->isBranch(Inst) || MIB->isReturn(Inst) ||
1429 MIB->isPrefix(Inst) || MIB->isIndirectBranch(Inst)) {
1430 return true;
1431 }
1432 return false;
1433 }
1434
/// Return true if the function should be emitted to the output file.
bool shouldEmit(const BinaryFunction &Function) const;

/// Dump the assembly representation of MCInst to debug output.
void dump(const MCInst &Inst) const;

/// Print the string name for a CFI operation.
static void printCFI(raw_ostream &OS, const MCCFIInstruction &Inst);

/// Print a single MCInst in native format. If Function is non-null,
/// the instruction will be annotated with CFI and possibly DWARF line table
/// info.
/// If \p PrintMCInst is true, the instruction is also printed in the
/// architecture independent format.
/// Defaults: \p Offset is 0, \p Function is nullptr, all three Print* flags
/// are false, and \p Endl is "\n".
void printInstruction(raw_ostream &OS, const MCInst &Instruction,
                      uint64_t Offset = 0,
                      const BinaryFunction *Function = nullptr,
                      bool PrintMCInst = false, bool PrintMemData = false,
                      bool PrintRelocations = false,
                      StringRef Endl = "\n") const;

/// Print data when embedded in the instruction stream keeping the format
/// similar to printInstruction().
void printData(raw_ostream &OS, ArrayRef<uint8_t> Data,
               uint64_t Offset) const;

/// Extract data from the binary corresponding to [Address, Address + Size)
/// range. Return an empty ArrayRef if the address range does not belong to
/// any section in the binary, crosses a section boundary, or falls into a
/// virtual section.
ArrayRef<uint8_t> extractData(uint64_t Address, uint64_t Size) const;
1466
1467 /// Print a range of instructions.
1468 template <typename Itr>
1469 uint64_t
1470 printInstructions(raw_ostream &OS, Itr Begin, Itr End, uint64_t Offset = 0,
1471 const BinaryFunction *Function = nullptr,
1472 bool PrintMCInst = false, bool PrintMemData = false,
1473 bool PrintRelocations = false,
1474 StringRef Endl = "\n") const {
1475 while (Begin != End) {
1476 printInstruction(OS, Instruction: *Begin, Offset, Function, PrintMCInst, PrintMemData,
1477 PrintRelocations, Endl);
1478 Offset += computeCodeSize(Begin, Begin + 1);
1479 ++Begin;
1480 }
1481 return Offset;
1482 }
1483
/// Log BOLT errors to journaling streams and quit process with non-zero error
/// code 1 if error is fatal.
void logBOLTErrorsAndQuitOnFatal(Error E);

/// Build a bug report string from \p Message for \p Function.
/// NOTE(review): the exact contents of the report are defined out of line -
/// see the implementation.
std::string generateBugReportMessage(StringRef Message,
                                     const BinaryFunction &Function) const;
1490
/// Result of createIndependentMCCodeEmitter(): a code emitter bundled with
/// the MCContext and MCObjectFileInfo that were created for it.
///
/// Members are destroyed in reverse declaration order (MCE, then LocalCtx,
/// then LocalMOFI); do not reorder them without checking that this is still
/// valid for the objects involved.
struct IndependentCodeEmitter {
  /// Object file info registered with LocalCtx.
  std::unique_ptr<MCObjectFileInfo> LocalMOFI;
  /// Private MC context that MCE is created against.
  std::unique_ptr<MCContext> LocalCtx;
  /// The code emitter itself.
  std::unique_ptr<MCCodeEmitter> MCE;
};
1496
1497 /// Encapsulates an independent MCCodeEmitter that doesn't share resources
1498 /// with the main one available through BinaryContext::MCE, managed by
1499 /// BinaryContext.
1500 /// This is intended to create a lock-free environment for an auxiliary thread
1501 /// that needs to perform work with an MCCodeEmitter that can be transient or
1502 /// won't be used in the main code emitter.
1503 IndependentCodeEmitter createIndependentMCCodeEmitter() const {
1504 IndependentCodeEmitter MCEInstance;
1505 MCEInstance.LocalCtx.reset(
1506 p: new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
1507 MCEInstance.LocalMOFI.reset(
1508 p: TheTarget->createMCObjectFileInfo(Ctx&: *MCEInstance.LocalCtx,
1509 /*PIC=*/PIC: !HasFixedLoadAddress));
1510 MCEInstance.LocalCtx->setObjectFileInfo(MCEInstance.LocalMOFI.get());
1511 MCEInstance.MCE.reset(
1512 p: TheTarget->createMCCodeEmitter(II: *MII, Ctx&: *MCEInstance.LocalCtx));
1513 return MCEInstance;
1514 }
1515
1516 /// Creating MCStreamer instance.
1517 std::unique_ptr<MCStreamer>
1518 createStreamer(llvm::raw_pwrite_stream &OS) const {
1519 MCCodeEmitter *MCE = TheTarget->createMCCodeEmitter(II: *MII, Ctx&: *Ctx);
1520 MCAsmBackend *MAB =
1521 TheTarget->createMCAsmBackend(STI: *STI, MRI: *MRI, Options: MCTargetOptions());
1522 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS);
1523 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
1524 T: *TheTriple, Ctx&: *Ctx, TAB: std::unique_ptr<MCAsmBackend>(MAB), OW: std::move(OW),
1525 Emitter: std::unique_ptr<MCCodeEmitter>(MCE), STI: *STI));
1526 return Streamer;
1527 }
1528
1529 void setIOAddressMap(AddressMap Map) { IOAddressMap = std::move(Map); }
1530 const AddressMap &getIOAddressMap() const {
1531 assert(IOAddressMap && "Address map not set yet");
1532 return *IOAddressMap;
1533 }
1534
1535 raw_ostream &outs() const { return Logger.Out; }
1536
1537 raw_ostream &errs() const { return Logger.Err; }
1538};
1539
1540template <typename T, typename = std::enable_if_t<sizeof(T) == 1>>
1541inline raw_ostream &operator<<(raw_ostream &OS, const ArrayRef<T> &ByteArray) {
1542 const char *Sep = "";
1543 for (const auto Byte : ByteArray) {
1544 OS << Sep << format("%.2x", Byte);
1545 Sep = " ";
1546 }
1547 return OS;
1548}
1549
1550} // namespace bolt
1551} // namespace llvm
1552
1553#endif
1554

source code of bolt/include/bolt/Core/BinaryContext.h