1//===- bolt/Core/BinaryContext.h - Low-level context ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Context for processing binary executable/library files.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef BOLT_CORE_BINARY_CONTEXT_H
14#define BOLT_CORE_BINARY_CONTEXT_H
15
#include "bolt/Core/AddressMap.h"
#include "bolt/Core/BinaryData.h"
#include "bolt/Core/BinarySection.h"
#include "bolt/Core/DebugData.h"
#include "bolt/Core/DynoStats.h"
#include "bolt/Core/JumpTable.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/RuntimeLibs/RuntimeLibrary.h"
#include "llvm/ADT/AddressRanges.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/iterator.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/RWMutex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <functional>
#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <optional>
#include <set>
#include <string>
#include <system_error>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
57
58namespace llvm {
59class MCDisassembler;
60class MCInstPrinter;
61
62using namespace object;
63
64namespace bolt {
65
66class BinaryFunction;
67
68/// Information on loadable part of the file.
69struct SegmentInfo {
70 uint64_t Address; /// Address of the segment in memory.
71 uint64_t Size; /// Size of the segment in memory.
72 uint64_t FileOffset; /// Offset in the file.
73 uint64_t FileSize; /// Size in file.
74 uint64_t Alignment; /// Alignment of the segment.
75 bool IsExecutable; /// Is the executable bit set on the Segment?
76
77 void print(raw_ostream &OS) const {
78 OS << "SegmentInfo { Address: 0x" << Twine::utohexstr(Val: Address)
79 << ", Size: 0x" << Twine::utohexstr(Val: Size) << ", FileOffset: 0x"
80 << Twine::utohexstr(Val: FileOffset) << ", FileSize: 0x"
81 << Twine::utohexstr(Val: FileSize) << ", Alignment: 0x"
82 << Twine::utohexstr(Val: Alignment) << ", " << (IsExecutable ? "x" : " ")
83 << "}";
84 };
85};
86
87inline raw_ostream &operator<<(raw_ostream &OS, const SegmentInfo &SegInfo) {
88 SegInfo.print(OS);
89 return OS;
90}
91
92// AArch64-specific symbol markers used to delimit code/data in .text.
enum class MarkerSymType : char {
  NONE = 0, ///< Zero value; presumably "not a marker symbol" (TODO confirm).
  CODE = 1, ///< Marker delimiting code.
  DATA = 2, ///< Marker delimiting data.
};
98
/// Classification of what a region of memory may contain.
enum class MemoryContentsType : char {
  UNKNOWN = 0,                 ///< Unknown contents.
  POSSIBLE_JUMP_TABLE = 1,     ///< Possibly a non-PIC jump table.
  POSSIBLE_PIC_JUMP_TABLE = 2, ///< Possibly a PIC jump table.
};
104
/// Helper function to truncate a \p Value to given size in \p Bytes.
///
/// Keeps the low \p Bytes bytes of \p Value and clears every bit above them;
/// the result is not sign-extended. A width of zero yields 0, and a width of
/// eight bytes or more returns \p Value unchanged. Handling both extremes up
/// front keeps the shift count below strictly inside [1, 63]; shifting a
/// 64-bit operand by 64 or more is undefined behavior.
inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
  if (Bytes == 0)
    return 0;
  if (Bytes >= sizeof(uint64_t))
    return Value;
  const uint64_t Mask = ~uint64_t{0} >> (64 - Bytes * 8);
  return static_cast<int64_t>(static_cast<uint64_t>(Value) & Mask);
}
109
/// Filter iterator.
///
/// Bidirectional adaptor over the range [Itr, End) that only ever rests on
/// positions accepted by a predicate. The predicate is called with the
/// *underlying iterator* (not the element it refers to) and returns true for
/// positions that should be visited.
///
/// Two FilterIterators compare equal when their underlying positions are
/// equal; the predicate and the end position take no part in the comparison.
template <typename ItrType,
          typename PredType = std::function<bool(const ItrType &)>>
class FilterIterator {
  using inner_traits = std::iterator_traits<ItrType>;
  using Iterator = FilterIterator;

  PredType Pred;
  ItrType Itr, End;

  /// Step back to the closest earlier position accepted by Pred. No lower
  /// bound is tracked, so the caller must guarantee that such a position
  /// exists before the current one.
  void prev() {
    while (!Pred(--Itr))
      ;
  }

  /// Advance one position, then skip forward to the next accepted position
  /// (or End).
  void next() {
    ++Itr;
    nextMatching();
  }

  /// Leave Itr untouched if it is already accepted; otherwise advance it to
  /// the first accepted position, stopping at End.
  void nextMatching() {
    while (Itr != End && !Pred(Itr))
      ++Itr;
  }

public:
  using iterator_category = std::bidirectional_iterator_tag;
  using value_type = typename inner_traits::value_type;
  using difference_type = typename inner_traits::difference_type;
  using pointer = typename inner_traits::pointer;
  using reference = typename inner_traits::reference;

  /// Build an iterator over [\p Itr, \p End) filtered by \p Pred. If \p Itr is
  /// not accepted, the iterator starts at the first accepted position after it.
  FilterIterator(PredType Pred, ItrType Itr, ItrType End)
      : Pred(std::move(Pred)), Itr(std::move(Itr)), End(std::move(End)) {
    nextMatching();
  }

  Iterator &operator++() {
    next();
    return *this;
  }

  Iterator &operator--() {
    prev();
    return *this;
  }

  /// Post-increment: returns a copy of the whole adaptor as it was before the
  /// step (copying only the underlying iterator would not convert to the
  /// declared return type).
  Iterator operator++(int) {
    Iterator Tmp(*this);
    next();
    return Tmp;
  }

  /// Post-decrement: returns a copy of the whole adaptor as it was before the
  /// step.
  Iterator operator--(int) {
    Iterator Tmp(*this);
    prev();
    return Tmp;
  }

  bool operator==(const Iterator &Other) const { return Itr == Other.Itr; }
  bool operator!=(const Iterator &Other) const { return !operator==(Other); }

  reference operator*() const { return *Itr; }
  pointer operator->() const { return &operator*(); }
};
153
154/// BOLT-exclusive errors generated in core BOLT libraries, optionally holding a
155/// string message and whether it is fatal or not. In case it is fatal and if
156/// BOLT is running as a standalone process, the process might be killed as soon
157/// as the error is checked.
158class BOLTError : public ErrorInfo<BOLTError> {
159public:
160 static char ID;
161
162 BOLTError(bool IsFatal, const Twine &S = Twine());
163 void log(raw_ostream &OS) const override;
164 bool isFatal() const { return IsFatal; }
165
166 const std::string &getMessage() const { return Msg; }
167 std::error_code convertToErrorCode() const override;
168
169private:
170 bool IsFatal;
171 std::string Msg;
172};
173
174/// Streams used by BOLT to log regular or error events
175struct JournalingStreams {
176 raw_ostream &Out;
177 raw_ostream &Err;
178};
179
180Error createNonFatalBOLTError(const Twine &S);
181Error createFatalBOLTError(const Twine &S);
182
183class BinaryContext {
184 BinaryContext() = delete;
185
186 /// Name of the binary file the context originated from.
187 std::string Filename;
188
189 /// Unique build ID if available for the binary.
190 std::optional<std::string> FileBuildID;
191
192 /// Set of all sections.
193 struct CompareSections {
194 bool operator()(const BinarySection *A, const BinarySection *B) const {
195 return *A < *B;
196 }
197 };
198 using SectionSetType = std::set<BinarySection *, CompareSections>;
199 SectionSetType Sections;
200
201 using SectionIterator = pointee_iterator<SectionSetType::iterator>;
202 using SectionConstIterator = pointee_iterator<SectionSetType::const_iterator>;
203
204 using FilteredSectionIterator = FilterIterator<SectionIterator>;
205 using FilteredSectionConstIterator = FilterIterator<SectionConstIterator>;
206
207 /// Map virtual address to a section. It is possible to have more than one
208 /// section mapped to the same address, e.g. non-allocatable sections.
209 using AddressToSectionMapType = std::multimap<uint64_t, BinarySection *>;
210 AddressToSectionMapType AddressToSection;
211
212 /// multimap of section name to BinarySection object. Some binaries
213 /// have multiple sections with the same name.
214 using NameToSectionMapType = std::multimap<std::string, BinarySection *>;
215 NameToSectionMapType NameToSection;
216
217 /// Map section references to BinarySection for matching sections in the
218 /// input file to internal section representation.
219 DenseMap<SectionRef, BinarySection *> SectionRefToBinarySection;
220
221 /// Low level section registration.
222 BinarySection &registerSection(BinarySection *Section);
223
224 /// Store all functions in the binary, sorted by original address.
225 std::map<uint64_t, BinaryFunction> BinaryFunctions;
226
227 /// A mutex that is used to control parallel accesses to BinaryFunctions
228 mutable llvm::sys::RWMutex BinaryFunctionsMutex;
229
230 /// Functions injected by BOLT
231 std::vector<BinaryFunction *> InjectedBinaryFunctions;
232
233 /// Jump tables for all functions mapped by address.
234 std::map<uint64_t, JumpTable *> JumpTables;
235
236 /// Locations of PC-relative relocations in data objects.
237 std::unordered_set<uint64_t> DataPCRelocations;
238
239 /// Used in duplicateJumpTable() to uniquely identify a JT clone
240 /// Start our IDs with a high number so getJumpTableContainingAddress checks
241 /// with size won't overflow
242 uint32_t DuplicatedJumpTables{0x10000000};
243
244 /// Function fragments to skip.
245 std::unordered_set<BinaryFunction *> FragmentsToSkip;
246
247 /// Fragment equivalence classes to query belonging to the same "family" in
248 /// presence of multiple fragments/multiple parents.
249 EquivalenceClasses<const BinaryFunction *> FragmentClasses;
250
251 /// The runtime library.
252 std::unique_ptr<RuntimeLibrary> RtLibrary;
253
254 /// DWP Context.
255 std::shared_ptr<DWARFContext> DWPContext;
256
257 /// Decoded pseudo probes.
258 std::shared_ptr<MCPseudoProbeDecoder> PseudoProbeDecoder;
259
260 /// A map of DWO Ids to CUs.
261 using DWOIdToCUMapType = std::unordered_map<uint64_t, DWARFUnit *>;
262 DWOIdToCUMapType DWOCUs;
263
264 bool ContainsDwarf5{false};
265 bool ContainsDwarfLegacy{false};
266
267 /// Mapping from input to output addresses.
268 std::optional<AddressMap> IOAddressMap;
269
270 /// Preprocess DWO debug information.
271 void preprocessDWODebugInfo();
272
273 /// DWARF line info for CUs.
274 std::map<unsigned, DwarfLineTable> DwarfLineTablesCUMap;
275
276 /// Internal helper for removing section name from a lookup table.
277 void deregisterSectionName(const BinarySection &Section);
278
279public:
280 static Expected<std::unique_ptr<BinaryContext>> createBinaryContext(
281 Triple TheTriple, std::shared_ptr<orc::SymbolStringPool> SSP,
282 StringRef InputFileName, SubtargetFeatures *Features, bool IsPIC,
283 std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger);
284
285 /// Superset of compiler units that will contain overwritten code that needs
286 /// new debug info. In a few cases, functions may end up not being
287 /// overwritten, but it is okay to re-generate debug info for them.
288 std::set<const DWARFUnit *> ProcessedCUs;
289
290 // Setup MCPlus target builder
291 void initializeTarget(std::unique_ptr<MCPlusBuilder> TargetBuilder) {
292 MIB = std::move(TargetBuilder);
293 }
294
295 /// Return function fragments to skip.
296 const std::unordered_set<BinaryFunction *> &getFragmentsToSkip() {
297 return FragmentsToSkip;
298 }
299
300 /// Add function fragment to skip
301 void addFragmentsToSkip(BinaryFunction *Function) {
302 FragmentsToSkip.insert(x: Function);
303 }
304
305 void clearFragmentsToSkip() { FragmentsToSkip.clear(); }
306
307 /// Given DWOId returns CU if it exists in DWOCUs.
308 std::optional<DWARFUnit *> getDWOCU(uint64_t DWOId);
309
310 /// Returns DWOContext if it exists.
311 DWARFContext *getDWOContext() const;
312
313 /// Get Number of DWOCUs in a map.
314 uint32_t getNumDWOCUs() { return DWOCUs.size(); }
315
316 /// Returns true if DWARF5 is used.
317 bool isDWARF5Used() const { return ContainsDwarf5; }
318
319 /// Returns true if DWARF4 or lower is used.
320 bool isDWARFLegacyUsed() const { return ContainsDwarfLegacy; }
321
322 std::map<unsigned, DwarfLineTable> &getDwarfLineTables() {
323 return DwarfLineTablesCUMap;
324 }
325
326 DwarfLineTable &getDwarfLineTable(unsigned CUID) {
327 return DwarfLineTablesCUMap[CUID];
328 }
329
330 Expected<unsigned> getDwarfFile(StringRef Directory, StringRef FileName,
331 unsigned FileNumber,
332 std::optional<MD5::MD5Result> Checksum,
333 std::optional<StringRef> Source,
334 unsigned CUID, unsigned DWARFVersion);
335
336 /// [start memory address] -> [segment info] mapping.
337 std::map<uint64_t, SegmentInfo> SegmentMapInfo;
338
339 /// Symbols that are expected to be undefined in MCContext during emission.
340 std::unordered_set<MCSymbol *> UndefinedSymbols;
341
342 /// [name] -> [BinaryData*] map used for global symbol resolution.
343 using SymbolMapType = StringMap<BinaryData *>;
344 SymbolMapType GlobalSymbols;
345
346 /// [address] -> [BinaryData], ...
347 /// Addresses never change.
348 /// Note: it is important that clients do not hold on to instances of
349 /// BinaryData* while the map is still being modified during BinaryFunction
350 /// disassembly. This is because of the possibility that a regular
351 /// BinaryData is later discovered to be a JumpTable.
352 using BinaryDataMapType = std::map<uint64_t, BinaryData *>;
353 using binary_data_iterator = BinaryDataMapType::iterator;
354 using binary_data_const_iterator = BinaryDataMapType::const_iterator;
355 BinaryDataMapType BinaryDataMap;
356
357 using FilteredBinaryDataConstIterator =
358 FilterIterator<binary_data_const_iterator>;
359 using FilteredBinaryDataIterator = FilterIterator<binary_data_iterator>;
360
361 StringRef getFilename() const { return Filename; }
362 void setFilename(StringRef Name) { Filename = std::string(Name); }
363
364 std::optional<StringRef> getFileBuildID() const {
365 if (FileBuildID)
366 return StringRef(*FileBuildID);
367
368 return std::nullopt;
369 }
370 void setFileBuildID(StringRef ID) { FileBuildID = std::string(ID); }
371
372 bool hasSymbolsWithFileName() const { return HasSymbolsWithFileName; }
373 void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = Value; }
374
375 std::shared_ptr<orc::SymbolStringPool> getSymbolStringPool() { return SSP; }
376 /// Return true if relocations against symbol with a given name
377 /// must be created.
378 bool forceSymbolRelocations(StringRef SymbolName) const;
379
380 uint64_t getNumUnusedProfiledObjects() const {
381 return NumUnusedProfiledObjects;
382 }
383 void setNumUnusedProfiledObjects(uint64_t N) { NumUnusedProfiledObjects = N; }
384
385 RuntimeLibrary *getRuntimeLibrary() { return RtLibrary.get(); }
386 void setRuntimeLibrary(std::unique_ptr<RuntimeLibrary> Lib) {
387 assert(!RtLibrary && "Cannot set runtime library twice.");
388 RtLibrary = std::move(Lib);
389 }
390
391 const MCPseudoProbeDecoder *getPseudoProbeDecoder() const {
392 return PseudoProbeDecoder.get();
393 }
394
395 void setPseudoProbeDecoder(std::shared_ptr<MCPseudoProbeDecoder> Decoder) {
396 assert(!PseudoProbeDecoder && "Cannot set pseudo probe decoder twice.");
397 PseudoProbeDecoder = Decoder;
398 }
399
400 /// Return BinaryFunction containing a given \p Address or nullptr if
401 /// no registered function contains the \p Address.
402 ///
403 /// In a binary a function has somewhat vague boundaries. E.g. a function can
404 /// refer to the first byte past the end of the function, and it will still be
405 /// referring to this function, not the function following it in the address
406 /// space. Thus we have the following flags that allow to lookup for
407 /// a function where a caller has more context for the search.
408 ///
409 /// If \p CheckPastEnd is true and the \p Address falls on a byte
410 /// immediately following the last byte of some function and there's no other
411 /// function that starts there, then return the function as the one containing
412 /// the \p Address. This is useful when we need to locate functions for
413 /// references pointing immediately past a function body.
414 ///
415 /// If \p UseMaxSize is true, then include the space between this function
416 /// body and the next object in address ranges that we check.
417 BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address,
418 bool CheckPastEnd = false,
419 bool UseMaxSize = false);
420 const BinaryFunction *
421 getBinaryFunctionContainingAddress(uint64_t Address,
422 bool CheckPastEnd = false,
423 bool UseMaxSize = false) const {
424 return const_cast<BinaryContext *>(this)
425 ->getBinaryFunctionContainingAddress(Address, CheckPastEnd, UseMaxSize);
426 }
427
428 /// Return a BinaryFunction that starts at a given \p Address.
429 BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address);
430
431 const BinaryFunction *getBinaryFunctionAtAddress(uint64_t Address) const {
432 return const_cast<BinaryContext *>(this)->getBinaryFunctionAtAddress(
433 Address);
434 }
435
436 /// Return size of an entry for the given jump table \p Type.
437 uint64_t getJumpTableEntrySize(JumpTable::JumpTableType Type) const {
438 return Type == JumpTable::JTT_PIC ? 4 : AsmInfo->getCodePointerSize();
439 }
440
441 /// Return JumpTable containing a given \p Address.
442 JumpTable *getJumpTableContainingAddress(uint64_t Address) {
443 auto JTI = JumpTables.upper_bound(x: Address);
444 if (JTI == JumpTables.begin())
445 return nullptr;
446 --JTI;
447 if (JTI->first + JTI->second->getSize() > Address)
448 return JTI->second;
449 if (JTI->second->getSize() == 0 && JTI->first == Address)
450 return JTI->second;
451 return nullptr;
452 }
453
454 /// Deregister JumpTable registered at a given \p Address and delete it.
455 void deleteJumpTable(uint64_t Address);
456
457 unsigned getDWARFEncodingSize(unsigned Encoding) {
458 if (Encoding == dwarf::DW_EH_PE_omit)
459 return 0;
460 switch (Encoding & 0x0f) {
461 default:
462 llvm_unreachable("unknown encoding");
463 case dwarf::DW_EH_PE_absptr:
464 case dwarf::DW_EH_PE_signed:
465 return AsmInfo->getCodePointerSize();
466 case dwarf::DW_EH_PE_udata2:
467 case dwarf::DW_EH_PE_sdata2:
468 return 2;
469 case dwarf::DW_EH_PE_udata4:
470 case dwarf::DW_EH_PE_sdata4:
471 return 4;
472 case dwarf::DW_EH_PE_udata8:
473 case dwarf::DW_EH_PE_sdata8:
474 return 8;
475 }
476 }
477
478 /// [MCSymbol] -> [BinaryFunction]
479 ///
480 /// As we fold identical functions, multiple symbols can point
481 /// to the same BinaryFunction.
482 std::unordered_map<const MCSymbol *, BinaryFunction *> SymbolToFunctionMap;
483
484 /// A mutex that is used to control parallel accesses to SymbolToFunctionMap
485 mutable llvm::sys::RWMutex SymbolToFunctionMapMutex;
486
487 /// Look up the symbol entry that contains the given \p Address (based on
488 /// the start address and size for each symbol). Returns a pointer to
489 /// the BinaryData for that symbol. If no data is found, nullptr is returned.
490 const BinaryData *getBinaryDataContainingAddressImpl(uint64_t Address) const;
491
492 /// Update the Parent fields in BinaryDatas after adding a new entry into
493 /// \p BinaryDataMap.
494 void updateObjectNesting(BinaryDataMapType::iterator GAI);
495
496 /// Validate that if object address ranges overlap that the object with
497 /// the larger range is a parent of the object with the smaller range.
498 bool validateObjectNesting() const;
499
500 /// Validate that there are no top level "holes" in each section
501 /// and that all relocations with a section are mapped to a valid
502 /// top level BinaryData.
503 bool validateHoles() const;
504
505 /// Produce output address ranges based on input ranges for some module.
506 DebugAddressRangesVector translateModuleAddressRanges(
507 const DWARFAddressRangesVector &InputRanges) const;
508
509 /// Get a bogus "absolute" section that will be associated with all
510 /// absolute BinaryDatas.
511 BinarySection &absoluteSection();
512
513 /// Process "holes" in between known BinaryData objects. For now,
514 /// symbols are padded with the space before the next BinaryData object.
515 void fixBinaryDataHoles();
516
517 /// Generate names based on data hashes for unknown symbols.
518 void generateSymbolHashes();
519
520 /// Construct BinaryFunction object and add it to internal maps.
521 BinaryFunction *createBinaryFunction(const std::string &Name,
522 BinarySection &Section, uint64_t Address,
523 uint64_t Size, uint64_t SymbolSize = 0,
524 uint16_t Alignment = 0);
525
526 /// Return all functions for this rewrite instance.
527 std::map<uint64_t, BinaryFunction> &getBinaryFunctions() {
528 return BinaryFunctions;
529 }
530
531 /// Return all functions for this rewrite instance.
532 const std::map<uint64_t, BinaryFunction> &getBinaryFunctions() const {
533 return BinaryFunctions;
534 }
535
536 /// Create BOLT-injected function
537 BinaryFunction *createInjectedBinaryFunction(const std::string &Name,
538 bool IsSimple = true);
539
540 /// Patch the original binary contents at address \p Address with a sequence
541 /// of instructions from the \p Instructions list. The callee is responsible
542 /// for checking that the sequence doesn't cross any function or section
543 /// boundaries.
544 ///
545 /// Optional \p Name can be assigned to the patch. The name will be emitted to
546 /// the symbol table at \p Address.
547 BinaryFunction *
548 createInstructionPatch(uint64_t Address,
549 const InstructionListType &Instructions,
550 const Twine &Name = "");
551
552 std::vector<BinaryFunction *> &getInjectedBinaryFunctions() {
553 return InjectedBinaryFunctions;
554 }
555
556 /// Return vector with all functions, i.e. include functions from the input
557 /// binary and functions created by BOLT.
558 std::vector<BinaryFunction *> getAllBinaryFunctions();
559
560 /// Construct a jump table for \p Function at \p Address or return an existing
561 /// one at that location.
562 ///
  /// May create an embedded jump table and return its label.
565 const MCSymbol *getOrCreateJumpTable(BinaryFunction &Function,
566 uint64_t Address,
567 JumpTable::JumpTableType Type);
568
569 /// Analyze a possible jump table of type \p Type at a given \p Address.
570 /// \p BF is a function referencing the jump table.
571 /// Return true if the jump table was detected at \p Address, and false
572 /// otherwise.
573 ///
574 /// If \p NextJTAddress is different from zero, it is used as an upper
575 /// bound for jump table memory layout.
576 ///
  /// Optionally, populate \p EntriesAsAddress from jump table entries. The
  /// entries could be partially populated if the jump table detection fails.
579 bool analyzeJumpTable(const uint64_t Address,
580 const JumpTable::JumpTableType Type,
581 const BinaryFunction &BF,
582 const uint64_t NextJTAddress = 0,
583 JumpTable::AddressesType *EntriesAsAddress = nullptr,
584 bool *HasEntryInFragment = nullptr) const;
585
586 /// After jump table locations are established, this function will populate
587 /// their EntriesAsAddress based on memory contents.
588 void populateJumpTables();
589
590 /// Returns a jump table ID and label pointing to the duplicated jump table.
591 /// Ordinarily, jump tables are identified by their address in the input
592 /// binary. We return an ID with the high bit set to differentiate it from
593 /// regular addresses, avoiding conflicts with standard jump tables.
594 std::pair<uint64_t, const MCSymbol *>
595 duplicateJumpTable(BinaryFunction &Function, JumpTable *JT,
596 const MCSymbol *OldLabel);
597
598 /// Generate a unique name for jump table at a given \p Address belonging
599 /// to function \p BF.
600 std::string generateJumpTableName(const BinaryFunction &BF, uint64_t Address);
601
602 /// Free memory used by JumpTable's EntriesAsAddress
603 void clearJumpTableTempData() {
604 for (auto &JTI : JumpTables) {
605 JumpTable &JT = *JTI.second;
606 JumpTable::AddressesType Temp;
607 Temp.swap(x&: JT.EntriesAsAddress);
608 }
609 }
610 /// Return true if the array of bytes represents a valid code padding.
611 bool hasValidCodePadding(const BinaryFunction &BF);
612
613 /// Verify padding area between functions, and adjust max function size
614 /// accordingly.
615 void adjustCodePadding();
616
617 /// Regular page size.
618 unsigned RegularPageSize{0x1000};
619 static constexpr unsigned RegularPageSizeX86 = 0x1000;
620 static constexpr unsigned RegularPageSizeAArch64 = 0x10000;
621
622 /// Huge page size to use.
623 static constexpr unsigned HugePageSize = 0x200000;
624
625 /// Addresses reserved for kernel on x86_64 start at this location.
626 static constexpr uint64_t KernelStartX86_64 = 0xFFFF'FFFF'8000'0000;
627
628 /// Map address to a constant island owner (constant data in code section)
629 std::map<uint64_t, BinaryFunction *> AddressToConstantIslandMap;
630
631 /// A map from jump table address to insertion order. Used for generating
632 /// jump table names.
633 std::map<uint64_t, size_t> JumpTableIds;
634
635 std::unique_ptr<MCContext> Ctx;
636
637 /// A mutex that is used to control parallel accesses to Ctx
638 mutable llvm::sys::RWMutex CtxMutex;
639 std::unique_lock<llvm::sys::RWMutex> scopeLock() const {
640 return std::unique_lock<llvm::sys::RWMutex>(CtxMutex);
641 }
642
643 std::unique_ptr<DWARFContext> DwCtx;
644
645 std::unique_ptr<Triple> TheTriple;
646
647 std::shared_ptr<orc::SymbolStringPool> SSP;
648
649 const Target *TheTarget;
650
651 std::string TripleName;
652
653 std::unique_ptr<MCCodeEmitter> MCE;
654
655 std::unique_ptr<MCObjectFileInfo> MOFI;
656
657 std::unique_ptr<const MCAsmInfo> AsmInfo;
658
659 std::unique_ptr<const MCInstrInfo> MII;
660
661 std::unique_ptr<const MCSubtargetInfo> STI;
662
663 std::unique_ptr<MCInstPrinter> InstPrinter;
664
665 std::unique_ptr<const MCInstrAnalysis> MIA;
666
667 std::unique_ptr<MCPlusBuilder> MIB;
668
669 std::unique_ptr<const MCRegisterInfo> MRI;
670
671 std::unique_ptr<MCDisassembler> DisAsm;
672
673 /// Symbolic disassembler.
674 std::unique_ptr<MCDisassembler> SymbolicDisAsm;
675
676 std::unique_ptr<MCAsmBackend> MAB;
677
678 /// Allows BOLT to print to log whenever it is necessary (with or without
679 /// const references)
680 mutable JournalingStreams Logger;
681
682 /// Indicates if the binary is Linux kernel.
683 bool IsLinuxKernel{false};
684
685 /// Indicates if relocations are available for usage.
686 bool HasRelocations{false};
687
688 /// Indicates if the binary is stripped
689 bool IsStripped{false};
690
691 /// Indicates if the binary contains split functions.
692 bool HasSplitFunctions{false};
693
694 /// Indicates if the function ordering of the binary is finalized.
695 bool HasFinalizedFunctionOrder{false};
696
697 /// Indicates if a separate .text.warm section is needed that contains
698 /// function fragments with
699 /// FunctionFragment::getFragmentNum() == FragmentNum::warm()
700 bool HasWarmSection{false};
701
702 /// Is the binary always loaded at a fixed address. Shared objects and
703 /// position-independent executables (PIEs) are examples of binaries that
704 /// will have HasFixedLoadAddress set to false.
705 bool HasFixedLoadAddress{true};
706
707 /// True if the binary has no dynamic dependencies, i.e., if it was statically
708 /// linked.
709 bool IsStaticExecutable{false};
710
711 /// Set to true if the binary contains PT_INTERP header.
712 bool HasInterpHeader{false};
713
714 /// Indicates if any of local symbols used for functions or data objects
715 /// have an origin file name available.
716 bool HasSymbolsWithFileName{false};
717
718 /// Does the binary have BAT section.
719 bool HasBATSection{false};
720
721 /// Sum of execution count of all functions
722 uint64_t SumExecutionCount{0};
723
724 /// Number of functions with profile information
725 uint64_t NumProfiledFuncs{0};
726
727 /// Number of functions with stale profile information
728 uint64_t NumStaleProfileFuncs{0};
729
730 /// Number of objects in profile whose profile was ignored.
731 uint64_t NumUnusedProfiledObjects{0};
732
733 /// Total hotness score according to profiling data for this binary.
734 uint64_t TotalScore{0};
735
736 /// Binary-wide aggregated stats.
737 struct BinaryStats {
738 /// Stats for stale profile matching:
739 /// the total number of basic blocks in the profile
740 uint32_t NumStaleBlocks{0};
741 /// the number of exactly matched basic blocks
742 uint32_t NumExactMatchedBlocks{0};
743 /// the number of loosely matched basic blocks
744 uint32_t NumLooseMatchedBlocks{0};
745 /// the number of exactly pseudo probe matched basic blocks
746 uint32_t NumPseudoProbeExactMatchedBlocks{0};
747 /// the number of loosely pseudo probe matched basic blocks
748 uint32_t NumPseudoProbeLooseMatchedBlocks{0};
749 /// the number of call matched basic blocks
750 uint32_t NumCallMatchedBlocks{0};
751 /// the total count of samples in the profile
752 uint64_t StaleSampleCount{0};
753 /// the count of exactly matched samples
754 uint64_t ExactMatchedSampleCount{0};
755 /// the count of loosely matched samples
756 uint64_t LooseMatchedSampleCount{0};
757 /// the count of exactly pseudo probe matched samples
758 uint64_t PseudoProbeExactMatchedSampleCount{0};
759 /// the count of loosely pseudo probe matched samples
760 uint64_t PseudoProbeLooseMatchedSampleCount{0};
761 /// the count of call matched samples
762 uint64_t CallMatchedSampleCount{0};
763 /// the number of stale functions that have matching number of blocks in
764 /// the profile
765 uint64_t NumStaleFuncsWithEqualBlockCount{0};
766 /// the number of blocks that have matching size but a differing hash
767 uint64_t NumStaleBlocksWithEqualIcount{0};
768 } Stats;
769
770 // Original binary execution count stats.
771 DynoStats InitialDynoStats;
772
773 // Address of the first allocated segment.
774 uint64_t FirstAllocAddress{std::numeric_limits<uint64_t>::max()};
775
776 /// Track next available address for new allocatable sections. RewriteInstance
777 /// sets this prior to running BOLT passes, so layout passes are aware of the
778 /// final addresses functions will have.
779 uint64_t LayoutStartAddress{0};
780
781 /// Old .text info.
782 uint64_t OldTextSectionAddress{0};
783 uint64_t OldTextSectionOffset{0};
784 uint64_t OldTextSectionSize{0};
785
786 /// Area in the input binary reserved for BOLT.
787 AddressRange BOLTReserved;
788
789 /// Address of the code/function that is executed before any other code in
790 /// the binary.
791 std::optional<uint64_t> StartFunctionAddress;
792
793 /// Address of the code/function that is going to be executed right before
794 /// the execution of the binary is completed.
795 std::optional<uint64_t> FiniFunctionAddress;
796
797 /// DT_FINI.
798 std::optional<uint64_t> FiniAddress;
799
800 /// DT_FINI_ARRAY. Only used when DT_FINI is not set.
801 std::optional<uint64_t> FiniArrayAddress;
802
803 /// DT_FINI_ARRAYSZ. Only used when DT_FINI is not set.
804 std::optional<uint64_t> FiniArraySize;
805
806 /// Page alignment used for code layout.
807 uint64_t PageAlign{HugePageSize};
808
809 /// True if the binary requires immediate relocation processing.
810 bool RequiresZNow{false};
811
812 /// List of functions that always trap.
813 std::vector<const BinaryFunction *> TrappedFunctions;
814
815 /// List of external addresses in the code that are not a function start
816 /// and are referenced from BinaryFunction.
817 std::list<std::pair<BinaryFunction *, uint64_t>> InterproceduralReferences;
818
819 /// DWARF encoding. Available encoding types defined in BinaryFormat/Dwarf.h
820 /// enum Constants, e.g. DW_EH_PE_omit.
821 unsigned LSDAEncoding = dwarf::DW_EH_PE_omit;
822
823 BinaryContext(std::unique_ptr<MCContext> Ctx,
824 std::unique_ptr<DWARFContext> DwCtx,
825 std::unique_ptr<Triple> TheTriple,
826 std::shared_ptr<orc::SymbolStringPool> SSP,
827 const Target *TheTarget, std::string TripleName,
828 std::unique_ptr<MCCodeEmitter> MCE,
829 std::unique_ptr<MCObjectFileInfo> MOFI,
830 std::unique_ptr<const MCAsmInfo> AsmInfo,
831 std::unique_ptr<const MCInstrInfo> MII,
832 std::unique_ptr<const MCSubtargetInfo> STI,
833 std::unique_ptr<MCInstPrinter> InstPrinter,
834 std::unique_ptr<const MCInstrAnalysis> MIA,
835 std::unique_ptr<MCPlusBuilder> MIB,
836 std::unique_ptr<const MCRegisterInfo> MRI,
837 std::unique_ptr<MCDisassembler> DisAsm,
838 JournalingStreams Logger);
839
840 ~BinaryContext();
841
842 std::unique_ptr<MCObjectWriter> createObjectWriter(raw_pwrite_stream &OS);
843
844 bool isELF() const { return TheTriple->isOSBinFormatELF(); }
845
846 bool isMachO() const { return TheTriple->isOSBinFormatMachO(); }
847
848 bool isAArch64() const {
849 return TheTriple->getArch() == llvm::Triple::aarch64;
850 }
851
852 bool isX86() const {
853 return TheTriple->getArch() == llvm::Triple::x86 ||
854 TheTriple->getArch() == llvm::Triple::x86_64;
855 }
856
857 bool isRISCV() const { return TheTriple->getArch() == llvm::Triple::riscv64; }
858
859 // AArch64-specific functions to check if symbol is used to delimit
860 // code/data in .text. Code is marked by $x, data by $d.
861 MarkerSymType getMarkerType(const SymbolRef &Symbol) const;
862 bool isMarker(const SymbolRef &Symbol) const;
863
864 /// Iterate over all BinaryData.
865 iterator_range<binary_data_const_iterator> getBinaryData() const {
866 return make_range(x: BinaryDataMap.begin(), y: BinaryDataMap.end());
867 }
868
869 /// Iterate over all BinaryData.
870 iterator_range<binary_data_iterator> getBinaryData() {
871 return make_range(x: BinaryDataMap.begin(), y: BinaryDataMap.end());
872 }
873
874 /// Iterate over all BinaryData associated with the given \p Section.
875 iterator_range<FilteredBinaryDataConstIterator>
876 getBinaryDataForSection(const BinarySection &Section) const {
877 auto Begin = BinaryDataMap.lower_bound(x: Section.getAddress());
878 if (Begin != BinaryDataMap.begin())
879 --Begin;
880 auto End = BinaryDataMap.upper_bound(x: Section.getEndAddress());
881 auto pred = [&Section](const binary_data_const_iterator &Itr) -> bool {
882 return Itr->second->getSection() == Section;
883 };
884 return make_range(x: FilteredBinaryDataConstIterator(pred, Begin, End),
885 y: FilteredBinaryDataConstIterator(pred, End, End));
886 }
887
888 /// Iterate over all BinaryData associated with the given \p Section.
889 iterator_range<FilteredBinaryDataIterator>
890 getBinaryDataForSection(BinarySection &Section) {
891 auto Begin = BinaryDataMap.lower_bound(x: Section.getAddress());
892 if (Begin != BinaryDataMap.begin())
893 --Begin;
894 auto End = BinaryDataMap.upper_bound(x: Section.getEndAddress());
895 auto pred = [&Section](const binary_data_iterator &Itr) -> bool {
896 return Itr->second->getSection() == Section;
897 };
898 return make_range(x: FilteredBinaryDataIterator(pred, Begin, End),
899 y: FilteredBinaryDataIterator(pred, End, End));
900 }
901
/// Iterate over all the sub-symbols of \p BD (if any).
903 iterator_range<binary_data_iterator> getSubBinaryData(BinaryData *BD);
904
905 /// Clear the global symbol address -> name(s) map.
906 void clearBinaryData() {
907 GlobalSymbols.clear();
908 for (auto &Entry : BinaryDataMap)
909 delete Entry.second;
910 BinaryDataMap.clear();
911 }
912
/// Process \p Address reference from code in function \p BF.
914 /// \p IsPCRel indicates if the reference is PC-relative.
915 /// Return <Symbol, Addend> pair corresponding to the \p Address.
916 std::pair<const MCSymbol *, uint64_t>
917 handleAddressRef(uint64_t Address, BinaryFunction &BF, bool IsPCRel);
918
919 /// Analyze memory contents at the given \p Address and return the type of
920 /// memory contents (such as a possible jump table).
921 MemoryContentsType analyzeMemoryAt(uint64_t Address, BinaryFunction &BF);
922
923 /// Return a value of the global \p Symbol or an error if the value
924 /// was not set.
925 ErrorOr<uint64_t> getSymbolValue(const MCSymbol &Symbol) const {
926 const BinaryData *BD = getBinaryDataByName(Name: Symbol.getName());
927 if (!BD)
928 return std::make_error_code(e: std::errc::bad_address);
929 return BD->getAddress();
930 }
931
932 /// Return a global symbol registered at a given \p Address and \p Size.
933 /// If no symbol exists, create one with unique name using \p Prefix.
934 /// If there are multiple symbols registered at the \p Address, then
935 /// return the first one.
936 MCSymbol *getOrCreateGlobalSymbol(uint64_t Address, Twine Prefix,
937 uint64_t Size = 0, uint16_t Alignment = 0,
938 unsigned Flags = 0);
939
940 /// Create a global symbol without registering an address.
941 MCSymbol *getOrCreateUndefinedGlobalSymbol(StringRef Name);
942
943 /// Register a symbol with \p Name at a given \p Address using \p Size,
944 /// \p Alignment, and \p Flags. See llvm::SymbolRef::Flags for the definition
945 /// of \p Flags.
946 MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
947 uint64_t Size, uint16_t Alignment,
948 unsigned Flags = 0);
949
950 /// Return BinaryData registered at a given \p Address or nullptr if no
951 /// global symbol was registered at the location.
952 const BinaryData *getBinaryDataAtAddress(uint64_t Address) const {
953 auto NI = BinaryDataMap.find(x: Address);
954 return NI != BinaryDataMap.end() ? NI->second : nullptr;
955 }
956
957 BinaryData *getBinaryDataAtAddress(uint64_t Address) {
958 auto NI = BinaryDataMap.find(x: Address);
959 return NI != BinaryDataMap.end() ? NI->second : nullptr;
960 }
961
962 /// Look up the symbol entry that contains the given \p Address (based on
963 /// the start address and size for each symbol). Returns a pointer to
964 /// the BinaryData for that symbol. If no data is found, nullptr is returned.
965 const BinaryData *getBinaryDataContainingAddress(uint64_t Address) const {
966 return getBinaryDataContainingAddressImpl(Address);
967 }
968
969 BinaryData *getBinaryDataContainingAddress(uint64_t Address) {
970 return const_cast<BinaryData *>(
971 getBinaryDataContainingAddressImpl(Address));
972 }
973
974 /// Return BinaryData for the given \p Name or nullptr if no
975 /// global symbol with that name exists.
976 const BinaryData *getBinaryDataByName(StringRef Name) const {
977 return GlobalSymbols.lookup(Key: Name);
978 }
979
980 BinaryData *getBinaryDataByName(StringRef Name) {
981 return GlobalSymbols.lookup(Key: Name);
982 }
983
984 /// Return registered PLT entry BinaryData with the given \p Name
985 /// or nullptr if no global PLT symbol with that name exists.
986 const BinaryData *getPLTBinaryDataByName(StringRef Name) const {
987 if (const BinaryData *Data = getBinaryDataByName(Name: Name.str() + "@PLT"))
988 return Data;
989
990 // The symbol name might contain versioning information e.g
991 // memcpy@@GLIBC_2.17. Remove it and try to locate binary data
992 // without it.
993 size_t At = Name.find(Str: "@");
994 if (At != std::string::npos)
995 return getBinaryDataByName(Name: Name.str().substr(pos: 0, n: At) + "@PLT");
996
997 return nullptr;
998 }
999
1000 /// Retrieves a reference to ELF's _GLOBAL_OFFSET_TABLE_ symbol, which points
1001 /// at GOT, or null if it is not present in the input binary symtab.
1002 BinaryData *getGOTSymbol();
1003
1004 /// Checks if symbol name refers to ELF's _GLOBAL_OFFSET_TABLE_ symbol
1005 bool isGOTSymbol(StringRef SymName) const {
1006 return SymName == "_GLOBAL_OFFSET_TABLE_";
1007 }
1008
1009 /// Return true if \p SymbolName was generated internally and was not present
1010 /// in the input binary.
1011 bool isInternalSymbolName(const StringRef Name) {
1012 return Name.starts_with(Prefix: "SYMBOLat") || Name.starts_with(Prefix: "DATAat") ||
1013 Name.starts_with(Prefix: "HOLEat");
1014 }
1015
1016 MCSymbol *getHotTextStartSymbol() const {
1017 return Ctx->getOrCreateSymbol(Name: "__hot_start");
1018 }
1019
1020 MCSymbol *getHotTextEndSymbol() const {
1021 return Ctx->getOrCreateSymbol(Name: "__hot_end");
1022 }
1023
1024 MCSection *getTextSection() const { return MOFI->getTextSection(); }
1025
1026 /// Return code section with a given name.
1027 MCSection *getCodeSection(StringRef SectionName) const {
1028 if (isELF())
1029 return Ctx->getELFSection(Section: SectionName, Type: ELF::SHT_PROGBITS,
1030 Flags: ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
1031 else
1032 return Ctx->getMachOSection(Segment: "__TEXT", Section: SectionName,
1033 TypeAndAttributes: MachO::S_ATTR_PURE_INSTRUCTIONS,
1034 K: SectionKind::getText());
1035 }
1036
1037 /// Return data section with a given name.
1038 MCSection *getDataSection(StringRef SectionName) const {
1039 return Ctx->getELFSection(Section: SectionName, Type: ELF::SHT_PROGBITS, Flags: ELF::SHF_ALLOC);
1040 }
1041
1042 /// \name Pre-assigned Section Names
1043 /// @{
1044
1045 const char *getMainCodeSectionName() const { return ".text"; }
1046
1047 const char *getWarmCodeSectionName() const { return ".text.warm"; }
1048
1049 const char *getColdCodeSectionName() const { return ".text.cold"; }
1050
1051 const char *getHotTextMoverSectionName() const { return ".text.mover"; }
1052
1053 const char *getInjectedCodeSectionName() const { return ".text.injected"; }
1054
1055 const char *getInjectedColdCodeSectionName() const {
1056 return ".text.injected.cold";
1057 }
1058
1059 ErrorOr<BinarySection &> getGdbIndexSection() const {
1060 return getUniqueSectionByName(SectionName: ".gdb_index");
1061 }
1062
1063 ErrorOr<BinarySection &> getDebugNamesSection() const {
1064 return getUniqueSectionByName(SectionName: ".debug_names");
1065 }
1066
1067 /// @}
1068
1069 /// Register \p TargetFunction as a fragment of \p Function if checks pass:
1070 /// - if \p TargetFunction name matches \p Function name with a suffix:
1071 /// fragment_name == parent_name.cold(.\d+)?
1072 /// True if the Function is registered, false if the check failed.
1073 bool registerFragment(BinaryFunction &TargetFunction,
1074 BinaryFunction &Function);
1075
1076 /// Return true if two functions belong to the same "family": are fragments
1077 /// of one another, or fragments of the same parent, or transitively fragment-
1078 /// related.
1079 bool areRelatedFragments(const BinaryFunction *LHS,
1080 const BinaryFunction *RHS) const {
1081 return FragmentClasses.isEquivalent(V1: LHS, V2: RHS);
1082 }
1083
1084 /// Add interprocedural reference for \p Function to \p Address
1085 void addInterproceduralReference(BinaryFunction *Function, uint64_t Address) {
1086 InterproceduralReferences.push_back(x: {Function, Address});
1087 }
1088
1089 /// Used to fix the target of linker-generated AArch64 adrp + add
1090 /// sequence with no relocation info.
1091 void addAdrpAddRelocAArch64(BinaryFunction &BF, MCInst &LoadLowBits,
1092 MCInst &LoadHiBits, uint64_t Target);
1093
/// Return true if AArch64 veneer was successfully matched at a given
1095 /// \p Address and register veneer binary function if \p MatchOnly
1096 /// argument is false.
1097 bool handleAArch64Veneer(uint64_t Address, bool MatchOnly = false);
1098
/// Resolve the interprocedural references recorded via
/// addInterproceduralReference().
1100 void processInterproceduralReferences();
1101
1102 /// Skip functions with all parent and child fragments transitively.
1103 void skipMarkedFragments();
1104
1105 /// Perform any necessary post processing on the symbol table after
1106 /// function disassembly is complete. This processing fixes top
1107 /// level data holes and makes sure the symbol table is valid.
1108 /// It also assigns all memory profiling info to the appropriate
1109 /// BinaryData objects.
1110 void postProcessSymbolTable();
1111
1112 /// Set the size of the global symbol located at \p Address. Return
1113 /// false if no symbol exists, true otherwise.
1114 bool setBinaryDataSize(uint64_t Address, uint64_t Size);
1115
1116 /// Print the global symbol table.
1117 void printGlobalSymbols(raw_ostream &OS) const;
1118
1119 /// Register information about the given \p Section so we can look up
1120 /// sections by address.
1121 BinarySection &registerSection(SectionRef Section);
1122
/// Register a copy of \p OriginalSection under a different name.
1124 BinarySection &registerSection(const Twine &SectionName,
1125 const BinarySection &OriginalSection);
1126
/// Register or update the information for the section with the given
/// \p Name. If the section already exists, the information in the
/// section will be updated with the new data.
1130 BinarySection &registerOrUpdateSection(const Twine &Name, unsigned ELFType,
1131 unsigned ELFFlags,
1132 uint8_t *Data = nullptr,
1133 uint64_t Size = 0,
1134 unsigned Alignment = 1);
1135
1136 /// Register the information for the note (non-allocatable) section
1137 /// with the given /p Name. If the section already exists, the
1138 /// information in the section will be updated with the new data.
1139 BinarySection &
1140 registerOrUpdateNoteSection(const Twine &Name, uint8_t *Data = nullptr,
1141 uint64_t Size = 0, unsigned Alignment = 1,
1142 bool IsReadOnly = true,
1143 unsigned ELFType = ELF::SHT_PROGBITS) {
1144 return registerOrUpdateSection(Name, ELFType,
1145 ELFFlags: BinarySection::getFlags(IsReadOnly), Data,
1146 Size, Alignment);
1147 }
1148
1149 /// Remove sections that were preregistered but never used.
1150 void deregisterUnusedSections();
1151
/// Remove the given \p Section from the set of all sections. Return
1153 /// true if the section was removed (and deleted), otherwise false.
1154 bool deregisterSection(BinarySection &Section);
1155
1156 /// Re-register \p Section under the \p NewName.
1157 void renameSection(BinarySection &Section, const Twine &NewName);
1158
1159 /// Iterate over all registered sections.
1160 iterator_range<FilteredSectionIterator> sections() {
1161 auto notNull = [](const SectionIterator &Itr) { return (bool)*Itr; };
1162 return make_range(
1163 x: FilteredSectionIterator(notNull, Sections.begin(), Sections.end()),
1164 y: FilteredSectionIterator(notNull, Sections.end(), Sections.end()));
1165 }
1166
1167 /// Iterate over all registered sections.
1168 iterator_range<FilteredSectionConstIterator> sections() const {
1169 return const_cast<BinaryContext *>(this)->sections();
1170 }
1171
1172 /// Iterate over all registered allocatable sections.
1173 iterator_range<FilteredSectionIterator> allocatableSections() {
1174 auto isAllocatable = [](const SectionIterator &Itr) {
1175 return *Itr && Itr->isAllocatable();
1176 };
1177 return make_range(
1178 x: FilteredSectionIterator(isAllocatable, Sections.begin(),
1179 Sections.end()),
1180 y: FilteredSectionIterator(isAllocatable, Sections.end(), Sections.end()));
1181 }
1182
1183 /// Iterate over all registered code sections.
1184 iterator_range<FilteredSectionIterator> textSections() {
1185 auto isText = [](const SectionIterator &Itr) {
1186 return *Itr && Itr->isAllocatable() && Itr->isText();
1187 };
1188 return make_range(
1189 x: FilteredSectionIterator(isText, Sections.begin(), Sections.end()),
1190 y: FilteredSectionIterator(isText, Sections.end(), Sections.end()));
1191 }
1192
1193 /// Iterate over all registered allocatable sections.
1194 iterator_range<FilteredSectionConstIterator> allocatableSections() const {
1195 return const_cast<BinaryContext *>(this)->allocatableSections();
1196 }
1197
1198 /// Iterate over all registered non-allocatable sections.
1199 iterator_range<FilteredSectionIterator> nonAllocatableSections() {
1200 auto notAllocated = [](const SectionIterator &Itr) {
1201 return *Itr && !Itr->isAllocatable();
1202 };
1203 return make_range(
1204 x: FilteredSectionIterator(notAllocated, Sections.begin(), Sections.end()),
1205 y: FilteredSectionIterator(notAllocated, Sections.end(), Sections.end()));
1206 }
1207
1208 /// Iterate over all registered non-allocatable sections.
1209 iterator_range<FilteredSectionConstIterator> nonAllocatableSections() const {
1210 return const_cast<BinaryContext *>(this)->nonAllocatableSections();
1211 }
1212
1213 /// Iterate over all allocatable relocation sections.
1214 iterator_range<FilteredSectionIterator> allocatableRelaSections() {
1215 auto isAllocatableRela = [](const SectionIterator &Itr) {
1216 return *Itr && Itr->isAllocatable() && Itr->isRela();
1217 };
1218 return make_range(x: FilteredSectionIterator(isAllocatableRela,
1219 Sections.begin(), Sections.end()),
1220 y: FilteredSectionIterator(isAllocatableRela, Sections.end(),
1221 Sections.end()));
1222 }
1223
1224 /// Return base address for the shared object or PIE based on the segment
1225 /// mapping information. \p MMapAddress is an address where one of the
1226 /// segments was mapped. \p FileOffset is the offset in the file of the
1227 /// mapping. Note that \p FileOffset should be page-aligned and could be
1228 /// different from the file offset of the segment which could be unaligned.
1229 /// If no segment is found that matches \p FileOffset, return std::nullopt.
1230 std::optional<uint64_t> getBaseAddressForMapping(uint64_t MMapAddress,
1231 uint64_t FileOffset) const;
1232
1233 /// Check if the address belongs to this binary's static allocation space.
1234 bool containsAddress(uint64_t Address) const {
1235 return Address >= FirstAllocAddress && Address < LayoutStartAddress;
1236 }
1237
1238 /// Return section name containing the given \p Address.
1239 ErrorOr<StringRef> getSectionNameForAddress(uint64_t Address) const;
1240
1241 /// Print all sections.
1242 void printSections(raw_ostream &OS) const;
1243
1244 /// Return largest section containing the given \p Address. These
1245 /// functions only work for allocatable sections, i.e. ones with non-zero
1246 /// addresses.
1247 ErrorOr<BinarySection &> getSectionForAddress(uint64_t Address);
1248 ErrorOr<const BinarySection &> getSectionForAddress(uint64_t Address) const {
1249 return const_cast<BinaryContext *>(this)->getSectionForAddress(Address);
1250 }
1251
1252 /// Return internal section representation for a section in a file.
1253 BinarySection *getSectionForSectionRef(SectionRef Section) const {
1254 return SectionRefToBinarySection.lookup(Val: Section);
1255 }
1256
1257 /// Return section(s) associated with given \p Name.
1258 iterator_range<NameToSectionMapType::iterator>
1259 getSectionByName(const Twine &Name) {
1260 return make_range(p: NameToSection.equal_range(x: Name.str()));
1261 }
1262 iterator_range<NameToSectionMapType::const_iterator>
1263 getSectionByName(const Twine &Name) const {
1264 return make_range(p: NameToSection.equal_range(x: Name.str()));
1265 }
1266
1267 /// Return the unique section associated with given \p Name.
1268 /// If there is more than one section with the same name, return an error
1269 /// object.
1270 ErrorOr<BinarySection &>
1271 getUniqueSectionByName(const Twine &SectionName) const {
1272 auto Sections = getSectionByName(Name: SectionName);
1273 if (Sections.begin() != Sections.end() &&
1274 std::next(x: Sections.begin()) == Sections.end())
1275 return *Sections.begin()->second;
1276 return std::make_error_code(e: std::errc::bad_address);
1277 }
1278
1279 /// Return an unsigned value of \p Size stored at \p Address. The address has
1280 /// to be a valid statically allocated address for the binary.
1281 ErrorOr<uint64_t> getUnsignedValueAtAddress(uint64_t Address,
1282 size_t Size) const;
1283
1284 /// Return a signed value of \p Size stored at \p Address. The address has
1285 /// to be a valid statically allocated address for the binary.
1286 ErrorOr<int64_t> getSignedValueAtAddress(uint64_t Address, size_t Size) const;
1287
1288 /// Special case of getUnsignedValueAtAddress() that uses a pointer size.
1289 ErrorOr<uint64_t> getPointerAtAddress(uint64_t Address) const {
1290 return getUnsignedValueAtAddress(Address, Size: AsmInfo->getCodePointerSize());
1291 }
1292
1293 /// Replaces all references to \p ChildBF with \p ParentBF. \p ChildBF is then
1294 /// removed from the list of functions \p BFs. The profile data of \p ChildBF
1295 /// is merged into that of \p ParentBF. This function is thread safe.
1296 void foldFunction(BinaryFunction &ChildBF, BinaryFunction &ParentBF);
1297
1298 /// Add a Section relocation at a given \p Address.
1299 void addRelocation(uint64_t Address, MCSymbol *Symbol, uint32_t Type,
1300 uint64_t Addend = 0, uint64_t Value = 0);
1301
1302 /// Return a relocation registered at a given \p Address, or nullptr if there
1303 /// is no relocation at such address.
1304 const Relocation *getRelocationAt(uint64_t Address) const;
1305
1306 /// Register a presence of PC-relative relocation at the given \p Address.
1307 void addPCRelativeDataRelocation(uint64_t Address) {
1308 DataPCRelocations.emplace(args&: Address);
1309 }
1310
1311 /// Register dynamic relocation at \p Address.
1312 void addDynamicRelocation(uint64_t Address, MCSymbol *Symbol, uint32_t Type,
1313 uint64_t Addend, uint64_t Value = 0);
1314
1315 /// Return a dynamic relocation registered at a given \p Address, or nullptr
1316 /// if there is no dynamic relocation at such address.
1317 const Relocation *getDynamicRelocationAt(uint64_t Address) const;
1318
1319 /// Remove registered relocation at a given \p Address.
1320 bool removeRelocationAt(uint64_t Address);
1321
1322 /// This function makes sure that symbols referenced by ambiguous relocations
1323 /// are marked as immovable. For now, if a section relocation points at the
1324 /// boundary between two symbols then those symbols are marked as immovable.
1325 void markAmbiguousRelocations(BinaryData &BD, const uint64_t Address);
1326
1327 /// Return BinaryFunction corresponding to \p Symbol. If \p EntryDesc is not
/// nullptr, set it to entry discriminator corresponding to \p Symbol
1329 /// (0 for single-entry functions). This function is thread safe.
1330 BinaryFunction *getFunctionForSymbol(const MCSymbol *Symbol,
1331 uint64_t *EntryDesc = nullptr);
1332
1333 const BinaryFunction *
1334 getFunctionForSymbol(const MCSymbol *Symbol,
1335 uint64_t *EntryDesc = nullptr) const {
1336 return const_cast<BinaryContext *>(this)->getFunctionForSymbol(Symbol,
1337 EntryDesc);
1338 }
1339
1340 /// Associate the symbol \p Sym with the function \p BF for lookups with
1341 /// getFunctionForSymbol().
1342 void setSymbolToFunctionMap(const MCSymbol *Sym, BinaryFunction *BF) {
1343 SymbolToFunctionMap[Sym] = BF;
1344 }
1345
1346 /// Populate some internal data structures with debug info.
1347 void preprocessDebugInfo();
1348
1349 /// Add a filename entry from SrcCUID to DestCUID.
1350 unsigned addDebugFilenameToUnit(const uint32_t DestCUID,
1351 const uint32_t SrcCUID, unsigned FileIndex);
1352
1353 /// Return functions in output layout order
1354 std::vector<BinaryFunction *> getSortedFunctions();
1355
1356 /// Do the best effort to calculate the size of the function by emitting
1357 /// its code, and relaxing branch instructions. By default, branch
1358 /// instructions are updated to match the layout. Pass \p FixBranches set to
1359 /// false if the branches are known to be up to date with the code layout.
1360 ///
1361 /// Return the pair where the first size is for the main part, and the second
1362 /// size is for the cold one.
1363 /// Modify BinaryBasicBlock::OutputAddressRange for each basic block in the
1364 /// function in place so that BinaryBasicBlock::getOutputSize() gives the
1365 /// emitted size of the basic block.
1366 std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF,
1367 bool FixBranches = true);
1368
1369 /// Calculate the size of the instruction \p Inst optionally using a
1370 /// user-supplied emitter for lock-free multi-thread work. MCCodeEmitter is
1371 /// not thread safe and each thread should operate with its own copy of it.
1372 uint64_t
1373 computeInstructionSize(const MCInst &Inst,
1374 const MCCodeEmitter *Emitter = nullptr) const {
1375 if (std::optional<uint32_t> Size = MIB->getSize(Inst))
1376 return *Size;
1377
1378 if (MIB->isPseudo(Inst))
1379 return 0;
1380
1381 if (std::optional<uint32_t> Size = MIB->getInstructionSize(Inst))
1382 return *Size;
1383
1384 if (!Emitter)
1385 Emitter = this->MCE.get();
1386 SmallString<256> Code;
1387 SmallVector<MCFixup, 4> Fixups;
1388 Emitter->encodeInstruction(Inst, CB&: Code, Fixups, STI: *STI);
1389 return Code.size();
1390 }
1391
1392 /// Compute the native code size for a range of instructions.
1393 /// Note: this can be imprecise wrt the final binary since happening prior to
1394 /// relaxation, as well as wrt the original binary because of opcode
1395 /// shortening.MCCodeEmitter is not thread safe and each thread should operate
1396 /// with its own copy of it.
1397 template <typename Itr>
1398 uint64_t computeCodeSize(Itr Beg, Itr End,
1399 const MCCodeEmitter *Emitter = nullptr) const {
1400 uint64_t Size = 0;
1401 while (Beg != End) {
1402 if (!MIB->isPseudo(Inst: *Beg))
1403 Size += computeInstructionSize(Inst: *Beg, Emitter);
1404 ++Beg;
1405 }
1406 return Size;
1407 }
1408
1409 /// Validate that disassembling the \p Sequence of bytes into an instruction
1410 /// and assembling the instruction again, results in a byte sequence identical
1411 /// to the original one.
1412 bool validateInstructionEncoding(ArrayRef<uint8_t> Sequence) const;
1413
1414 /// Return a function execution count threshold for determining whether
1415 /// the function is 'hot'. Consider it hot if count is above the average exec
1416 /// count of profiled functions.
1417 uint64_t getHotThreshold() const;
1418
1419 /// Return true if instruction \p Inst requires an offset for further
1420 /// processing (e.g. assigning a profile).
1421 bool keepOffsetForInstruction(const MCInst &Inst) const {
1422 if (MIB->isCall(Inst) || MIB->isBranch(Inst) || MIB->isReturn(Inst) ||
1423 MIB->isPrefix(Inst) || MIB->isIndirectBranch(Inst)) {
1424 return true;
1425 }
1426 return false;
1427 }
1428
1429 /// Return true if the function should be emitted to the output file.
1430 bool shouldEmit(const BinaryFunction &Function) const;
1431
1432 /// Dump the assembly representation of MCInst to debug output.
1433 void dump(const MCInst &Inst) const;
1434
1435 /// Print the string name for a CFI operation.
1436 static void printCFI(raw_ostream &OS, const MCCFIInstruction &Inst);
1437
1438 /// Print a single MCInst in native format. If Function is non-null,
1439 /// the instruction will be annotated with CFI and possibly DWARF line table
1440 /// info.
1441 /// If printMCInst is true, the instruction is also printed in the
1442 /// architecture independent format.
1443 void printInstruction(raw_ostream &OS, const MCInst &Instruction,
1444 uint64_t Offset = 0,
1445 const BinaryFunction *Function = nullptr,
1446 bool PrintMCInst = false, bool PrintMemData = false,
1447 bool PrintRelocations = false,
1448 StringRef Endl = "\n") const;
1449
1450 /// Print data when embedded in the instruction stream keeping the format
1451 /// similar to printInstruction().
1452 void printData(raw_ostream &OS, ArrayRef<uint8_t> Data,
1453 uint64_t Offset) const;
1454
1455 /// Extract data from the binary corresponding to [Address, Address + Size)
1456 /// range. Return an empty ArrayRef if the address range does not belong to
1457 /// any section in the binary, crosses a section boundary, or falls into a
1458 /// virtual section.
1459 ArrayRef<uint8_t> extractData(uint64_t Address, uint64_t Size) const;
1460
1461 /// Print a range of instructions.
1462 template <typename Itr>
1463 uint64_t
1464 printInstructions(raw_ostream &OS, Itr Begin, Itr End, uint64_t Offset = 0,
1465 const BinaryFunction *Function = nullptr,
1466 bool PrintMCInst = false, bool PrintMemData = false,
1467 bool PrintRelocations = false,
1468 StringRef Endl = "\n") const {
1469 while (Begin != End) {
1470 printInstruction(OS, Instruction: *Begin, Offset, Function, PrintMCInst, PrintMemData,
1471 PrintRelocations, Endl);
1472 Offset += computeCodeSize(Begin, Begin + 1);
1473 ++Begin;
1474 }
1475 return Offset;
1476 }
1477
1478 /// Log BOLT errors to journaling streams and quit process with non-zero error
1479 /// code 1 if error is fatal.
1480 void logBOLTErrorsAndQuitOnFatal(Error E);
1481
1482 std::string generateBugReportMessage(StringRef Message,
1483 const BinaryFunction &Function) const;
1484
1485 struct IndependentCodeEmitter {
1486 std::unique_ptr<MCObjectFileInfo> LocalMOFI;
1487 std::unique_ptr<MCContext> LocalCtx;
1488 std::unique_ptr<MCCodeEmitter> MCE;
1489 };
1490
1491 /// Encapsulates an independent MCCodeEmitter that doesn't share resources
1492 /// with the main one available through BinaryContext::MCE, managed by
1493 /// BinaryContext.
1494 /// This is intended to create a lock-free environment for an auxiliary thread
1495 /// that needs to perform work with an MCCodeEmitter that can be transient or
1496 /// won't be used in the main code emitter.
1497 IndependentCodeEmitter createIndependentMCCodeEmitter() const {
1498 IndependentCodeEmitter MCEInstance;
1499 MCEInstance.LocalCtx.reset(
1500 p: new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
1501 MCEInstance.LocalMOFI.reset(
1502 p: TheTarget->createMCObjectFileInfo(Ctx&: *MCEInstance.LocalCtx,
1503 /*PIC=*/PIC: !HasFixedLoadAddress));
1504 MCEInstance.LocalCtx->setObjectFileInfo(MCEInstance.LocalMOFI.get());
1505 MCEInstance.MCE.reset(
1506 p: TheTarget->createMCCodeEmitter(II: *MII, Ctx&: *MCEInstance.LocalCtx));
1507 return MCEInstance;
1508 }
1509
1510 /// Creating MCStreamer instance.
1511 std::unique_ptr<MCStreamer>
1512 createStreamer(llvm::raw_pwrite_stream &OS) const {
1513 MCCodeEmitter *MCE = TheTarget->createMCCodeEmitter(II: *MII, Ctx&: *Ctx);
1514 MCAsmBackend *MAB =
1515 TheTarget->createMCAsmBackend(STI: *STI, MRI: *MRI, Options: MCTargetOptions());
1516 std::unique_ptr<MCObjectWriter> OW = MAB->createObjectWriter(OS);
1517 std::unique_ptr<MCStreamer> Streamer(TheTarget->createMCObjectStreamer(
1518 T: *TheTriple, Ctx&: *Ctx, TAB: std::unique_ptr<MCAsmBackend>(MAB), OW: std::move(OW),
1519 Emitter: std::unique_ptr<MCCodeEmitter>(MCE), STI: *STI));
1520 return Streamer;
1521 }
1522
1523 void setIOAddressMap(AddressMap Map) { IOAddressMap = std::move(Map); }
1524 const AddressMap &getIOAddressMap() const {
1525 assert(IOAddressMap && "Address map not set yet");
1526 return *IOAddressMap;
1527 }
1528
1529 raw_ostream &outs() const { return Logger.Out; }
1530
1531 raw_ostream &errs() const { return Logger.Err; }
1532};
1533
1534template <typename T, typename = std::enable_if_t<sizeof(T) == 1>>
1535inline raw_ostream &operator<<(raw_ostream &OS, const ArrayRef<T> &ByteArray) {
1536 const char *Sep = "";
1537 for (const auto Byte : ByteArray) {
1538 OS << Sep << format("%.2x", Byte);
1539 Sep = " ";
1540 }
1541 return OS;
1542}
1543
1544} // namespace bolt
1545} // namespace llvm
1546
1547#endif
1548

source code of bolt/include/bolt/Core/BinaryContext.h