1//===- bolt/Core/BinarySection.h - Section in a binary file -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the declaration of the BinarySection class, which
10// represents a section in an executable file and contains its properties,
11// flags, contents, and relocations.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef BOLT_CORE_BINARY_SECTION_H
16#define BOLT_CORE_BINARY_SECTION_H
17
18#include "bolt/Core/DebugData.h"
19#include "bolt/Core/Relocation.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/BinaryFormat/ELF.h"
22#include "llvm/Object/ELFObjectFile.h"
23#include "llvm/Object/MachO.h"
24#include "llvm/Object/ObjectFile.h"
25#include "llvm/Support/raw_ostream.h"
26#include <map>
27#include <memory>
28#include <set>
29
30namespace llvm {
31class MCStreamer;
32class MCSymbol;
33
34using namespace object;
35
36namespace bolt {
37
38class BinaryContext;
39class BinaryData;
40
41/// A class to manage binary sections that also manages related relocations.
42class BinarySection {
43 friend class BinaryContext;
44
45 /// Count the number of sections created.
46 static uint64_t Count;
47
48 BinaryContext &BC; // Owning BinaryContext
49 std::string Name; // Section name
50 const SectionRef Section; // SectionRef for input binary sections.
51 StringRef Contents; // Input section contents
52 const uint64_t Address; // Address of section in input binary (may be 0)
53 const uint64_t Size; // Input section size
54 uint64_t InputFileOffset{0}; // Offset in the input binary
55 unsigned Alignment; // alignment in bytes (must be > 0)
56 unsigned ELFType; // ELF section type
57 unsigned ELFFlags; // ELF section flags
58 bool IsRelro{false}; // GNU RELRO section (read-only after relocation)
59
60 // Relocations associated with this section. Relocation offsets are
61 // wrt. to the original section address and size.
62 using RelocationSetType = std::multiset<Relocation, std::less<>>;
63 RelocationSetType Relocations;
64
65 // Dynamic relocations associated with this section. Relocation offsets are
66 // from the original section address.
67 RelocationSetType DynamicRelocations;
68
69 // Pending relocations for this section.
70 std::vector<Relocation> PendingRelocations;
71
72 struct BinaryPatch {
73 uint64_t Offset;
74 SmallString<8> Bytes;
75
76 BinaryPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes)
77 : Offset(Offset), Bytes(Bytes.begin(), Bytes.end()) {}
78 };
79 std::vector<BinaryPatch> Patches;
80 /// Patcher used to apply simple changes to sections of the input binary.
81 std::unique_ptr<BinaryPatcher> Patcher;
82
83 // Output info
84 bool IsFinalized{false}; // Has this section had output information
85 // finalized?
86 std::string OutputName; // Output section name (if the section has
87 // been renamed)
88 uint64_t OutputAddress{0}; // Section address for the rewritten binary.
89 uint64_t OutputSize{0}; // Section size in the rewritten binary.
90 uint64_t OutputFileOffset{0}; // File offset in the rewritten binary file.
91 StringRef OutputContents; // Rewritten section contents.
92 const uint64_t SectionNumber; // Order in which the section was created.
93 std::string SectionID; // Unique ID used for address mapping.
94 // Set by ExecutableFileMemoryManager.
95 uint32_t Index{0}; // Section index in the output file.
96 mutable bool IsReordered{false}; // Have the contents been reordered?
97 bool IsAnonymous{false}; // True if the name should not be included
98 // in the output file.
99 bool IsLinkOnly{false}; // True if the section should not be included
100 // in the output file.
101
102 uint64_t hash(const BinaryData &BD,
103 std::map<const BinaryData *, uint64_t> &Cache) const;
104
105 // non-copyable
106 BinarySection(const BinarySection &) = delete;
107 BinarySection(BinarySection &&) = delete;
108 BinarySection &operator=(const BinarySection &) = delete;
109 BinarySection &operator=(BinarySection &&) = delete;
110
111 static StringRef getName(SectionRef Section) {
112 return cantFail(ValOrErr: Section.getName());
113 }
114 static StringRef getContentsOrQuit(SectionRef Section) {
115 if (Section.getObject()->isELF() &&
116 ELFSectionRef(Section).getType() == ELF::SHT_NOBITS)
117 return StringRef();
118
119 Expected<StringRef> ContentsOrErr = Section.getContents();
120 if (!ContentsOrErr) {
121 Error E = ContentsOrErr.takeError();
122 errs() << "BOLT-ERROR: cannot get section contents for "
123 << getName(Section) << ": " << E << ".\n";
124 exit(status: 1);
125 }
126 return *ContentsOrErr;
127 }
128
129 /// Get the set of relocations referring to data in this section that
130 /// has been reordered. The relocation offsets will be modified to
131 /// reflect the new data locations.
132 RelocationSetType reorderRelocations(bool Inplace) const;
133
134 /// Set output info for this section.
135 void update(uint8_t *NewData, uint64_t NewSize, unsigned NewAlignment,
136 unsigned NewELFType, unsigned NewELFFlags) {
137 assert(NewAlignment > 0 && "section alignment must be > 0");
138 Alignment = NewAlignment;
139 ELFType = NewELFType;
140 ELFFlags = NewELFFlags;
141 updateContents(NewData, NewSize);
142 }
143
144public:
145 /// Copy a section.
146 explicit BinarySection(BinaryContext &BC, const Twine &Name,
147 const BinarySection &Section)
148 : BC(BC), Name(Name.str()), Section(SectionRef()),
149 Contents(Section.getContents()), Address(Section.getAddress()),
150 Size(Section.getSize()), Alignment(Section.getAlignment()),
151 ELFType(Section.getELFType()), ELFFlags(Section.getELFFlags()),
152 Relocations(Section.Relocations),
153 PendingRelocations(Section.PendingRelocations), OutputName(Name.str()),
154 SectionNumber(++Count) {}
155
156 BinarySection(BinaryContext &BC, SectionRef Section)
157 : BC(BC), Name(getName(Section)), Section(Section),
158 Contents(getContentsOrQuit(Section)), Address(Section.getAddress()),
159 Size(Section.getSize()), Alignment(Section.getAlignment().value()),
160 OutputName(Name), SectionNumber(++Count) {
161 if (isELF()) {
162 ELFType = ELFSectionRef(Section).getType();
163 ELFFlags = ELFSectionRef(Section).getFlags();
164 InputFileOffset = ELFSectionRef(Section).getOffset();
165 } else if (isMachO()) {
166 auto *O = cast<MachOObjectFile>(Val: Section.getObject());
167 InputFileOffset =
168 O->is64Bit() ? O->getSection64(DRI: Section.getRawDataRefImpl()).offset
169 : O->getSection(DRI: Section.getRawDataRefImpl()).offset;
170 }
171 }
172
173 // TODO: pass Data as StringRef/ArrayRef? use StringRef::copy method.
174 BinarySection(BinaryContext &BC, const Twine &Name, uint8_t *Data,
175 uint64_t Size, unsigned Alignment, unsigned ELFType,
176 unsigned ELFFlags)
177 : BC(BC), Name(Name.str()),
178 Contents(reinterpret_cast<const char *>(Data), Data ? Size : 0),
179 Address(0), Size(Size), Alignment(Alignment), ELFType(ELFType),
180 ELFFlags(ELFFlags), IsFinalized(true), OutputName(Name.str()),
181 OutputSize(Size), OutputContents(Contents), SectionNumber(++Count) {
182 assert(Alignment > 0 && "section alignment must be > 0");
183 }
184
185 ~BinarySection();
186
187 /// Helper function to generate the proper ELF flags from section properties.
188 static unsigned getFlags(bool IsReadOnly = true, bool IsText = false,
189 bool IsAllocatable = false) {
190 unsigned Flags = 0;
191 if (IsAllocatable)
192 Flags |= ELF::SHF_ALLOC;
193 if (!IsReadOnly)
194 Flags |= ELF::SHF_WRITE;
195 if (IsText)
196 Flags |= ELF::SHF_EXECINSTR;
197 return Flags;
198 }
199
200 operator bool() const { return ELFType != ELF::SHT_NULL; }
201
202 bool operator==(const BinarySection &Other) const {
203 return (Name == Other.Name && Address == Other.Address &&
204 Size == Other.Size && getData() == Other.getData() &&
205 Alignment == Other.Alignment && ELFType == Other.ELFType &&
206 ELFFlags == Other.ELFFlags);
207 }
208
209 bool operator!=(const BinarySection &Other) const {
210 return !operator==(Other);
211 }
212
213 // Order sections by their immutable properties.
214 bool operator<(const BinarySection &Other) const {
215 // Allocatable before non-allocatable.
216 if (isAllocatable() != Other.isAllocatable())
217 return isAllocatable() > Other.isAllocatable();
218
219 // Input sections take precedence.
220 if (hasSectionRef() != Other.hasSectionRef())
221 return hasSectionRef() > Other.hasSectionRef();
222
223 // Compare allocatable input sections by their address.
224 if (hasSectionRef() && getAddress() != Other.getAddress())
225 return getAddress() < Other.getAddress();
226 if (hasSectionRef() && getAddress() && getSize() != Other.getSize())
227 return getSize() < Other.getSize();
228
229 // Code before data.
230 if (isText() != Other.isText())
231 return isText() > Other.isText();
232
233 // Read-only before writable.
234 if (isWritable() != Other.isWritable())
235 return isWritable() < Other.isWritable();
236
237 // BSS at the end.
238 if (isBSS() != Other.isBSS())
239 return isBSS() < Other.isBSS();
240
241 // Otherwise, preserve the order of creation.
242 return SectionNumber < Other.SectionNumber;
243 }
244
245 ///
246 /// Basic property access.
247 ///
248 BinaryContext &getBinaryContext() { return BC; }
249 bool isELF() const;
250 bool isMachO() const;
251 StringRef getName() const { return Name; }
252 uint64_t getAddress() const { return Address; }
253 uint64_t getEndAddress() const { return Address + Size; }
254 uint64_t getSize() const { return Size; }
255 uint64_t getInputFileOffset() const { return InputFileOffset; }
256 Align getAlign() const { return Align(Alignment); }
257 uint64_t getAlignment() const { return Alignment; }
258 bool isText() const {
259 if (isELF())
260 return (ELFFlags & ELF::SHF_EXECINSTR);
261 return hasSectionRef() && getSectionRef().isText();
262 }
263 bool isData() const {
264 if (isELF())
265 return (ELFType == ELF::SHT_PROGBITS &&
266 (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)));
267 return hasSectionRef() && getSectionRef().isData();
268 }
269 bool isBSS() const {
270 return (ELFType == ELF::SHT_NOBITS &&
271 (ELFFlags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)));
272 }
273 bool isTLS() const { return (ELFFlags & ELF::SHF_TLS); }
274 bool isTBSS() const { return isBSS() && isTLS(); }
275 bool isVirtual() const { return ELFType == ELF::SHT_NOBITS; }
276 bool isRela() const { return ELFType == ELF::SHT_RELA; }
277 bool isRelr() const { return ELFType == ELF::SHT_RELR; }
278 bool isWritable() const { return (ELFFlags & ELF::SHF_WRITE); }
279 bool isAllocatable() const {
280 if (isELF()) {
281 return (ELFFlags & ELF::SHF_ALLOC) && !isTBSS();
282 } else {
283 // On non-ELF assume all sections are allocatable.
284 return true;
285 }
286 }
287 bool isReordered() const { return IsReordered; }
288 bool isAnonymous() const { return IsAnonymous; }
289 bool isRelro() const { return IsRelro; }
290 void setRelro() { IsRelro = true; }
291 unsigned getELFType() const { return ELFType; }
292 unsigned getELFFlags() const { return ELFFlags; }
293
294 uint8_t *getData() {
295 return reinterpret_cast<uint8_t *>(
296 const_cast<char *>(getContents().data()));
297 }
298 const uint8_t *getData() const {
299 return reinterpret_cast<const uint8_t *>(getContents().data());
300 }
301 StringRef getContents() const { return Contents; }
302 void clearContents() { Contents = {}; }
303 bool hasSectionRef() const { return Section != SectionRef(); }
304 SectionRef getSectionRef() const { return Section; }
305
306 /// Does this section contain the given \p Address?
307 /// Note: this is in terms of the original mapped binary addresses.
308 bool containsAddress(uint64_t Address) const {
309 return (getAddress() <= Address && Address < getEndAddress()) ||
310 (getSize() == 0 && getAddress() == Address);
311 }
312
313 /// Does this section contain the range [\p Address, \p Address + \p Size)?
314 /// Note: this is in terms of the original mapped binary addresses.
315 bool containsRange(uint64_t Address, uint64_t Size) const {
316 return containsAddress(Address) && Address + Size <= getEndAddress();
317 }
318
319 /// Iterate over all non-pending relocations for this section.
320 iterator_range<RelocationSetType::iterator> relocations() {
321 return make_range(x: Relocations.begin(), y: Relocations.end());
322 }
323
324 /// Iterate over all non-pending relocations for this section.
325 iterator_range<RelocationSetType::const_iterator> relocations() const {
326 return make_range(x: Relocations.begin(), y: Relocations.end());
327 }
328
329 /// Iterate over all dynamic relocations for this section.
330 iterator_range<RelocationSetType::iterator> dynamicRelocations() {
331 return make_range(x: DynamicRelocations.begin(), y: DynamicRelocations.end());
332 }
333
334 /// Iterate over all dynamic relocations for this section.
335 iterator_range<RelocationSetType::const_iterator> dynamicRelocations() const {
336 return make_range(x: DynamicRelocations.begin(), y: DynamicRelocations.end());
337 }
338
339 /// Does this section have any non-pending relocations?
340 bool hasRelocations() const { return !Relocations.empty(); }
341
342 /// Does this section have any pending relocations?
343 bool hasPendingRelocations() const { return !PendingRelocations.empty(); }
344
345 /// Remove non-pending relocation with the given /p Offset.
346 bool removeRelocationAt(uint64_t Offset) {
347 auto Itr = Relocations.find(x: Offset);
348 if (Itr != Relocations.end()) {
349 auto End = Relocations.upper_bound(x: Offset);
350 Relocations.erase(first: Itr, last: End);
351 return true;
352 }
353 return false;
354 }
355
356 void clearRelocations();
357
358 /// Add a new relocation at the given /p Offset.
359 void addRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
360 uint64_t Addend, uint64_t Value = 0,
361 bool Pending = false) {
362 assert(Offset < getSize() && "offset not within section bounds");
363 if (!Pending) {
364 Relocations.emplace(args: Relocation{.Offset: Offset, .Symbol: Symbol, .Type: Type, .Addend: Addend, .Value: Value});
365 } else {
366 PendingRelocations.emplace_back(
367 args: Relocation{.Offset: Offset, .Symbol: Symbol, .Type: Type, .Addend: Addend, .Value: Value});
368 }
369 }
370
371 /// Add a dynamic relocation at the given /p Offset.
372 void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
373 uint64_t Addend, uint64_t Value = 0) {
374 addDynamicRelocation(Reloc: Relocation{.Offset: Offset, .Symbol: Symbol, .Type: Type, .Addend: Addend, .Value: Value});
375 }
376
377 void addDynamicRelocation(const Relocation &Reloc) {
378 assert(Reloc.Offset < getSize() && "offset not within section bounds");
379 DynamicRelocations.emplace(args: Reloc);
380 }
381
382 /// Add relocation against the original contents of this section.
383 void addPendingRelocation(const Relocation &Rel) {
384 PendingRelocations.push_back(x: Rel);
385 }
386
387 /// Add patch to the input contents of this section.
388 void addPatch(uint64_t Offset, const SmallVectorImpl<char> &Bytes) {
389 Patches.emplace_back(args: BinaryPatch(Offset, Bytes));
390 }
391
392 /// Register patcher for this section.
393 void registerPatcher(std::unique_ptr<BinaryPatcher> BPatcher) {
394 Patcher = std::move(BPatcher);
395 }
396
397 /// Returns the patcher
398 BinaryPatcher *getPatcher() { return Patcher.get(); }
399
400 /// Lookup the relocation (if any) at the given /p Offset.
401 const Relocation *getRelocationAt(uint64_t Offset) const {
402 auto Itr = Relocations.find(x: Offset);
403 return Itr != Relocations.end() ? &*Itr : nullptr;
404 }
405
406 /// Lookup the relocation (if any) at the given /p Offset.
407 const Relocation *getDynamicRelocationAt(uint64_t Offset) const {
408 Relocation Key{.Offset: Offset, .Symbol: 0, .Type: 0, .Addend: 0, .Value: 0};
409 auto Itr = DynamicRelocations.find(x: Key);
410 return Itr != DynamicRelocations.end() ? &*Itr : nullptr;
411 }
412
413 std::optional<Relocation> takeDynamicRelocationAt(uint64_t Offset) {
414 Relocation Key{.Offset: Offset, .Symbol: 0, .Type: 0, .Addend: 0, .Value: 0};
415 auto Itr = DynamicRelocations.find(x: Key);
416
417 if (Itr == DynamicRelocations.end())
418 return std::nullopt;
419
420 Relocation Reloc = *Itr;
421 DynamicRelocations.erase(position: Itr);
422 return Reloc;
423 }
424
425 uint64_t hash(const BinaryData &BD) const {
426 std::map<const BinaryData *, uint64_t> Cache;
427 return hash(BD, Cache);
428 }
429
430 ///
431 /// Property accessors related to output data.
432 ///
433
434 bool isFinalized() const { return IsFinalized; }
435 void setIsFinalized() { IsFinalized = true; }
436 StringRef getOutputName() const { return OutputName; }
437 uint64_t getOutputSize() const { return OutputSize; }
438 uint8_t *getOutputData() {
439 return reinterpret_cast<uint8_t *>(
440 const_cast<char *>(getOutputContents().data()));
441 }
442 const uint8_t *getOutputData() const {
443 return reinterpret_cast<const uint8_t *>(getOutputContents().data());
444 }
445 StringRef getOutputContents() const { return OutputContents; }
446 uint64_t getAllocAddress() const {
447 return reinterpret_cast<uint64_t>(getOutputData());
448 }
449 uint64_t getOutputAddress() const { return OutputAddress; }
450 uint64_t getOutputFileOffset() const { return OutputFileOffset; }
451 StringRef getSectionID() const {
452 assert(hasValidSectionID() && "trying to use uninitialized section id");
453 return SectionID;
454 }
455 bool hasValidSectionID() const { return !SectionID.empty(); }
456 bool hasValidIndex() { return Index != 0; }
457 uint32_t getIndex() const { return Index; }
458
459 // mutation
460 void setOutputAddress(uint64_t Address) { OutputAddress = Address; }
461 void setOutputFileOffset(uint64_t Offset) { OutputFileOffset = Offset; }
462 void setSectionID(StringRef ID) {
463 assert(!hasValidSectionID() && "trying to set section id twice");
464 SectionID = ID;
465 }
466 void setIndex(uint32_t I) { Index = I; }
467 void setOutputName(const Twine &Name) { OutputName = Name.str(); }
468 void setAnonymous(bool Flag) { IsAnonymous = Flag; }
469 bool isLinkOnly() const { return IsLinkOnly; }
470 void setLinkOnly() { IsLinkOnly = true; }
471
472 /// Emit the section as data, possibly with relocations.
473 /// Use name \p SectionName for the section during the emission.
474 void emitAsData(MCStreamer &Streamer, const Twine &SectionName) const;
475
476 using SymbolResolverFuncTy = llvm::function_ref<uint64_t(const MCSymbol *)>;
477
478 /// Flush all pending relocations to patch original contents of sections
479 /// that were not emitted via MCStreamer.
480 void flushPendingRelocations(raw_pwrite_stream &OS,
481 SymbolResolverFuncTy Resolver);
482
483 /// Change contents of the section. Unless the section has a valid SectionID,
484 /// the memory passed in \p NewData will be managed by the instance of
485 /// BinarySection.
486 void updateContents(const uint8_t *NewData, size_t NewSize) {
487 if (getOutputData() && !hasValidSectionID() &&
488 (!hasSectionRef() ||
489 OutputContents.data() != getContentsOrQuit(Section).data())) {
490 delete[] getOutputData();
491 }
492
493 OutputContents = StringRef(reinterpret_cast<const char *>(NewData),
494 NewData ? NewSize : 0);
495 OutputSize = NewSize;
496 IsFinalized = true;
497 }
498
499 /// Reorder the contents of this section according to /p Order. If
500 /// /p Inplace is true, the entire contents of the section is reordered,
501 /// otherwise the new contents contain only the reordered data.
502 void reorderContents(const std::vector<BinaryData *> &Order, bool Inplace);
503
504 void print(raw_ostream &OS) const;
505
506 /// Write the contents of an ELF note section given the name of the producer,
507 /// a number identifying the type of note and the contents of the note in
508 /// \p DescStr.
509 static std::string encodeELFNote(StringRef NameStr, StringRef DescStr,
510 uint32_t Type);
511
512 /// Code for ELF notes written by producer 'BOLT'
513 enum { NT_BOLT_BAT = 1, NT_BOLT_INSTRUMENTATION_TABLES = 2 };
514};
515
/// Allocate a new array of \p Size bytes with new[] and fill it with a copy
/// of the bytes starting at \p Data.
///
/// \returns a pointer to the new array. It is always non-null, even when
/// \p Size is 0, and the caller is responsible for releasing it with delete[].
inline uint8_t *copyByteArray(const uint8_t *Data, uint64_t Size) {
  auto *Array = new uint8_t[Size];
  // memcpy requires valid pointers even for a zero-length copy, and an empty
  // buffer is commonly represented by a null Data pointer.
  if (Size != 0)
    memcpy(Array, Data, Size);
  return Array;
}
521
522inline uint8_t *copyByteArray(StringRef Buffer) {
523 return copyByteArray(Data: reinterpret_cast<const uint8_t *>(Buffer.data()),
524 Size: Buffer.size());
525}
526
527inline uint8_t *copyByteArray(ArrayRef<char> Buffer) {
528 return copyByteArray(Data: reinterpret_cast<const uint8_t *>(Buffer.data()),
529 Size: Buffer.size());
530}
531
532inline raw_ostream &operator<<(raw_ostream &OS, const BinarySection &Section) {
533 Section.print(OS);
534 return OS;
535}
536
537} // namespace bolt
538} // namespace llvm
539
540#endif
541

// Source: bolt/include/bolt/Core/BinarySection.h