1//===- SyntheticSections.h -------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H
10#define LLD_MACHO_SYNTHETIC_SECTIONS_H
11
12#include "Config.h"
13#include "ExportTrie.h"
14#include "InputSection.h"
15#include "OutputSection.h"
16#include "OutputSegment.h"
17#include "Target.h"
18#include "Writer.h"
19
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/Hashing.h"
22#include "llvm/ADT/SetVector.h"
23#include "llvm/BinaryFormat/MachO.h"
24#include "llvm/Support/MathExtras.h"
25#include "llvm/Support/raw_ostream.h"
26
27#include <unordered_map>
28
// Forward declaration of llvm::DWARFUnit.
namespace llvm {
class DWARFUnit;
} // namespace llvm
32
33namespace lld::macho {
34
// Forward declarations of types that this header only names through pointers
// or references.
class Defined;
class DylibSymbol;
class LoadCommand;
class ObjFile;
class UnwindInfoSection;
40
41class SyntheticSection : public OutputSection {
42public:
43 SyntheticSection(const char *segname, const char *name);
44 virtual ~SyntheticSection() = default;
45
46 static bool classof(const OutputSection *sec) {
47 return sec->kind() == SyntheticKind;
48 }
49
50 StringRef segname;
51 // This fake InputSection makes it easier for us to write code that applies
52 // generically to both user inputs and synthetics.
53 InputSection *isec;
54};
55
56// All sections in __LINKEDIT should inherit from this.
57class LinkEditSection : public SyntheticSection {
58public:
59 LinkEditSection(const char *segname, const char *name)
60 : SyntheticSection(segname, name) {
61 align = target->wordSize;
62 }
63
64 // Implementations of this method can assume that the regular (non-__LINKEDIT)
65 // sections already have their addresses assigned.
66 virtual void finalizeContents() {}
67
68 // Sections in __LINKEDIT are special: their offsets are recorded in the
69 // load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section
70 // headers.
71 bool isHidden() const final { return true; }
72
73 virtual uint64_t getRawSize() const = 0;
74
75 // codesign (or more specifically libstuff) checks that each section in
76 // __LINKEDIT ends where the next one starts -- no gaps are permitted. We
77 // therefore align every section's start and end points to WordSize.
78 //
79 // NOTE: This assumes that the extra bytes required for alignment can be
80 // zero-valued bytes.
81 uint64_t getSize() const final { return llvm::alignTo(Value: getRawSize(), Align: align); }
82};
83
84// The header of the Mach-O file, which must have a file offset of zero.
85class MachHeaderSection final : public SyntheticSection {
86public:
87 MachHeaderSection();
88 bool isHidden() const override { return true; }
89 uint64_t getSize() const override;
90 void writeTo(uint8_t *buf) const override;
91
92 void addLoadCommand(LoadCommand *);
93
94protected:
95 std::vector<LoadCommand *> loadCommands;
96 uint32_t sizeOfCmds = 0;
97};
98
99// A hidden section that exists solely for the purpose of creating the
100// __PAGEZERO segment, which is used to catch null pointer dereferences.
101class PageZeroSection final : public SyntheticSection {
102public:
103 PageZeroSection();
104 bool isHidden() const override { return true; }
105 bool isNeeded() const override { return target->pageZeroSize != 0; }
106 uint64_t getSize() const override { return target->pageZeroSize; }
107 uint64_t getFileSize() const override { return 0; }
108 void writeTo(uint8_t *buf) const override {}
109};
110
111// This is the base class for the GOT and TLVPointer sections, which are nearly
112// functionally identical -- they will both be populated by dyld with addresses
113// to non-lazily-loaded dylib symbols. The main difference is that the
114// TLVPointerSection stores references to thread-local variables.
115class NonLazyPointerSectionBase : public SyntheticSection {
116public:
117 NonLazyPointerSectionBase(const char *segname, const char *name);
118 const llvm::SetVector<const Symbol *> &getEntries() const { return entries; }
119 bool isNeeded() const override { return !entries.empty(); }
120 uint64_t getSize() const override {
121 return entries.size() * target->wordSize;
122 }
123 void writeTo(uint8_t *buf) const override;
124 void addEntry(Symbol *sym);
125 uint64_t getVA(uint32_t gotIndex) const {
126 return addr + gotIndex * target->wordSize;
127 }
128
129private:
130 llvm::SetVector<const Symbol *> entries;
131};
132
133class GotSection final : public NonLazyPointerSectionBase {
134public:
135 GotSection();
136};
137
138class TlvPointerSection final : public NonLazyPointerSectionBase {
139public:
140 TlvPointerSection();
141};
142
143struct Location {
144 const InputSection *isec;
145 uint64_t offset;
146
147 Location(const InputSection *isec, uint64_t offset)
148 : isec(isec), offset(offset) {}
149 uint64_t getVA() const { return isec->getVA(off: offset); }
150};
151
152// Stores rebase opcodes, which tell dyld where absolute addresses have been
153// encoded in the binary. If the binary is not loaded at its preferred address,
154// dyld has to rebase these addresses by adding an offset to them.
155class RebaseSection final : public LinkEditSection {
156public:
157 RebaseSection();
158 void finalizeContents() override;
159 uint64_t getRawSize() const override { return contents.size(); }
160 bool isNeeded() const override { return !locations.empty(); }
161 void writeTo(uint8_t *buf) const override;
162
163 void addEntry(const InputSection *isec, uint64_t offset) {
164 if (config->isPic)
165 locations.emplace_back(args&: isec, args&: offset);
166 }
167
168private:
169 std::vector<Location> locations;
170 SmallVector<char, 128> contents;
171};
172
173struct BindingEntry {
174 int64_t addend;
175 Location target;
176 BindingEntry(int64_t addend, Location target)
177 : addend(addend), target(target) {}
178};
179
180template <class Sym>
181using BindingsMap = llvm::DenseMap<Sym, std::vector<BindingEntry>>;
182
183// Stores bind opcodes for telling dyld which symbols to load non-lazily.
184class BindingSection final : public LinkEditSection {
185public:
186 BindingSection();
187 void finalizeContents() override;
188 uint64_t getRawSize() const override { return contents.size(); }
189 bool isNeeded() const override { return !bindingsMap.empty(); }
190 void writeTo(uint8_t *buf) const override;
191
192 void addEntry(const Symbol *dysym, const InputSection *isec, uint64_t offset,
193 int64_t addend = 0) {
194 bindingsMap[dysym].emplace_back(args&: addend, args: Location(isec, offset));
195 }
196
197private:
198 BindingsMap<const Symbol *> bindingsMap;
199 SmallVector<char, 128> contents;
200};
201
202// Stores bind opcodes for telling dyld which weak symbols need coalescing.
203// There are two types of entries in this section:
204//
205// 1) Non-weak definitions: This is a symbol definition that weak symbols in
206// other dylibs should coalesce to.
207//
208// 2) Weak bindings: These tell dyld that a given symbol reference should
209// coalesce to a non-weak definition if one is found. Note that unlike the
210// entries in the BindingSection, the bindings here only refer to these
211// symbols by name, but do not specify which dylib to load them from.
212class WeakBindingSection final : public LinkEditSection {
213public:
214 WeakBindingSection();
215 void finalizeContents() override;
216 uint64_t getRawSize() const override { return contents.size(); }
217 bool isNeeded() const override {
218 return !bindingsMap.empty() || !definitions.empty();
219 }
220
221 void writeTo(uint8_t *buf) const override;
222
223 void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset,
224 int64_t addend = 0) {
225 bindingsMap[symbol].emplace_back(args&: addend, args: Location(isec, offset));
226 }
227
228 bool hasEntry() const { return !bindingsMap.empty(); }
229
230 void addNonWeakDefinition(const Defined *defined) {
231 definitions.emplace_back(args&: defined);
232 }
233
234 bool hasNonWeakDefinition() const { return !definitions.empty(); }
235
236private:
237 BindingsMap<const Symbol *> bindingsMap;
238 std::vector<const Defined *> definitions;
239 SmallVector<char, 128> contents;
240};
241
242// The following sections implement lazy symbol binding -- very similar to the
243// PLT mechanism in ELF.
244//
245// ELF's .plt section is broken up into two sections in Mach-O: StubsSection
246// and StubHelperSection. Calls to functions in dylibs will end up calling into
247// StubsSection, which contains indirect jumps to addresses stored in the
248// LazyPointerSection (the counterpart to ELF's .plt.got).
249//
250// We will first describe how non-weak symbols are handled.
251//
252// At program start, the LazyPointerSection contains addresses that point into
253// one of the entry points in the middle of the StubHelperSection. The code in
254// StubHelperSection will push on the stack an offset into the
255// LazyBindingSection. The push is followed by a jump to the beginning of the
256// StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder.
257// dyld_stub_binder is a non-lazily-bound symbol, so this call looks it up in
258// the GOT.
259//
260// The stub binder will look up the bind opcodes in the LazyBindingSection at
261// the given offset. The bind opcodes will tell the binder to update the
262// address in the LazyPointerSection to point to the symbol, so that subsequent
263// calls don't have to redo the symbol resolution. The binder will then jump to
264// the resolved symbol.
265//
266// With weak symbols, the situation is slightly different. Since there is no
267// "weak lazy" lookup, function calls to weak symbols are always non-lazily
268// bound. We emit both regular non-lazy bindings as well as weak bindings, in
269// order that the weak bindings may overwrite the non-lazy bindings if an
270// appropriate symbol is found at runtime. However, the bound addresses will
271// still be written (non-lazily) into the LazyPointerSection.
272//
273// Symbols are always bound eagerly when chained fixups are used. In that case,
274// StubsSection contains indirect jumps to addresses stored in the GotSection.
275// The GOT directly contains the fixup entries, which will be replaced by the
276// address of the target symbols on load. LazyPointerSection and
277// StubHelperSection are not used.
278
279class StubsSection final : public SyntheticSection {
280public:
281 StubsSection();
282 uint64_t getSize() const override;
283 bool isNeeded() const override { return !entries.empty(); }
284 void finalize() override;
285 void writeTo(uint8_t *buf) const override;
286 const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
287 // Creates a stub for the symbol and the corresponding entry in the
288 // LazyPointerSection.
289 void addEntry(Symbol *);
290 uint64_t getVA(uint32_t stubsIndex) const {
291 assert(isFinal || target->usesThunks());
292 // ConcatOutputSection::finalize() can seek the address of a
293 // stub before its address is assigned. Before __stubs is
294 // finalized, return a contrived out-of-range address.
295 return isFinal ? addr + stubsIndex * target->stubSize
296 : TargetInfo::outOfRangeVA;
297 }
298
299 bool isFinal = false; // is address assigned?
300
301private:
302 llvm::SetVector<Symbol *> entries;
303};
304
305class StubHelperSection final : public SyntheticSection {
306public:
307 StubHelperSection();
308 uint64_t getSize() const override;
309 bool isNeeded() const override;
310 void writeTo(uint8_t *buf) const override;
311
312 void setUp();
313
314 DylibSymbol *stubBinder = nullptr;
315 Defined *dyldPrivate = nullptr;
316};
317
318// Objective-C stubs are hoisted objc_msgSend calls per selector called in the
319// program. Apple Clang produces undefined symbols to each stub, such as
320// '_objc_msgSend$foo', which are then synthesized by the linker. The stubs
321// load the particular selector 'foo' from __objc_selrefs, setting it to the
322// first argument of the objc_msgSend call, and then jumps to objc_msgSend. The
323// actual stub contents are mirrored from ld64.
324class ObjCStubsSection final : public SyntheticSection {
325public:
326 ObjCStubsSection();
327 void initialize();
328 void addEntry(Symbol *sym);
329 uint64_t getSize() const override;
330 bool isNeeded() const override { return !symbols.empty(); }
331 void finalize() override { isec->isFinal = true; }
332 void writeTo(uint8_t *buf) const override;
333 void setUp();
334
335 static constexpr llvm::StringLiteral symbolPrefix = "_objc_msgSend$";
336 static bool isObjCStubSymbol(Symbol *sym);
337 static StringRef getMethname(Symbol *sym);
338
339private:
340 std::vector<Defined *> symbols;
341 llvm::DenseMap<llvm::CachedHashStringRef, InputSection *> methnameToSelref;
342 Symbol *objcMsgSend = nullptr;
343};
344
345// Note that this section may also be targeted by non-lazy bindings. In
346// particular, this happens when branch relocations target weak symbols.
347class LazyPointerSection final : public SyntheticSection {
348public:
349 LazyPointerSection();
350 uint64_t getSize() const override;
351 bool isNeeded() const override;
352 void writeTo(uint8_t *buf) const override;
353 uint64_t getVA(uint32_t index) const {
354 return addr + (index << target->p2WordSize);
355 }
356};
357
358class LazyBindingSection final : public LinkEditSection {
359public:
360 LazyBindingSection();
361 void finalizeContents() override;
362 uint64_t getRawSize() const override { return contents.size(); }
363 bool isNeeded() const override { return !entries.empty(); }
364 void writeTo(uint8_t *buf) const override;
365 // Note that every entry here will by referenced by a corresponding entry in
366 // the StubHelperSection.
367 void addEntry(Symbol *dysym);
368 const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
369
370private:
371 uint32_t encode(const Symbol &);
372
373 llvm::SetVector<Symbol *> entries;
374 SmallVector<char, 128> contents;
375 llvm::raw_svector_ostream os{contents};
376};
377
378// Stores a trie that describes the set of exported symbols.
379class ExportSection final : public LinkEditSection {
380public:
381 ExportSection();
382 void finalizeContents() override;
383 uint64_t getRawSize() const override { return size; }
384 bool isNeeded() const override { return size; }
385 void writeTo(uint8_t *buf) const override;
386
387 bool hasWeakSymbol = false;
388
389private:
390 TrieBuilder trieBuilder;
391 size_t size = 0;
392};
393
394// Stores 'data in code' entries that describe the locations of data regions
395// inside code sections. This is used by llvm-objdump to distinguish jump tables
396// and stop them from being disassembled as instructions.
397class DataInCodeSection final : public LinkEditSection {
398public:
399 DataInCodeSection();
400 void finalizeContents() override;
401 uint64_t getRawSize() const override {
402 return sizeof(llvm::MachO::data_in_code_entry) * entries.size();
403 }
404 void writeTo(uint8_t *buf) const override;
405
406private:
407 std::vector<llvm::MachO::data_in_code_entry> entries;
408};
409
410// Stores ULEB128 delta encoded addresses of functions.
411class FunctionStartsSection final : public LinkEditSection {
412public:
413 FunctionStartsSection();
414 void finalizeContents() override;
415 uint64_t getRawSize() const override { return contents.size(); }
416 void writeTo(uint8_t *buf) const override;
417
418private:
419 SmallVector<char, 128> contents;
420};
421
422// Stores the strings referenced by the symbol table.
423class StringTableSection final : public LinkEditSection {
424public:
425 StringTableSection();
426 // Returns the start offset of the added string.
427 uint32_t addString(StringRef);
428 uint64_t getRawSize() const override { return size; }
429 void writeTo(uint8_t *buf) const override;
430
431 static constexpr size_t emptyStringIndex = 1;
432
433private:
434 // ld64 emits string tables which start with a space and a zero byte. We
435 // match its behavior here since some tools depend on it.
436 // Consequently, the empty string will be at index 1, not zero.
437 std::vector<StringRef> strings{" "};
438 size_t size = 2;
439};
440
441struct SymtabEntry {
442 Symbol *sym;
443 size_t strx;
444};
445
446struct StabsEntry {
447 uint8_t type = 0;
448 uint32_t strx = StringTableSection::emptyStringIndex;
449 uint8_t sect = 0;
450 uint16_t desc = 0;
451 uint64_t value = 0;
452
453 StabsEntry() = default;
454 explicit StabsEntry(uint8_t type) : type(type) {}
455};
456
457// Symbols of the same type must be laid out contiguously: we choose to emit
458// all local symbols first, then external symbols, and finally undefined
459// symbols. For each symbol type, the LC_DYSYMTAB load command will record the
460// range (start index and total number) of those symbols in the symbol table.
461class SymtabSection : public LinkEditSection {
462public:
463 void finalizeContents() override;
464 uint32_t getNumSymbols() const;
465 uint32_t getNumLocalSymbols() const {
466 return stabs.size() + localSymbols.size();
467 }
468 uint32_t getNumExternalSymbols() const { return externalSymbols.size(); }
469 uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
470
471private:
472 void emitBeginSourceStab(StringRef);
473 void emitEndSourceStab();
474 void emitObjectFileStab(ObjFile *);
475 void emitEndFunStab(Defined *);
476 void emitStabs();
477
478protected:
479 SymtabSection(StringTableSection &);
480
481 StringTableSection &stringTableSection;
482 // STABS symbols are always local symbols, but we represent them with special
483 // entries because they may use fields like n_sect and n_desc differently.
484 std::vector<StabsEntry> stabs;
485 std::vector<SymtabEntry> localSymbols;
486 std::vector<SymtabEntry> externalSymbols;
487 std::vector<SymtabEntry> undefinedSymbols;
488};
489
490template <class LP> SymtabSection *makeSymtabSection(StringTableSection &);
491
492// The indirect symbol table is a list of 32-bit integers that serve as indices
493// into the (actual) symbol table. The indirect symbol table is a
494// concatenation of several sub-arrays of indices, each sub-array belonging to
495// a separate section. The starting offset of each sub-array is stored in the
496// reserved1 header field of the respective section.
497//
498// These sub-arrays provide symbol information for sections that store
499// contiguous sequences of symbol references. These references can be pointers
500// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
501// function stubs).
502class IndirectSymtabSection final : public LinkEditSection {
503public:
504 IndirectSymtabSection();
505 void finalizeContents() override;
506 uint32_t getNumSymbols() const;
507 uint64_t getRawSize() const override {
508 return getNumSymbols() * sizeof(uint32_t);
509 }
510 bool isNeeded() const override;
511 void writeTo(uint8_t *buf) const override;
512};
513
514// The code signature comes at the very end of the linked output file.
515class CodeSignatureSection final : public LinkEditSection {
516public:
517 // NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file
518 // and any changes here, should be repeated there.
519 static constexpr uint8_t blockSizeShift = 12;
520 static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB
521 static constexpr size_t hashSize = 256 / 8;
522 static constexpr size_t blobHeadersSize = llvm::alignTo<8>(
523 Value: sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex));
524 static constexpr uint32_t fixedHeadersSize =
525 blobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory);
526
527 uint32_t fileNamePad = 0;
528 uint32_t allHeadersSize = 0;
529 StringRef fileName;
530
531 CodeSignatureSection();
532 uint64_t getRawSize() const override;
533 bool isNeeded() const override { return true; }
534 void writeTo(uint8_t *buf) const override;
535 uint32_t getBlockCount() const;
536 void writeHashes(uint8_t *buf) const;
537};
538
539class CStringSection : public SyntheticSection {
540public:
541 CStringSection(const char *name);
542 void addInput(CStringInputSection *);
543 uint64_t getSize() const override { return size; }
544 virtual void finalizeContents();
545 bool isNeeded() const override { return !inputs.empty(); }
546 void writeTo(uint8_t *buf) const override;
547
548 std::vector<CStringInputSection *> inputs;
549
550private:
551 uint64_t size;
552};
553
554class DeduplicatedCStringSection final : public CStringSection {
555public:
556 DeduplicatedCStringSection(const char *name) : CStringSection(name){};
557 uint64_t getSize() const override { return size; }
558 void finalizeContents() override;
559 void writeTo(uint8_t *buf) const override;
560
561 struct StringOffset {
562 uint8_t trailingZeros;
563 uint64_t outSecOff = UINT64_MAX;
564
565 explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {}
566 };
567
568 StringOffset getStringOffset(StringRef str) const;
569
570private:
571 llvm::DenseMap<llvm::CachedHashStringRef, StringOffset> stringOffsetMap;
572 size_t size = 0;
573};
574
575/*
576 * This section contains deduplicated literal values. The 16-byte values are
577 * laid out first, followed by the 8- and then the 4-byte ones.
578 */
579class WordLiteralSection final : public SyntheticSection {
580public:
581 using UInt128 = std::pair<uint64_t, uint64_t>;
582 // I don't think the standard guarantees the size of a pair, so let's make
583 // sure it's exact -- that way we can construct it via `mmap`.
584 static_assert(sizeof(UInt128) == 16);
585
586 WordLiteralSection();
587 void addInput(WordLiteralInputSection *);
588 void finalizeContents();
589 void writeTo(uint8_t *buf) const override;
590
591 uint64_t getSize() const override {
592 return literal16Map.size() * 16 + literal8Map.size() * 8 +
593 literal4Map.size() * 4;
594 }
595
596 bool isNeeded() const override {
597 return !literal16Map.empty() || !literal4Map.empty() ||
598 !literal8Map.empty();
599 }
600
601 uint64_t getLiteral16Offset(uintptr_t buf) const {
602 return literal16Map.at(k: *reinterpret_cast<const UInt128 *>(buf)) * 16;
603 }
604
605 uint64_t getLiteral8Offset(uintptr_t buf) const {
606 return literal16Map.size() * 16 +
607 literal8Map.at(k: *reinterpret_cast<const uint64_t *>(buf)) * 8;
608 }
609
610 uint64_t getLiteral4Offset(uintptr_t buf) const {
611 return literal16Map.size() * 16 + literal8Map.size() * 8 +
612 literal4Map.at(k: *reinterpret_cast<const uint32_t *>(buf)) * 4;
613 }
614
615private:
616 std::vector<WordLiteralInputSection *> inputs;
617
618 template <class T> struct Hasher {
619 llvm::hash_code operator()(T v) const { return llvm::hash_value(v); }
620 };
621 // We're using unordered_map instead of DenseMap here because we need to
622 // support all possible integer values -- there are no suitable tombstone
623 // values for DenseMap.
624 std::unordered_map<UInt128, uint64_t, Hasher<UInt128>> literal16Map;
625 std::unordered_map<uint64_t, uint64_t> literal8Map;
626 std::unordered_map<uint32_t, uint64_t> literal4Map;
627};
628
629class ObjCImageInfoSection final : public SyntheticSection {
630public:
631 ObjCImageInfoSection();
632 bool isNeeded() const override { return !files.empty(); }
633 uint64_t getSize() const override { return 8; }
634 void addFile(const InputFile *file) {
635 assert(!file->objCImageInfo.empty());
636 files.push_back(x: file);
637 }
638 void finalizeContents();
639 void writeTo(uint8_t *buf) const override;
640
641private:
642 struct ImageInfo {
643 uint8_t swiftVersion = 0;
644 bool hasCategoryClassProperties = false;
645 } info;
646 static ImageInfo parseImageInfo(const InputFile *);
647 std::vector<const InputFile *> files; // files with image info
648};
649
650// This section stores 32-bit __TEXT segment offsets of initializer functions.
651//
652// The compiler stores pointers to initializers in __mod_init_func. These need
653// to be fixed up at load time, which takes time and dirties memory. By
654// synthesizing InitOffsetsSection from them, this data can live in the
655// read-only __TEXT segment instead. This section is used by default when
656// chained fixups are enabled.
657//
658// There is no similar counterpart to __mod_term_func, as that section is
659// deprecated, and static destructors are instead handled by registering them
660// via __cxa_atexit from an autogenerated initializer function (see D121736).
661class InitOffsetsSection final : public SyntheticSection {
662public:
663 InitOffsetsSection();
664 bool isNeeded() const override { return !sections.empty(); }
665 uint64_t getSize() const override;
666 void writeTo(uint8_t *buf) const override;
667 void setUp();
668
669 void addInput(ConcatInputSection *isec) { sections.push_back(x: isec); }
670 const std::vector<ConcatInputSection *> &inputs() const { return sections; }
671
672private:
673 std::vector<ConcatInputSection *> sections;
674};
675
676// Chained fixups are a replacement for classic dyld opcodes. In this format,
677// most of the metadata necessary for binding symbols and rebasing addresses is
678// stored directly in the memory location that will have the fixup applied.
679//
680// The fixups form singly linked lists; each one covering a single page in
681// memory. The __LINKEDIT,__chainfixups section stores the page offset of the
682// first fixup of each page; the rest can be found by walking the chain using
683// the offset that is embedded in each entry.
684//
685// This setup allows pages to be relocated lazily at page-in time and without
686// being dirtied. The kernel can discard and load them again as needed. This
687// technique, called page-in linking, was introduced in macOS 13.
688//
689// The benefits of this format are:
690// - smaller __LINKEDIT segment, as most of the fixup information is stored in
691// the data segment
692// - faster startup, since not all relocations need to be done upfront
693// - slightly lower memory usage, as fewer pages are dirtied
694//
695// Userspace x86_64 and arm64 binaries have two types of fixup entries:
696// - Rebase entries contain an absolute address, to which the object's load
697// address will be added to get the final value. This is used for loading
698// the address of a symbol defined in the same binary.
699// - Binding entries are mostly used for symbols imported from other dylibs,
700// but for weakly bound and interposable symbols as well. They are looked up
701// by a (symbol name, library) pair stored in __chainfixups. This import
702// entry also encodes whether the import is weak (i.e. if the symbol is
703// missing, it should be set to null instead of producing a load error).
704// The fixup encodes an ordinal associated with the import, and an optional
705// addend.
706//
707// The entries are tightly packed 64-bit bitfields. One of the bits specifies
708// which kind of fixup to interpret them as.
709//
710// LLD generates the fixup data in 5 stages:
711// 1. While scanning relocations, we make a note of each location that needs
712// a fixup by calling addRebase() or addBinding(). During this, we assign
713// a unique ordinal for each (symbol name, library, addend) import tuple.
714// 2. After addresses have been assigned to all sections, and thus the memory
715// layout of the linked image is final; finalizeContents() is called. Here,
716// the page offsets of the chain start entries are calculated.
717// 3. ChainedFixupsSection::writeTo() writes the page start offsets and the
718// imports table to the output file.
719// 4. Each section's fixup entries are encoded and written to disk in
720// ConcatInputSection::writeTo(), but without writing the offsets that form
721// the chain.
722// 5. Finally, each page's (which might correspond to multiple sections)
723// fixups are linked together in Writer::buildFixupChains().
724class ChainedFixupsSection final : public LinkEditSection {
725public:
726 ChainedFixupsSection();
727 void finalizeContents() override;
728 uint64_t getRawSize() const override { return size; }
729 bool isNeeded() const override;
730 void writeTo(uint8_t *buf) const override;
731
732 void addRebase(const InputSection *isec, uint64_t offset) {
733 locations.emplace_back(args&: isec, args&: offset);
734 }
735 void addBinding(const Symbol *dysym, const InputSection *isec,
736 uint64_t offset, int64_t addend = 0);
737
738 void setHasNonWeakDefinition() { hasNonWeakDef = true; }
739
740 // Returns an (ordinal, inline addend) tuple used by dyld_chained_ptr_64_bind.
741 std::pair<uint32_t, uint8_t> getBinding(const Symbol *sym,
742 int64_t addend) const;
743
744 const std::vector<Location> &getLocations() const { return locations; }
745
746 bool hasWeakBinding() const { return hasWeakBind; }
747 bool hasNonWeakDefinition() const { return hasNonWeakDef; }
748
749private:
750 // Location::offset initially stores the offset within an InputSection, but
751 // contains output segment offsets after finalizeContents().
752 std::vector<Location> locations;
753 // (target symbol, addend) => import ordinal
754 llvm::MapVector<std::pair<const Symbol *, int64_t>, uint32_t> bindings;
755
756 struct SegmentInfo {
757 SegmentInfo(const OutputSegment *oseg) : oseg(oseg) {}
758
759 const OutputSegment *oseg;
760 // (page index, fixup starts offset)
761 llvm::SmallVector<std::pair<uint16_t, uint16_t>> pageStarts;
762
763 size_t getSize() const;
764 size_t writeTo(uint8_t *buf) const;
765 };
766 llvm::SmallVector<SegmentInfo, 4> fixupSegments;
767
768 size_t symtabSize = 0;
769 size_t size = 0;
770
771 bool needsAddend = false;
772 bool needsLargeAddend = false;
773 bool hasWeakBind = false;
774 bool hasNonWeakDef = false;
775 llvm::MachO::ChainedImportFormat importFormat;
776};
777
778void writeChainedRebase(uint8_t *buf, uint64_t targetVA);
779void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
780
781struct InStruct {
782 const uint8_t *bufferStart = nullptr;
783 MachHeaderSection *header = nullptr;
784 CStringSection *cStringSection = nullptr;
785 DeduplicatedCStringSection *objcMethnameSection = nullptr;
786 WordLiteralSection *wordLiteralSection = nullptr;
787 RebaseSection *rebase = nullptr;
788 BindingSection *binding = nullptr;
789 WeakBindingSection *weakBinding = nullptr;
790 LazyBindingSection *lazyBinding = nullptr;
791 ExportSection *exports = nullptr;
792 GotSection *got = nullptr;
793 TlvPointerSection *tlvPointers = nullptr;
794 LazyPointerSection *lazyPointers = nullptr;
795 StubsSection *stubs = nullptr;
796 StubHelperSection *stubHelper = nullptr;
797 ObjCStubsSection *objcStubs = nullptr;
798 UnwindInfoSection *unwindInfo = nullptr;
799 ObjCImageInfoSection *objCImageInfo = nullptr;
800 ConcatInputSection *imageLoaderCache = nullptr;
801 InitOffsetsSection *initOffsets = nullptr;
802 ChainedFixupsSection *chainedFixups = nullptr;
803};
804
805extern InStruct in;
806extern std::vector<SyntheticSection *> syntheticSections;
807
808void createSyntheticSymbols();
809
810} // namespace lld::macho
811
812#endif
813

// Source: lld/MachO/SyntheticSections.h