1//===- SyntheticSections.h -------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H
10#define LLD_MACHO_SYNTHETIC_SECTIONS_H
11
12#include "Config.h"
13#include "ExportTrie.h"
14#include "InputSection.h"
15#include "OutputSection.h"
16#include "OutputSegment.h"
17#include "Target.h"
18#include "Writer.h"
19
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/ADT/Hashing.h"
22#include "llvm/ADT/SetVector.h"
23#include "llvm/BinaryFormat/MachO.h"
24#include "llvm/Support/MathExtras.h"
25#include "llvm/Support/raw_ostream.h"
26
27#include <unordered_map>
28
29namespace llvm {
30class DWARFUnit;
31} // namespace llvm
32
33namespace lld::macho {
34
35class Defined;
36class DylibSymbol;
37class LoadCommand;
38class ObjFile;
39class UnwindInfoSection;
40
41class SyntheticSection : public OutputSection {
42public:
43 SyntheticSection(const char *segname, const char *name);
44 virtual ~SyntheticSection() = default;
45
46 static bool classof(const OutputSection *sec) {
47 return sec->kind() == SyntheticKind;
48 }
49
50 StringRef segname;
51 // This fake InputSection makes it easier for us to write code that applies
52 // generically to both user inputs and synthetics.
53 InputSection *isec;
54};
55
56// All sections in __LINKEDIT should inherit from this.
57class LinkEditSection : public SyntheticSection {
58public:
59 LinkEditSection(const char *segname, const char *name)
60 : SyntheticSection(segname, name) {
61 align = target->wordSize;
62 }
63
64 // Implementations of this method can assume that the regular (non-__LINKEDIT)
65 // sections already have their addresses assigned.
66 virtual void finalizeContents() {}
67
68 // Sections in __LINKEDIT are special: their offsets are recorded in the
69 // load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section
70 // headers.
71 bool isHidden() const final { return true; }
72
73 virtual uint64_t getRawSize() const = 0;
74
75 // codesign (or more specifically libstuff) checks that each section in
76 // __LINKEDIT ends where the next one starts -- no gaps are permitted. We
77 // therefore align every section's start and end points to WordSize.
78 //
79 // NOTE: This assumes that the extra bytes required for alignment can be
80 // zero-valued bytes.
81 uint64_t getSize() const final { return llvm::alignTo(Value: getRawSize(), Align: align); }
82};
83
84// The header of the Mach-O file, which must have a file offset of zero.
85class MachHeaderSection final : public SyntheticSection {
86public:
87 MachHeaderSection();
88 bool isHidden() const override { return true; }
89 uint64_t getSize() const override;
90 void writeTo(uint8_t *buf) const override;
91
92 void addLoadCommand(LoadCommand *);
93
94protected:
95 std::vector<LoadCommand *> loadCommands;
96 uint32_t sizeOfCmds = 0;
97};
98
99// A hidden section that exists solely for the purpose of creating the
100// __PAGEZERO segment, which is used to catch null pointer dereferences.
101class PageZeroSection final : public SyntheticSection {
102public:
103 PageZeroSection();
104 bool isHidden() const override { return true; }
105 bool isNeeded() const override { return target->pageZeroSize != 0; }
106 uint64_t getSize() const override { return target->pageZeroSize; }
107 uint64_t getFileSize() const override { return 0; }
108 void writeTo(uint8_t *buf) const override {}
109};
110
111// This is the base class for the GOT and TLVPointer sections, which are nearly
112// functionally identical -- they will both be populated by dyld with addresses
113// to non-lazily-loaded dylib symbols. The main difference is that the
114// TLVPointerSection stores references to thread-local variables.
115class NonLazyPointerSectionBase : public SyntheticSection {
116public:
117 NonLazyPointerSectionBase(const char *segname, const char *name);
118 const llvm::SetVector<const Symbol *> &getEntries() const { return entries; }
119 bool isNeeded() const override { return !entries.empty(); }
120 uint64_t getSize() const override {
121 return entries.size() * target->wordSize;
122 }
123 void writeTo(uint8_t *buf) const override;
124 void addEntry(Symbol *sym);
125 uint64_t getVA(uint32_t gotIndex) const {
126 return addr + gotIndex * target->wordSize;
127 }
128
129private:
130 llvm::SetVector<const Symbol *> entries;
131};
132
133class GotSection final : public NonLazyPointerSectionBase {
134public:
135 GotSection();
136};
137
138class TlvPointerSection final : public NonLazyPointerSectionBase {
139public:
140 TlvPointerSection();
141};
142
143struct Location {
144 const InputSection *isec;
145 uint64_t offset;
146
147 Location(const InputSection *isec, uint64_t offset)
148 : isec(isec), offset(offset) {}
149 uint64_t getVA() const { return isec->getVA(off: offset); }
150};
151
152// Stores rebase opcodes, which tell dyld where absolute addresses have been
153// encoded in the binary. If the binary is not loaded at its preferred address,
154// dyld has to rebase these addresses by adding an offset to them.
155class RebaseSection final : public LinkEditSection {
156public:
157 RebaseSection();
158 void finalizeContents() override;
159 uint64_t getRawSize() const override { return contents.size(); }
160 bool isNeeded() const override { return !locations.empty(); }
161 void writeTo(uint8_t *buf) const override;
162
163 void addEntry(const InputSection *isec, uint64_t offset) {
164 if (config->isPic)
165 locations.emplace_back(args&: isec, args&: offset);
166 }
167
168private:
169 std::vector<Location> locations;
170 SmallVector<char, 128> contents;
171};
172
173struct BindingEntry {
174 int64_t addend;
175 Location target;
176 BindingEntry(int64_t addend, Location target)
177 : addend(addend), target(target) {}
178};
179
180template <class Sym>
181using BindingsMap = llvm::DenseMap<Sym, std::vector<BindingEntry>>;
182
183// Stores bind opcodes for telling dyld which symbols to load non-lazily.
184class BindingSection final : public LinkEditSection {
185public:
186 BindingSection();
187 void finalizeContents() override;
188 uint64_t getRawSize() const override { return contents.size(); }
189 bool isNeeded() const override { return !bindingsMap.empty(); }
190 void writeTo(uint8_t *buf) const override;
191
192 void addEntry(const Symbol *dysym, const InputSection *isec, uint64_t offset,
193 int64_t addend = 0) {
194 bindingsMap[dysym].emplace_back(args&: addend, args: Location(isec, offset));
195 }
196
197private:
198 BindingsMap<const Symbol *> bindingsMap;
199 SmallVector<char, 128> contents;
200};
201
202// Stores bind opcodes for telling dyld which weak symbols need coalescing.
203// There are two types of entries in this section:
204//
205// 1) Non-weak definitions: This is a symbol definition that weak symbols in
206// other dylibs should coalesce to.
207//
208// 2) Weak bindings: These tell dyld that a given symbol reference should
209// coalesce to a non-weak definition if one is found. Note that unlike the
210// entries in the BindingSection, the bindings here only refer to these
211// symbols by name, but do not specify which dylib to load them from.
212class WeakBindingSection final : public LinkEditSection {
213public:
214 WeakBindingSection();
215 void finalizeContents() override;
216 uint64_t getRawSize() const override { return contents.size(); }
217 bool isNeeded() const override {
218 return !bindingsMap.empty() || !definitions.empty();
219 }
220
221 void writeTo(uint8_t *buf) const override;
222
223 void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset,
224 int64_t addend = 0) {
225 bindingsMap[symbol].emplace_back(args&: addend, args: Location(isec, offset));
226 }
227
228 bool hasEntry() const { return !bindingsMap.empty(); }
229
230 void addNonWeakDefinition(const Defined *defined) {
231 definitions.emplace_back(args&: defined);
232 }
233
234 bool hasNonWeakDefinition() const { return !definitions.empty(); }
235
236private:
237 BindingsMap<const Symbol *> bindingsMap;
238 std::vector<const Defined *> definitions;
239 SmallVector<char, 128> contents;
240};
241
242// The following sections implement lazy symbol binding -- very similar to the
243// PLT mechanism in ELF.
244//
245// ELF's .plt section is broken up into two sections in Mach-O: StubsSection
246// and StubHelperSection. Calls to functions in dylibs will end up calling into
247// StubsSection, which contains indirect jumps to addresses stored in the
248// LazyPointerSection (the counterpart to ELF's .plt.got).
249//
250// We will first describe how non-weak symbols are handled.
251//
252// At program start, the LazyPointerSection contains addresses that point into
253// one of the entry points in the middle of the StubHelperSection. The code in
254// StubHelperSection will push on the stack an offset into the
255// LazyBindingSection. The push is followed by a jump to the beginning of the
256// StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder.
257// dyld_stub_binder is a non-lazily-bound symbol, so this call looks it up in
258// the GOT.
259//
260// The stub binder will look up the bind opcodes in the LazyBindingSection at
261// the given offset. The bind opcodes will tell the binder to update the
262// address in the LazyPointerSection to point to the symbol, so that subsequent
263// calls don't have to redo the symbol resolution. The binder will then jump to
264// the resolved symbol.
265//
266// With weak symbols, the situation is slightly different. Since there is no
267// "weak lazy" lookup, function calls to weak symbols are always non-lazily
268// bound. We emit both regular non-lazy bindings as well as weak bindings, in
269// order that the weak bindings may overwrite the non-lazy bindings if an
270// appropriate symbol is found at runtime. However, the bound addresses will
271// still be written (non-lazily) into the LazyPointerSection.
272//
273// Symbols are always bound eagerly when chained fixups are used. In that case,
274// StubsSection contains indirect jumps to addresses stored in the GotSection.
275// The GOT directly contains the fixup entries, which will be replaced by the
276// address of the target symbols on load. LazyPointerSection and
277// StubHelperSection are not used.
278
279class StubsSection final : public SyntheticSection {
280public:
281 StubsSection();
282 uint64_t getSize() const override;
283 bool isNeeded() const override { return !entries.empty(); }
284 void finalize() override;
285 void writeTo(uint8_t *buf) const override;
286 const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
287 // Creates a stub for the symbol and the corresponding entry in the
288 // LazyPointerSection.
289 void addEntry(Symbol *);
290 uint64_t getVA(uint32_t stubsIndex) const {
291 assert(isFinal || target->usesThunks());
292 // ConcatOutputSection::finalize() can seek the address of a
293 // stub before its address is assigned. Before __stubs is
294 // finalized, return a contrived out-of-range address.
295 return isFinal ? addr + stubsIndex * target->stubSize
296 : TargetInfo::outOfRangeVA;
297 }
298
299 bool isFinal = false; // is address assigned?
300
301private:
302 llvm::SetVector<Symbol *> entries;
303};
304
305class StubHelperSection final : public SyntheticSection {
306public:
307 StubHelperSection();
308 uint64_t getSize() const override;
309 bool isNeeded() const override;
310 void writeTo(uint8_t *buf) const override;
311
312 void setUp();
313
314 DylibSymbol *stubBinder = nullptr;
315 Defined *dyldPrivate = nullptr;
316};
317
318class ObjCSelRefsHelper {
319public:
320 static void initialize();
321 static void cleanup();
322
323 static ConcatInputSection *getSelRef(StringRef methname);
324 static ConcatInputSection *makeSelRef(StringRef methname);
325
326private:
327 static llvm::DenseMap<llvm::CachedHashStringRef, ConcatInputSection *>
328 methnameToSelref;
329};
330
331// Objective-C stubs are hoisted objc_msgSend calls per selector called in the
332// program. Apple Clang produces undefined symbols to each stub, such as
333// '_objc_msgSend$foo', which are then synthesized by the linker. The stubs
334// load the particular selector 'foo' from __objc_selrefs, setting it to the
335// first argument of the objc_msgSend call, and then jumps to objc_msgSend. The
336// actual stub contents are mirrored from ld64.
337class ObjCStubsSection final : public SyntheticSection {
338public:
339 ObjCStubsSection();
340 void addEntry(Symbol *sym);
341 uint64_t getSize() const override;
342 bool isNeeded() const override { return !symbols.empty(); }
343 void finalize() override { isec->isFinal = true; }
344 void writeTo(uint8_t *buf) const override;
345 void setUp();
346
347 static constexpr llvm::StringLiteral symbolPrefix = "_objc_msgSend$";
348 static bool isObjCStubSymbol(Symbol *sym);
349 static StringRef getMethname(Symbol *sym);
350
351private:
352 std::vector<Defined *> symbols;
353 Symbol *objcMsgSend = nullptr;
354};
355
356// Note that this section may also be targeted by non-lazy bindings. In
357// particular, this happens when branch relocations target weak symbols.
358class LazyPointerSection final : public SyntheticSection {
359public:
360 LazyPointerSection();
361 uint64_t getSize() const override;
362 bool isNeeded() const override;
363 void writeTo(uint8_t *buf) const override;
364 uint64_t getVA(uint32_t index) const {
365 return addr + (index << target->p2WordSize);
366 }
367};
368
369class LazyBindingSection final : public LinkEditSection {
370public:
371 LazyBindingSection();
372 void finalizeContents() override;
373 uint64_t getRawSize() const override { return contents.size(); }
374 bool isNeeded() const override { return !entries.empty(); }
375 void writeTo(uint8_t *buf) const override;
376 // Note that every entry here will by referenced by a corresponding entry in
377 // the StubHelperSection.
378 void addEntry(Symbol *dysym);
379 const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
380
381private:
382 uint32_t encode(const Symbol &);
383
384 llvm::SetVector<Symbol *> entries;
385 SmallVector<char, 128> contents;
386 llvm::raw_svector_ostream os{contents};
387};
388
389// Stores a trie that describes the set of exported symbols.
390class ExportSection final : public LinkEditSection {
391public:
392 ExportSection();
393 void finalizeContents() override;
394 uint64_t getRawSize() const override { return size; }
395 bool isNeeded() const override { return size; }
396 void writeTo(uint8_t *buf) const override;
397
398 bool hasWeakSymbol = false;
399
400private:
401 TrieBuilder trieBuilder;
402 size_t size = 0;
403};
404
405// Stores 'data in code' entries that describe the locations of data regions
406// inside code sections. This is used by llvm-objdump to distinguish jump tables
407// and stop them from being disassembled as instructions.
408class DataInCodeSection final : public LinkEditSection {
409public:
410 DataInCodeSection();
411 void finalizeContents() override;
412 uint64_t getRawSize() const override {
413 return sizeof(llvm::MachO::data_in_code_entry) * entries.size();
414 }
415 void writeTo(uint8_t *buf) const override;
416
417private:
418 std::vector<llvm::MachO::data_in_code_entry> entries;
419};
420
421// Stores ULEB128 delta encoded addresses of functions.
422class FunctionStartsSection final : public LinkEditSection {
423public:
424 FunctionStartsSection();
425 void finalizeContents() override;
426 uint64_t getRawSize() const override { return contents.size(); }
427 void writeTo(uint8_t *buf) const override;
428
429private:
430 SmallVector<char, 128> contents;
431};
432
433// Stores the strings referenced by the symbol table.
434class StringTableSection final : public LinkEditSection {
435public:
436 StringTableSection();
437 // Returns the start offset of the added string.
438 uint32_t addString(StringRef);
439 uint64_t getRawSize() const override { return size; }
440 void writeTo(uint8_t *buf) const override;
441
442 static constexpr size_t emptyStringIndex = 1;
443
444private:
445 // ld64 emits string tables which start with a space and a zero byte. We
446 // match its behavior here since some tools depend on it.
447 // Consequently, the empty string will be at index 1, not zero.
448 std::vector<StringRef> strings{" "};
449 size_t size = 2;
450};
451
452struct SymtabEntry {
453 Symbol *sym;
454 size_t strx;
455};
456
457struct StabsEntry {
458 uint8_t type = 0;
459 uint32_t strx = StringTableSection::emptyStringIndex;
460 uint8_t sect = 0;
461 uint16_t desc = 0;
462 uint64_t value = 0;
463
464 StabsEntry() = default;
465 explicit StabsEntry(uint8_t type) : type(type) {}
466};
467
468// Symbols of the same type must be laid out contiguously: we choose to emit
469// all local symbols first, then external symbols, and finally undefined
470// symbols. For each symbol type, the LC_DYSYMTAB load command will record the
471// range (start index and total number) of those symbols in the symbol table.
472class SymtabSection : public LinkEditSection {
473public:
474 void finalizeContents() override;
475 uint32_t getNumSymbols() const;
476 uint32_t getNumLocalSymbols() const {
477 return stabs.size() + localSymbols.size();
478 }
479 uint32_t getNumExternalSymbols() const { return externalSymbols.size(); }
480 uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
481
482private:
483 void emitBeginSourceStab(StringRef);
484 void emitEndSourceStab();
485 void emitObjectFileStab(ObjFile *);
486 void emitEndFunStab(Defined *);
487 void emitStabs();
488
489protected:
490 SymtabSection(StringTableSection &);
491
492 StringTableSection &stringTableSection;
493 // STABS symbols are always local symbols, but we represent them with special
494 // entries because they may use fields like n_sect and n_desc differently.
495 std::vector<StabsEntry> stabs;
496 std::vector<SymtabEntry> localSymbols;
497 std::vector<SymtabEntry> externalSymbols;
498 std::vector<SymtabEntry> undefinedSymbols;
499};
500
501template <class LP> SymtabSection *makeSymtabSection(StringTableSection &);
502
503// The indirect symbol table is a list of 32-bit integers that serve as indices
504// into the (actual) symbol table. The indirect symbol table is a
505// concatenation of several sub-arrays of indices, each sub-array belonging to
506// a separate section. The starting offset of each sub-array is stored in the
507// reserved1 header field of the respective section.
508//
509// These sub-arrays provide symbol information for sections that store
510// contiguous sequences of symbol references. These references can be pointers
511// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
512// function stubs).
513class IndirectSymtabSection final : public LinkEditSection {
514public:
515 IndirectSymtabSection();
516 void finalizeContents() override;
517 uint32_t getNumSymbols() const;
518 uint64_t getRawSize() const override {
519 return getNumSymbols() * sizeof(uint32_t);
520 }
521 bool isNeeded() const override;
522 void writeTo(uint8_t *buf) const override;
523};
524
525// The code signature comes at the very end of the linked output file.
526class CodeSignatureSection final : public LinkEditSection {
527public:
528 // NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file
529 // and any changes here, should be repeated there.
530 static constexpr uint8_t blockSizeShift = 12;
531 static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB
532 static constexpr size_t hashSize = 256 / 8;
533 static constexpr size_t blobHeadersSize = llvm::alignTo<8>(
534 Value: sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex));
535 static constexpr uint32_t fixedHeadersSize =
536 blobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory);
537
538 uint32_t fileNamePad = 0;
539 uint32_t allHeadersSize = 0;
540 StringRef fileName;
541
542 CodeSignatureSection();
543 uint64_t getRawSize() const override;
544 bool isNeeded() const override { return true; }
545 void writeTo(uint8_t *buf) const override;
546 uint32_t getBlockCount() const;
547 void writeHashes(uint8_t *buf) const;
548};
549
550class CStringSection : public SyntheticSection {
551public:
552 CStringSection(const char *name);
553 void addInput(CStringInputSection *);
554 uint64_t getSize() const override { return size; }
555 virtual void finalizeContents();
556 bool isNeeded() const override { return !inputs.empty(); }
557 void writeTo(uint8_t *buf) const override;
558
559 std::vector<CStringInputSection *> inputs;
560
561private:
562 uint64_t size;
563};
564
565class DeduplicatedCStringSection final : public CStringSection {
566public:
567 DeduplicatedCStringSection(const char *name) : CStringSection(name){};
568 uint64_t getSize() const override { return size; }
569 void finalizeContents() override;
570 void writeTo(uint8_t *buf) const override;
571
572 struct StringOffset {
573 uint8_t trailingZeros;
574 uint64_t outSecOff = UINT64_MAX;
575
576 explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {}
577 };
578
579 StringOffset getStringOffset(StringRef str) const;
580
581private:
582 llvm::DenseMap<llvm::CachedHashStringRef, StringOffset> stringOffsetMap;
583 size_t size = 0;
584};
585
586/*
587 * This section contains deduplicated literal values. The 16-byte values are
588 * laid out first, followed by the 8- and then the 4-byte ones.
589 */
590class WordLiteralSection final : public SyntheticSection {
591public:
592 using UInt128 = std::pair<uint64_t, uint64_t>;
593 // I don't think the standard guarantees the size of a pair, so let's make
594 // sure it's exact -- that way we can construct it via `mmap`.
595 static_assert(sizeof(UInt128) == 16);
596
597 WordLiteralSection();
598 void addInput(WordLiteralInputSection *);
599 void finalizeContents();
600 void writeTo(uint8_t *buf) const override;
601
602 uint64_t getSize() const override {
603 return literal16Map.size() * 16 + literal8Map.size() * 8 +
604 literal4Map.size() * 4;
605 }
606
607 bool isNeeded() const override {
608 return !literal16Map.empty() || !literal4Map.empty() ||
609 !literal8Map.empty();
610 }
611
612 uint64_t getLiteral16Offset(uintptr_t buf) const {
613 return literal16Map.at(k: *reinterpret_cast<const UInt128 *>(buf)) * 16;
614 }
615
616 uint64_t getLiteral8Offset(uintptr_t buf) const {
617 return literal16Map.size() * 16 +
618 literal8Map.at(k: *reinterpret_cast<const uint64_t *>(buf)) * 8;
619 }
620
621 uint64_t getLiteral4Offset(uintptr_t buf) const {
622 return literal16Map.size() * 16 + literal8Map.size() * 8 +
623 literal4Map.at(k: *reinterpret_cast<const uint32_t *>(buf)) * 4;
624 }
625
626private:
627 std::vector<WordLiteralInputSection *> inputs;
628
629 template <class T> struct Hasher {
630 llvm::hash_code operator()(T v) const { return llvm::hash_value(v); }
631 };
632 // We're using unordered_map instead of DenseMap here because we need to
633 // support all possible integer values -- there are no suitable tombstone
634 // values for DenseMap.
635 std::unordered_map<UInt128, uint64_t, Hasher<UInt128>> literal16Map;
636 std::unordered_map<uint64_t, uint64_t> literal8Map;
637 std::unordered_map<uint32_t, uint64_t> literal4Map;
638};
639
640class ObjCImageInfoSection final : public SyntheticSection {
641public:
642 ObjCImageInfoSection();
643 bool isNeeded() const override { return !files.empty(); }
644 uint64_t getSize() const override { return 8; }
645 void addFile(const InputFile *file) {
646 assert(!file->objCImageInfo.empty());
647 files.push_back(x: file);
648 }
649 void finalizeContents();
650 void writeTo(uint8_t *buf) const override;
651
652private:
653 struct ImageInfo {
654 uint8_t swiftVersion = 0;
655 bool hasCategoryClassProperties = false;
656 } info;
657 static ImageInfo parseImageInfo(const InputFile *);
658 std::vector<const InputFile *> files; // files with image info
659};
660
661// This section stores 32-bit __TEXT segment offsets of initializer functions.
662//
663// The compiler stores pointers to initializers in __mod_init_func. These need
664// to be fixed up at load time, which takes time and dirties memory. By
665// synthesizing InitOffsetsSection from them, this data can live in the
666// read-only __TEXT segment instead. This section is used by default when
667// chained fixups are enabled.
668//
669// There is no similar counterpart to __mod_term_func, as that section is
670// deprecated, and static destructors are instead handled by registering them
671// via __cxa_atexit from an autogenerated initializer function (see D121736).
672class InitOffsetsSection final : public SyntheticSection {
673public:
674 InitOffsetsSection();
675 bool isNeeded() const override { return !sections.empty(); }
676 uint64_t getSize() const override;
677 void writeTo(uint8_t *buf) const override;
678 void setUp();
679
680 void addInput(ConcatInputSection *isec) { sections.push_back(x: isec); }
681 const std::vector<ConcatInputSection *> &inputs() const { return sections; }
682
683private:
684 std::vector<ConcatInputSection *> sections;
685};
686
687// This SyntheticSection is for the __objc_methlist section, which contains
688// relative method lists if the -objc_relative_method_lists option is enabled.
689class ObjCMethListSection final : public SyntheticSection {
690public:
691 ObjCMethListSection();
692
693 static bool isMethodList(const ConcatInputSection *isec);
694 void addInput(ConcatInputSection *isec) { inputs.push_back(x: isec); }
695 std::vector<ConcatInputSection *> getInputs() { return inputs; }
696
697 void setUp();
698 void finalize() override;
699 bool isNeeded() const override { return !inputs.empty(); }
700 uint64_t getSize() const override { return sectionSize; }
701 void writeTo(uint8_t *bufStart) const override;
702
703private:
704 void readMethodListHeader(const uint8_t *buf, uint32_t &structSizeAndFlags,
705 uint32_t &structCount) const;
706 void writeMethodListHeader(uint8_t *buf, uint32_t structSizeAndFlags,
707 uint32_t structCount) const;
708 uint32_t computeRelativeMethodListSize(uint32_t absoluteMethodListSize) const;
709 void writeRelativeOffsetForIsec(const ConcatInputSection *isec, uint8_t *buf,
710 uint32_t &inSecOff, uint32_t &outSecOff,
711 bool useSelRef) const;
712 uint32_t writeRelativeMethodList(const ConcatInputSection *isec,
713 uint8_t *buf) const;
714
715 static constexpr uint32_t methodListHeaderSize =
716 /*structSizeAndFlags*/ sizeof(uint32_t) +
717 /*structCount*/ sizeof(uint32_t);
718 // Relative method lists are supported only for 3-pointer method lists
719 static constexpr uint32_t pointersPerStruct = 3;
720 // The runtime identifies relative method lists via this magic value
721 static constexpr uint32_t relMethodHeaderFlag = 0x80000000;
722 // In the method list header, the first 2 bytes are the size of struct
723 static constexpr uint32_t structSizeMask = 0x0000FFFF;
724 // In the method list header, the last 2 bytes are the flags for the struct
725 static constexpr uint32_t structFlagsMask = 0xFFFF0000;
726 // Relative method lists have 4 byte alignment as all data in the InputSection
727 // is 4 byte
728 static constexpr uint32_t relativeOffsetSize = sizeof(uint32_t);
729
730 // The output size of the __objc_methlist section, computed during finalize()
731 uint32_t sectionSize = 0;
732 std::vector<ConcatInputSection *> inputs;
733};
734
735// Chained fixups are a replacement for classic dyld opcodes. In this format,
736// most of the metadata necessary for binding symbols and rebasing addresses is
737// stored directly in the memory location that will have the fixup applied.
738//
739// The fixups form singly linked lists; each one covering a single page in
740// memory. The __LINKEDIT,__chainfixups section stores the page offset of the
741// first fixup of each page; the rest can be found by walking the chain using
742// the offset that is embedded in each entry.
743//
744// This setup allows pages to be relocated lazily at page-in time and without
745// being dirtied. The kernel can discard and load them again as needed. This
746// technique, called page-in linking, was introduced in macOS 13.
747//
748// The benefits of this format are:
749// - smaller __LINKEDIT segment, as most of the fixup information is stored in
750// the data segment
751// - faster startup, since not all relocations need to be done upfront
752// - slightly lower memory usage, as fewer pages are dirtied
753//
754// Userspace x86_64 and arm64 binaries have two types of fixup entries:
755// - Rebase entries contain an absolute address, to which the object's load
756// address will be added to get the final value. This is used for loading
757// the address of a symbol defined in the same binary.
758// - Binding entries are mostly used for symbols imported from other dylibs,
759// but for weakly bound and interposable symbols as well. They are looked up
760// by a (symbol name, library) pair stored in __chainfixups. This import
761// entry also encodes whether the import is weak (i.e. if the symbol is
762// missing, it should be set to null instead of producing a load error).
763// The fixup encodes an ordinal associated with the import, and an optional
764// addend.
765//
766// The entries are tightly packed 64-bit bitfields. One of the bits specifies
767// which kind of fixup to interpret them as.
768//
769// LLD generates the fixup data in 5 stages:
770// 1. While scanning relocations, we make a note of each location that needs
771// a fixup by calling addRebase() or addBinding(). During this, we assign
772// a unique ordinal for each (symbol name, library, addend) import tuple.
773// 2. After addresses have been assigned to all sections, and thus the memory
774// layout of the linked image is final; finalizeContents() is called. Here,
775// the page offsets of the chain start entries are calculated.
776// 3. ChainedFixupsSection::writeTo() writes the page start offsets and the
777// imports table to the output file.
778// 4. Each section's fixup entries are encoded and written to disk in
779// ConcatInputSection::writeTo(), but without writing the offsets that form
780// the chain.
781// 5. Finally, each page's (which might correspond to multiple sections)
782// fixups are linked together in Writer::buildFixupChains().
783class ChainedFixupsSection final : public LinkEditSection {
784public:
785 ChainedFixupsSection();
786 void finalizeContents() override;
787 uint64_t getRawSize() const override { return size; }
788 bool isNeeded() const override;
789 void writeTo(uint8_t *buf) const override;
790
791 void addRebase(const InputSection *isec, uint64_t offset) {
792 locations.emplace_back(args&: isec, args&: offset);
793 }
794 void addBinding(const Symbol *dysym, const InputSection *isec,
795 uint64_t offset, int64_t addend = 0);
796
797 void setHasNonWeakDefinition() { hasNonWeakDef = true; }
798
799 // Returns an (ordinal, inline addend) tuple used by dyld_chained_ptr_64_bind.
800 std::pair<uint32_t, uint8_t> getBinding(const Symbol *sym,
801 int64_t addend) const;
802
803 const std::vector<Location> &getLocations() const { return locations; }
804
805 bool hasWeakBinding() const { return hasWeakBind; }
806 bool hasNonWeakDefinition() const { return hasNonWeakDef; }
807
808private:
809 // Location::offset initially stores the offset within an InputSection, but
810 // contains output segment offsets after finalizeContents().
811 std::vector<Location> locations;
812 // (target symbol, addend) => import ordinal
813 llvm::MapVector<std::pair<const Symbol *, int64_t>, uint32_t> bindings;
814
815 struct SegmentInfo {
816 SegmentInfo(const OutputSegment *oseg) : oseg(oseg) {}
817
818 const OutputSegment *oseg;
819 // (page index, fixup starts offset)
820 llvm::SmallVector<std::pair<uint16_t, uint16_t>> pageStarts;
821
822 size_t getSize() const;
823 size_t writeTo(uint8_t *buf) const;
824 };
825 llvm::SmallVector<SegmentInfo, 4> fixupSegments;
826
827 size_t symtabSize = 0;
828 size_t size = 0;
829
830 bool needsAddend = false;
831 bool needsLargeAddend = false;
832 bool hasWeakBind = false;
833 bool hasNonWeakDef = false;
834 llvm::MachO::ChainedImportFormat importFormat;
835};
836
837void writeChainedRebase(uint8_t *buf, uint64_t targetVA);
838void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
839
840struct InStruct {
841 const uint8_t *bufferStart = nullptr;
842 MachHeaderSection *header = nullptr;
843 CStringSection *cStringSection = nullptr;
844 DeduplicatedCStringSection *objcMethnameSection = nullptr;
845 WordLiteralSection *wordLiteralSection = nullptr;
846 RebaseSection *rebase = nullptr;
847 BindingSection *binding = nullptr;
848 WeakBindingSection *weakBinding = nullptr;
849 LazyBindingSection *lazyBinding = nullptr;
850 ExportSection *exports = nullptr;
851 GotSection *got = nullptr;
852 TlvPointerSection *tlvPointers = nullptr;
853 LazyPointerSection *lazyPointers = nullptr;
854 StubsSection *stubs = nullptr;
855 StubHelperSection *stubHelper = nullptr;
856 ObjCStubsSection *objcStubs = nullptr;
857 UnwindInfoSection *unwindInfo = nullptr;
858 ObjCImageInfoSection *objCImageInfo = nullptr;
859 ConcatInputSection *imageLoaderCache = nullptr;
860 InitOffsetsSection *initOffsets = nullptr;
861 ObjCMethListSection *objcMethList = nullptr;
862 ChainedFixupsSection *chainedFixups = nullptr;
863};
864
865extern InStruct in;
866extern std::vector<SyntheticSection *> syntheticSections;
867
868void createSyntheticSymbols();
869
870} // namespace lld::macho
871
872#endif
873

// Source: lld/MachO/SyntheticSections.h