SyntheticSections.h source code [lld/MachO/SyntheticSections.h]

1	//===- SyntheticSections.h -------------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H
10	#define LLD_MACHO_SYNTHETIC_SECTIONS_H
11
12	#include "Config.h"
13	#include "ExportTrie.h"
14	#include "InputSection.h"
15	#include "OutputSection.h"
16	#include "OutputSegment.h"
17	#include "Target.h"
18	#include "Writer.h"
19
20	#include "llvm/ADT/DenseMap.h"
21	#include "llvm/ADT/Hashing.h"
22	#include "llvm/ADT/MapVector.h"
23	#include "llvm/ADT/SetVector.h"
24	#include "llvm/BinaryFormat/MachO.h"
25	#include "llvm/Support/MathExtras.h"
26	#include "llvm/Support/raw_ostream.h"
27
28	#include <unordered_map>
29
// Forward-declared so this header does not need the full class definition.
namespace llvm {
class DWARFUnit;
} // namespace llvm
33
34	namespace lld::macho {
35
// Project types that this header only names through pointers or references.
class Defined;
class DylibSymbol;
class LoadCommand;
class ObjFile;
class UnwindInfoSection;
41
42	class SyntheticSection : public OutputSection {
43	public:
44	SyntheticSection(const char segname, const* char *name);
45	virtual ~SyntheticSection() = default;
46
47	static bool classof(const OutputSection *sec) {
48	return sec->kind() == SyntheticKind;
49	}
50
51	StringRef segname;
52	// This fake InputSection makes it easier for us to write code that applies
53	// generically to both user inputs and synthetics.
54	InputSection *isec;
55	};
56
57	// All sections in __LINKEDIT should inherit from this.
58	class LinkEditSection : public SyntheticSection {
59	public:
60	LinkEditSection(const char segname, const* char *name)
61	: SyntheticSection (segname, name) {
62	align = target->wordSize;
63	}
64
65	// Implementations of this method can assume that the regular (non-__LINKEDIT)
66	// sections already have their addresses assigned.
67	virtual void finalizeContents() {}
68
69	// Sections in __LINKEDIT are special: their offsets are recorded in the
70	// load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section
71	// headers.
72	bool isHidden() const final { return true; }
73
74	virtual uint64_t getRawSize() const = `0`;
75
76	// codesign (or more specifically libstuff) checks that each section in
77	// __LINKEDIT ends where the next one starts -- no gaps are permitted. We
78	// therefore align every section's start and end points to WordSize.
79	//
80	// NOTE: This assumes that the extra bytes required for alignment can be
81	// zero-valued bytes.
82	uint64_t getSize() const final { return llvm::alignTo(Value: getRawSize(), Align: align); }
83	};
84
85	// The header of the Mach-O file, which must have a file offset of zero.
86	class MachHeaderSection final : public SyntheticSection {
87	public:
88	MachHeaderSection();
89	bool isHidden() const override { return true; }
90	uint64_t getSize() const override;
91	void writeTo(uint8_t buf) const* override;
92
93	void addLoadCommand(LoadCommand *);
94
95	protected:
96	std::vector<LoadCommand *> loadCommands;
97	uint32_t sizeOfCmds = `0`;
98	};
99
100	// A hidden section that exists solely for the purpose of creating the
101	// __PAGEZERO segment, which is used to catch null pointer dereferences.
102	class PageZeroSection final : public SyntheticSection {
103	public:
104	PageZeroSection();
105	bool isHidden() const override { return true; }
106	bool isNeeded() const override { return target->pageZeroSize != `0`; }
107	uint64_t getSize() const override { return target->pageZeroSize; }
108	uint64_t getFileSize() const override { return `0`; }
109	void writeTo(uint8_t buf) const* override {}
110	};
111
112	// This is the base class for the GOT and TLVPointer sections, which are nearly
113	// functionally identical -- they will both be populated by dyld with addresses
114	// to non-lazily-loaded dylib symbols. The main difference is that the
115	// TLVPointerSection stores references to thread-local variables.
116	class NonLazyPointerSectionBase : public SyntheticSection {
117	public:
118	NonLazyPointerSectionBase(const char segname, const* char *name);
119	const llvm::SetVector<const Symbol > &getEntries() const* { return entries; }
120	bool isNeeded() const override { return !entries.empty(); }
121	uint64_t getSize() const override {
122	return entries.size() * target->wordSize;
123	}
124	void writeTo(uint8_t buf) const* override;
125	void addEntry(Symbol *sym);
126	uint64_t getVA(uint32_t gotIndex) const {
127	return addr + gotIndex * target->wordSize;
128	}
129
130	private:
131	llvm::SetVector<const Symbol *> entries;
132	};
133
134	class GotSection final : public NonLazyPointerSectionBase {
135	public:
136	GotSection();
137	};
138
139	class TlvPointerSection final : public NonLazyPointerSectionBase {
140	public:
141	TlvPointerSection();
142	};
143
144	struct Location {
145	const InputSection *isec;
146	uint64_t offset;
147
148	Location(const InputSection *isec, uint64_t offset)
149	: isec(isec), offset(offset) {}
150	uint64_t getVA() const { return isec->getVA(off: offset); }
151	};
152
153	// Stores rebase opcodes, which tell dyld where absolute addresses have been
154	// encoded in the binary. If the binary is not loaded at its preferred address,
155	// dyld has to rebase these addresses by adding an offset to them.
156	class RebaseSection final : public LinkEditSection {
157	public:
158	RebaseSection();
159	void finalizeContents() override;
160	uint64_t getRawSize() const override { return contents.size(); }
161	bool isNeeded() const override { return !locations.empty(); }
162	void writeTo(uint8_t buf) const* override;
163
164	void addEntry(const InputSection *isec, uint64_t offset) {
165	if (config ->isPic)
166	locations.emplace_back(args&: isec, args&: offset);
167	}
168
169	private:
170	std::vector<Location> locations;
171	SmallVector<char, `128`> contents;
172	};
173
174	struct BindingEntry {
175	int64_t addend;
176	Location target;
177	BindingEntry(int64_t addend, Location target)
178	: addend(addend), target (target) {}
179	};
180
181	template <class Sym>
182	using BindingsMap = llvm::DenseMap<Sym, std::vector<BindingEntry>>;
183
184	// Stores bind opcodes for telling dyld which symbols to load non-lazily.
185	class BindingSection final : public LinkEditSection {
186	public:
187	BindingSection();
188	void finalizeContents() override;
189	uint64_t getRawSize() const override { return contents.size(); }
190	bool isNeeded() const override { return !bindingsMap.empty(); }
191	void writeTo(uint8_t buf) const* override;
192
193	void addEntry(const Symbol dysym, const* InputSection *isec, uint64_t offset,
194	int64_t addend = `0`) {
195	bindingsMap [dysym].emplace_back(args&: addend, args: Location (isec, offset));
196	}
197
198	private:
199	BindingsMap<const Symbol *> bindingsMap;
200	SmallVector<char, `128`> contents;
201	};
202
203	// Stores bind opcodes for telling dyld which weak symbols need coalescing.
204	// There are two types of entries in this section:
205	//
206	// 1) Non-weak definitions: This is a symbol definition that weak symbols in
207	// other dylibs should coalesce to.
208	//
209	// 2) Weak bindings: These tell dyld that a given symbol reference should
210	// coalesce to a non-weak definition if one is found. Note that unlike the
211	// entries in the BindingSection, the bindings here only refer to these
212	// symbols by name, but do not specify which dylib to load them from.
213	class WeakBindingSection final : public LinkEditSection {
214	public:
215	WeakBindingSection();
216	void finalizeContents() override;
217	uint64_t getRawSize() const override { return contents.size(); }
218	bool isNeeded() const override {
219	return !bindingsMap.empty() \|\| !definitions.empty();
220	}
221
222	void writeTo(uint8_t buf) const* override;
223
224	void addEntry(const Symbol symbol, const* InputSection *isec, uint64_t offset,
225	int64_t addend = `0`) {
226	bindingsMap [symbol].emplace_back(args&: addend, args: Location (isec, offset));
227	}
228
229	bool hasEntry() const { return !bindingsMap.empty(); }
230
231	void addNonWeakDefinition(const Defined *defined) {
232	definitions.emplace_back(args&: defined);
233	}
234
235	bool hasNonWeakDefinition() const { return !definitions.empty(); }
236
237	private:
238	BindingsMap<const Symbol *> bindingsMap;
239	std::vector<const Defined *> definitions;
240	SmallVector<char, `128`> contents;
241	};
242
243	// The following sections implement lazy symbol binding -- very similar to the
244	// PLT mechanism in ELF.
245	//
246	// ELF's .plt section is broken up into two sections in Mach-O: StubsSection
247	// and StubHelperSection. Calls to functions in dylibs will end up calling into
248	// StubsSection, which contains indirect jumps to addresses stored in the
249	// LazyPointerSection (the counterpart to ELF's .plt.got).
250	//
251	// We will first describe how non-weak symbols are handled.
252	//
253	// At program start, the LazyPointerSection contains addresses that point into
254	// one of the entry points in the middle of the StubHelperSection. The code in
255	// StubHelperSection will push on the stack an offset into the
256	// LazyBindingSection. The push is followed by a jump to the beginning of the
257	// StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder.
258	// dyld_stub_binder is a non-lazily-bound symbol, so this call looks it up in
259	// the GOT.
260	//
261	// The stub binder will look up the bind opcodes in the LazyBindingSection at
262	// the given offset. The bind opcodes will tell the binder to update the
263	// address in the LazyPointerSection to point to the symbol, so that subsequent
264	// calls don't have to redo the symbol resolution. The binder will then jump to
265	// the resolved symbol.
266	//
267	// With weak symbols, the situation is slightly different. Since there is no
268	// "weak lazy" lookup, function calls to weak symbols are always non-lazily
269	// bound. We emit both regular non-lazy bindings as well as weak bindings, in
270	// order that the weak bindings may overwrite the non-lazy bindings if an
271	// appropriate symbol is found at runtime. However, the bound addresses will
272	// still be written (non-lazily) into the LazyPointerSection.
273	//
274	// Symbols are always bound eagerly when chained fixups are used. In that case,
275	// StubsSection contains indirect jumps to addresses stored in the GotSection.
276	// The GOT directly contains the fixup entries, which will be replaced by the
277	// address of the target symbols on load. LazyPointerSection and
278	// StubHelperSection are not used.
279
280	class StubsSection final : public SyntheticSection {
281	public:
282	StubsSection();
283	uint64_t getSize() const override;
284	bool isNeeded() const override { return !entries.empty(); }
285	void finalize() override;
286	void writeTo(uint8_t buf) const* override;
287	const llvm::SetVector<Symbol > &getEntries() const* { return entries; }
288	// Creates a stub for the symbol and the corresponding entry in the
289	// LazyPointerSection.
290	void addEntry(Symbol *);
291	uint64_t getVA(uint32_t stubsIndex) const {
292	assert(isFinal \|\| target->usesThunks());
293	// ConcatOutputSection::finalize() can seek the address of a
294	// stub before its address is assigned. Before __stubs is
295	// finalized, return a contrived out-of-range address.
296	return isFinal ? addr + stubsIndex * target->stubSize
297	: TargetInfo::outOfRangeVA;
298	}
299
300	bool isFinal = false; // is address assigned?
301
302	private:
303	llvm::SetVector<Symbol *> entries;
304	};
305
306	class StubHelperSection final : public SyntheticSection {
307	public:
308	StubHelperSection();
309	uint64_t getSize() const override;
310	bool isNeeded() const override;
311	void writeTo(uint8_t buf) const* override;
312
313	void setUp();
314
315	DylibSymbol stubBinder = nullptr*;
316	Defined dyldPrivate = nullptr*;
317	};
318
319	class ObjCSelRefsHelper {
320	public:
321	static void initialize();
322	static void cleanup();
323
324	static ConcatInputSection *getSelRef(StringRef methname);
325	static ConcatInputSection *makeSelRef(StringRef methname);
326
327	private:
328	static llvm::DenseMap<llvm::CachedHashStringRef, ConcatInputSection *>
329	methnameToSelref;
330	};
331
332	// Objective-C stubs are hoisted objc_msgSend calls per selector called in the
333	// program. Apple Clang produces undefined symbols to each stub, such as
334	// '_objc_msgSend$foo', which are then synthesized by the linker. The stubs
335	// load the particular selector 'foo' from __objc_selrefs, setting it to the
336	// first argument of the objc_msgSend call, and then jumps to objc_msgSend. The
337	// actual stub contents are mirrored from ld64.
338	class ObjCStubsSection final : public SyntheticSection {
339	public:
340	ObjCStubsSection();
341	void addEntry(Symbol *sym);
342	uint64_t getSize() const override;
343	bool isNeeded() const override { return !symbols.empty(); }
344	void finalize() override { isec->isFinal = true; }
345	void writeTo(uint8_t buf) const* override;
346	void setUp();
347
348	static constexpr llvm::StringLiteral symbolPrefix = "_objc_msgSend$";
349	static bool isObjCStubSymbol(Symbol *sym);
350	static StringRef getMethname(Symbol *sym);
351
352	private:
353	std::vector<Defined *> symbols;
354	Symbol objcMsgSend = nullptr*;
355	};
356
357	// Note that this section may also be targeted by non-lazy bindings. In
358	// particular, this happens when branch relocations target weak symbols.
359	class LazyPointerSection final : public SyntheticSection {
360	public:
361	LazyPointerSection();
362	uint64_t getSize() const override;
363	bool isNeeded() const override;
364	void writeTo(uint8_t buf) const* override;
365	uint64_t getVA(uint32_t index) const {
366	return addr + (index << target->p2WordSize);
367	}
368	};
369
370	class LazyBindingSection final : public LinkEditSection {
371	public:
372	LazyBindingSection();
373	void finalizeContents() override;
374	uint64_t getRawSize() const override { return contents.size(); }
375	bool isNeeded() const override { return !entries.empty(); }
376	void writeTo(uint8_t buf) const* override;
377	// Note that every entry here will by referenced by a corresponding entry in
378	// the StubHelperSection.
379	void addEntry(Symbol *dysym);
380	const llvm::SetVector<Symbol > &getEntries() const* { return entries; }
381
382	private:
383	uint32_t encode(const Symbol &);
384
385	llvm::SetVector<Symbol *> entries;
386	SmallVector<char, `128`> contents;
387	llvm::raw_svector_ostream os{contents};
388	};
389
390	// Stores a trie that describes the set of exported symbols.
391	class ExportSection final : public LinkEditSection {
392	public:
393	ExportSection();
394	void finalizeContents() override;
395	uint64_t getRawSize() const override { return size; }
396	bool isNeeded() const override { return size; }
397	void writeTo(uint8_t buf) const* override;
398
399	bool hasWeakSymbol = false;
400
401	private:
402	TrieBuilder trieBuilder;
403	size_t size = `0`;
404	};
405
406	// Stores 'data in code' entries that describe the locations of data regions
407	// inside code sections. This is used by llvm-objdump to distinguish jump tables
408	// and stop them from being disassembled as instructions.
409	class DataInCodeSection final : public LinkEditSection {
410	public:
411	DataInCodeSection();
412	void finalizeContents() override;
413	uint64_t getRawSize() const override {
414	return sizeof(llvm::MachO::data_in_code_entry) * entries.size();
415	}
416	void writeTo(uint8_t buf) const* override;
417
418	private:
419	std::vector<llvm::MachO::data_in_code_entry> entries;
420	};
421
422	// Stores ULEB128 delta encoded addresses of functions.
423	class FunctionStartsSection final : public LinkEditSection {
424	public:
425	FunctionStartsSection();
426	void finalizeContents() override;
427	uint64_t getRawSize() const override { return contents.size(); }
428	void writeTo(uint8_t buf) const* override;
429
430	private:
431	SmallVector<char, `128`> contents;
432	};
433
434	// Stores the strings referenced by the symbol table.
435	class StringTableSection final : public LinkEditSection {
436	public:
437	StringTableSection();
438	// Returns the start offset of the added string.
439	uint32_t addString(StringRef);
440	uint64_t getRawSize() const override { return size; }
441	void writeTo(uint8_t buf) const* override;
442
443	static constexpr size_t emptyStringIndex = `1`;
444
445	private:
446	// ld64 emits string tables which start with a space and a zero byte. We
447	// match its behavior here since some tools depend on it.
448	// Consequently, the empty string will be at index 1, not zero.
449	std::vector<StringRef> strings{" "};
450	llvm::DenseMap<llvm::CachedHashStringRef, uint32_t> stringMap;
451	size_t size = `2`;
452	};
453
454	struct SymtabEntry {
455	Symbol *sym;
456	size_t strx;
457	};
458
459	struct StabsEntry {
460	uint8_t type = `0`;
461	uint32_t strx = StringTableSection::emptyStringIndex;
462	uint8_t sect = `0`;
463	uint16_t desc = `0`;
464	uint64_t value = `0`;
465
466	StabsEntry() = default;
467	explicit StabsEntry(uint8_t type) : type(type) {}
468	};
469
470	// Symbols of the same type must be laid out contiguously: we choose to emit
471	// all local symbols first, then external symbols, and finally undefined
472	// symbols. For each symbol type, the LC_DYSYMTAB load command will record the
473	// range (start index and total number) of those symbols in the symbol table.
474	class SymtabSection : public LinkEditSection {
475	public:
476	void finalizeContents() override;
477	uint32_t getNumSymbols() const;
478	uint32_t getNumLocalSymbols() const {
479	return stabs.size() + localSymbols.size();
480	}
481	uint32_t getNumExternalSymbols() const { return externalSymbols.size(); }
482	uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
483
484	private:
485	void emitBeginSourceStab(StringRef);
486	void emitEndSourceStab();
487	void emitObjectFileStab(ObjFile *);
488	void emitEndFunStab(Defined *);
489	Defined getFuncBodySym(Defined );
490	void emitStabs();
491
492	protected:
493	SymtabSection(StringTableSection &);
494
495	StringTableSection &stringTableSection;
496	// STABS symbols are always local symbols, but we represent them with special
497	// entries because they may use fields like n_sect and n_desc differently.
498	std::vector<StabsEntry> stabs;
499	std::vector<SymtabEntry> localSymbols;
500	std::vector<SymtabEntry> externalSymbols;
501	std::vector<SymtabEntry> undefinedSymbols;
502	};
503
504	template <class LP> SymtabSection *makeSymtabSection(StringTableSection &);
505
506	// The indirect symbol table is a list of 32-bit integers that serve as indices
507	// into the (actual) symbol table. The indirect symbol table is a
508	// concatenation of several sub-arrays of indices, each sub-array belonging to
509	// a separate section. The starting offset of each sub-array is stored in the
510	// reserved1 header field of the respective section.
511	//
512	// These sub-arrays provide symbol information for sections that store
513	// contiguous sequences of symbol references. These references can be pointers
514	// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
515	// function stubs).
516	class IndirectSymtabSection final : public LinkEditSection {
517	public:
518	IndirectSymtabSection();
519	void finalizeContents() override;
520	uint32_t getNumSymbols() const;
521	uint64_t getRawSize() const override {
522	return getNumSymbols() * sizeof(uint32_t);
523	}
524	bool isNeeded() const override;
525	void writeTo(uint8_t buf) const* override;
526	};
527
528	// The code signature comes at the very end of the linked output file.
529	class CodeSignatureSection final : public LinkEditSection {
530	public:
531	// NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file
532	// and any changes here, should be repeated there.
533	static constexpr uint8_t blockSizeShift = `12`;
534	static constexpr size_t blockSize = (`1` << blockSizeShift); // 4 KiB
535	static constexpr size_t hashSize = `256` / `8`;
536	static constexpr size_t blobHeadersSize = llvm::alignTo<`8`>(
537	Value: sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex));
538	static constexpr uint32_t fixedHeadersSize =
539	blobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory);
540
541	uint32_t fileNamePad = `0`;
542	uint32_t allHeadersSize = `0`;
543	StringRef fileName;
544
545	CodeSignatureSection();
546	uint64_t getRawSize() const override;
547	bool isNeeded() const override { return true; }
548	void writeTo(uint8_t buf) const* override;
549	uint32_t getBlockCount() const;
550	void writeHashes(uint8_t buf) const*;
551	};
552
553	class CStringSection : public SyntheticSection {
554	public:
555	CStringSection(const char *name);
556	void addInput(CStringInputSection *);
557	uint64_t getSize() const override { return size; }
558	virtual void finalizeContents();
559	bool isNeeded() const override { return !inputs.empty(); }
560	void writeTo(uint8_t buf) const* override;
561
562	std::vector<CStringInputSection *> inputs;
563
564	private:
565	uint64_t size;
566	};
567
568	class DeduplicatedCStringSection final : public CStringSection {
569	public:
570	DeduplicatedCStringSection(const char *name) : CStringSection (name){};
571	uint64_t getSize() const override { return size; }
572	void finalizeContents() override;
573	void writeTo(uint8_t buf) const* override;
574
575	struct StringOffset {
576	uint8_t trailingZeros;
577	uint64_t outSecOff = UINT64_MAX;
578
579	explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {}
580	};
581
582	StringOffset getStringOffset(StringRef str) const;
583
584	private:
585	llvm::DenseMap<llvm::CachedHashStringRef, StringOffset> stringOffsetMap;
586	size_t size = `0`;
587	};
588
589	/*
590	* This section contains deduplicated literal values. The 16-byte values are
591	* laid out first, followed by the 8- and then the 4-byte ones.
592	*/
593	class WordLiteralSection final : public SyntheticSection {
594	public:
595	using UInt128 = std::pair<uint64_t, uint64_t>;
596	// I don't think the standard guarantees the size of a pair, so let's make
597	// sure it's exact -- that way we can construct it via `mmap`.
598	static_assert(sizeof(UInt128) == `16`);
599
600	WordLiteralSection();
601	void addInput(WordLiteralInputSection *);
602	void finalizeContents();
603	void writeTo(uint8_t buf) const* override;
604
605	uint64_t getSize() const override {
606	return literal16Map.size() * `16` + literal8Map.size() * `8` +
607	literal4Map.size() * `4`;
608	}
609
610	bool isNeeded() const override {
611	return !literal16Map.empty() \|\| !literal4Map.empty() \|\|
612	!literal8Map.empty();
613	}
614
615	uint64_t getLiteral16Offset(uintptr_t buf) const {
616	return literal16Map.at(k: *reinterpret_cast<const UInt128 >(buf)) `16`;
617	}
618
619	uint64_t getLiteral8Offset(uintptr_t buf) const {
620	return literal16Map.size() * `16` +
621	literal8Map.at(k: *reinterpret_cast<const uint64_t >(buf)) `8`;
622	}
623
624	uint64_t getLiteral4Offset(uintptr_t buf) const {
625	return literal16Map.size() * `16` + literal8Map.size() * `8` +
626	literal4Map.at(k: *reinterpret_cast<const uint32_t >(buf)) `4`;
627	}
628
629	private:
630	std::vector<WordLiteralInputSection *> inputs;
631
632	template <class T> struct Hasher {
633	llvm::hash_code operator()(T v) const { return llvm::hash_value(v); }
634	};
635	// We're using unordered_map instead of DenseMap here because we need to
636	// support all possible integer values -- there are no suitable tombstone
637	// values for DenseMap.
638	std::unordered_map<UInt128, uint64_t, Hasher<UInt128>> literal16Map;
639	std::unordered_map<uint64_t, uint64_t> literal8Map;
640	std::unordered_map<uint32_t, uint64_t> literal4Map;
641	};
642
643	class ObjCImageInfoSection final : public SyntheticSection {
644	public:
645	ObjCImageInfoSection();
646	bool isNeeded() const override { return !files.empty(); }
647	uint64_t getSize() const override { return `8`; }
648	void addFile(const InputFile *file) {
649	assert(!file->objCImageInfo.empty());
650	files.push_back(x: file);
651	}
652	void finalizeContents();
653	void writeTo(uint8_t buf) const* override;
654
655	private:
656	struct ImageInfo {
657	uint8_t swiftVersion = `0`;
658	bool hasCategoryClassProperties = false;
659	} info;
660	static ImageInfo parseImageInfo(const InputFile *);
661	std::vector<const InputFile > files; // files with image info*
662	};
663
664	// This section stores 32-bit __TEXT segment offsets of initializer functions.
665	//
666	// The compiler stores pointers to initializers in __mod_init_func. These need
667	// to be fixed up at load time, which takes time and dirties memory. By
668	// synthesizing InitOffsetsSection from them, this data can live in the
669	// read-only __TEXT segment instead. This section is used by default when
670	// chained fixups are enabled.
671	//
672	// There is no similar counterpart to __mod_term_func, as that section is
673	// deprecated, and static destructors are instead handled by registering them
674	// via __cxa_atexit from an autogenerated initializer function (see D121736).
675	class InitOffsetsSection final : public SyntheticSection {
676	public:
677	InitOffsetsSection();
678	bool isNeeded() const override { return !sections.empty(); }
679	uint64_t getSize() const override;
680	void writeTo(uint8_t buf) const* override;
681	void setUp();
682
683	void addInput(ConcatInputSection *isec) { sections.push_back(x: isec); }
684	const std::vector<ConcatInputSection > &inputs() const* { return sections; }
685
686	private:
687	std::vector<ConcatInputSection *> sections;
688	};
689
690	// This SyntheticSection is for the __objc_methlist section, which contains
691	// relative method lists if the -objc_relative_method_lists option is enabled.
692	class ObjCMethListSection final : public SyntheticSection {
693	public:
694	ObjCMethListSection();
695
696	static bool isMethodList(const ConcatInputSection *isec);
697	void addInput(ConcatInputSection *isec) { inputs.push_back(x: isec); }
698	std::vector<ConcatInputSection > getInputs() { return* inputs; }
699
700	void setUp();
701	void finalize() override;
702	bool isNeeded() const override { return !inputs.empty(); }
703	uint64_t getSize() const override { return sectionSize; }
704	void writeTo(uint8_t bufStart) const* override;
705
706	private:
707	void readMethodListHeader(const uint8_t *buf, uint32_t &structSizeAndFlags,
708	uint32_t &structCount) const;
709	void writeMethodListHeader(uint8_t *buf, uint32_t structSizeAndFlags,
710	uint32_t structCount) const;
711	uint32_t computeRelativeMethodListSize(uint32_t absoluteMethodListSize) const;
712	void writeRelativeOffsetForIsec(const ConcatInputSection isec, uint8_t buf,
713	uint32_t &inSecOff, uint32_t &outSecOff,
714	bool useSelRef) const;
715	uint32_t writeRelativeMethodList(const ConcatInputSection *isec,
716	uint8_t buf) const*;
717
718	static constexpr uint32_t methodListHeaderSize =
719	/structSizeAndFlags/ sizeof(uint32_t) +
720	/structCount/ sizeof(uint32_t);
721	// Relative method lists are supported only for 3-pointer method lists
722	static constexpr uint32_t pointersPerStruct = `3`;
723	// The runtime identifies relative method lists via this magic value
724	static constexpr uint32_t relMethodHeaderFlag = `0x80000000`;
725	// In the method list header, the first 2 bytes are the size of struct
726	static constexpr uint32_t structSizeMask = `0x0000FFFF`;
727	// In the method list header, the last 2 bytes are the flags for the struct
728	static constexpr uint32_t structFlagsMask = `0xFFFF0000`;
729	// Relative method lists have 4 byte alignment as all data in the InputSection
730	// is 4 byte
731	static constexpr uint32_t relativeOffsetSize = sizeof(uint32_t);
732
733	// The output size of the __objc_methlist section, computed during finalize()
734	uint32_t sectionSize = `0`;
735	std::vector<ConcatInputSection *> inputs;
736	};
737
738	// Chained fixups are a replacement for classic dyld opcodes. In this format,
739	// most of the metadata necessary for binding symbols and rebasing addresses is
740	// stored directly in the memory location that will have the fixup applied.
741	//
742	// The fixups form singly linked lists; each one covering a single page in
743	// memory. The __LINKEDIT,__chainfixups section stores the page offset of the
744	// first fixup of each page; the rest can be found by walking the chain using
745	// the offset that is embedded in each entry.
746	//
747	// This setup allows pages to be relocated lazily at page-in time and without
748	// being dirtied. The kernel can discard and load them again as needed. This
749	// technique, called page-in linking, was introduced in macOS 13.
750	//
751	// The benefits of this format are:
752	// - smaller __LINKEDIT segment, as most of the fixup information is stored in
753	// the data segment
754	// - faster startup, since not all relocations need to be done upfront
755	// - slightly lower memory usage, as fewer pages are dirtied
756	//
757	// Userspace x86_64 and arm64 binaries have two types of fixup entries:
758	// - Rebase entries contain an absolute address, to which the object's load
759	// address will be added to get the final value. This is used for loading
760	// the address of a symbol defined in the same binary.
761	// - Binding entries are mostly used for symbols imported from other dylibs,
762	// but for weakly bound and interposable symbols as well. They are looked up
763	// by a (symbol name, library) pair stored in __chainfixups. This import
764	// entry also encodes whether the import is weak (i.e. if the symbol is
765	// missing, it should be set to null instead of producing a load error).
766	// The fixup encodes an ordinal associated with the import, and an optional
767	// addend.
768	//
769	// The entries are tightly packed 64-bit bitfields. One of the bits specifies
770	// which kind of fixup to interpret them as.
771	//
772	// LLD generates the fixup data in 5 stages:
773	// 1. While scanning relocations, we make a note of each location that needs
774	// a fixup by calling addRebase() or addBinding(). During this, we assign
775	// a unique ordinal for each (symbol name, library, addend) import tuple.
776	// 2. After addresses have been assigned to all sections, and thus the memory
777	// layout of the linked image is final; finalizeContents() is called. Here,
778	// the page offsets of the chain start entries are calculated.
779	// 3. ChainedFixupsSection::writeTo() writes the page start offsets and the
780	// imports table to the output file.
781	// 4. Each section's fixup entries are encoded and written to disk in
782	// ConcatInputSection::writeTo(), but without writing the offsets that form
783	// the chain.
784	// 5. Finally, each page's (which might correspond to multiple sections)
785	// fixups are linked together in Writer::buildFixupChains().
786	class ChainedFixupsSection final : public LinkEditSection {
787	public:
788	ChainedFixupsSection();
789	void finalizeContents() override;
790	uint64_t getRawSize() const override { return size; }
791	bool isNeeded() const override;
792	void writeTo(uint8_t buf) const* override;
793
794	void addRebase(const InputSection *isec, uint64_t offset) {
795	locations.emplace_back(args&: isec, args&: offset);
796	}
797	void addBinding(const Symbol dysym, const* InputSection *isec,
798	uint64_t offset, int64_t addend = `0`);
799
800	void setHasNonWeakDefinition() { hasNonWeakDef = true; }
801
802	// Returns an (ordinal, inline addend) tuple used by dyld_chained_ptr_64_bind.
803	std::pair<uint32_t, uint8_t> getBinding(const Symbol *sym,
804	int64_t addend) const;
805
806	const std::vector<Location> &getLocations() const { return locations; }
807
808	bool hasWeakBinding() const { return hasWeakBind; }
809	bool hasNonWeakDefinition() const { return hasNonWeakDef; }
810
811	private:
812	// Location::offset initially stores the offset within an InputSection, but
813	// contains output segment offsets after finalizeContents().
814	std::vector<Location> locations;
815	// (target symbol, addend) => import ordinal
816	llvm::MapVector<std::pair<const Symbol *, int64_t>, uint32_t> bindings;
817
818	struct SegmentInfo {
819	SegmentInfo(const OutputSegment *oseg) : oseg(oseg) {}
820
821	const OutputSegment *oseg;
822	// (page index, fixup starts offset)
823	llvm::SmallVector<std::pair<uint16_t, uint16_t>> pageStarts;
824
825	size_t getSize() const;
826	size_t writeTo(uint8_t buf) const*;
827	};
828	llvm::SmallVector<SegmentInfo, `4`> fixupSegments;
829
830	size_t symtabSize = `0`;
831	size_t size = `0`;
832
833	bool needsAddend = false;
834	bool needsLargeAddend = false;
835	bool hasWeakBind = false;
836	bool hasNonWeakDef = false;
837	llvm::MachO::ChainedImportFormat importFormat;
838	};
839
840	void writeChainedRebase(uint8_t *buf, uint64_t targetVA);
841	void writeChainedFixup(uint8_t buf, const* Symbol *sym, int64_t addend);
842
843	struct InStruct {
844	const uint8_t bufferStart = nullptr*;
845	MachHeaderSection header = nullptr*;
846	CStringSection cStringSection = nullptr*;
847	DeduplicatedCStringSection objcMethnameSection = nullptr*;
848	WordLiteralSection wordLiteralSection = nullptr*;
849	RebaseSection rebase = nullptr*;
850	BindingSection binding = nullptr*;
851	WeakBindingSection weakBinding = nullptr*;
852	LazyBindingSection lazyBinding = nullptr*;
853	ExportSection exports = nullptr*;
854	GotSection got = nullptr*;
855	TlvPointerSection tlvPointers = nullptr*;
856	LazyPointerSection lazyPointers = nullptr*;
857	StubsSection stubs = nullptr*;
858	StubHelperSection stubHelper = nullptr*;
859	ObjCStubsSection objcStubs = nullptr*;
860	UnwindInfoSection unwindInfo = nullptr*;
861	ObjCImageInfoSection objCImageInfo = nullptr*;
862	ConcatInputSection imageLoaderCache = nullptr*;
863	InitOffsetsSection initOffsets = nullptr*;
864	ObjCMethListSection objcMethList = nullptr*;
865	ChainedFixupsSection chainedFixups = nullptr*;
866	};
867
868	extern InStruct in;
869	extern std::vector<SyntheticSection *> syntheticSections;
870
871	void createSyntheticSymbols();
872
873	} // namespace lld::macho
874
875	#endif
876

source code of lld/MachO/SyntheticSections.h