SyntheticSections.cpp source code [lld/MachO/SyntheticSections.cpp]

1	//===- SyntheticSections.cpp ---------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "SyntheticSections.h"
10	#include "ConcatOutputSection.h"
11	#include "Config.h"
12	#include "ExportTrie.h"
13	#include "ICF.h"
14	#include "InputFiles.h"
15	#include "ObjC.h"
16	#include "OutputSegment.h"
17	#include "SectionPriorities.h"
18	#include "SymbolTable.h"
19	#include "Symbols.h"
20
21	#include "lld/Common/CommonLinkerContext.h"
22	#include "llvm/ADT/STLExtras.h"
23	#include "llvm/Config/llvm-config.h"
24	#include "llvm/Support/FileSystem.h"
25	#include "llvm/Support/LEB128.h"
26	#include "llvm/Support/Parallel.h"
27	#include "llvm/Support/xxhash.h"
28
29	#if defined(__APPLE__)
30	#include <sys/mman.h>
31
32	#define COMMON_DIGEST_FOR_OPENSSL
33	#include <CommonCrypto/CommonDigest.h>
34	#else
35	#include "llvm/Support/SHA256.h"
36	#endif
37
38	using namespace llvm;
39	using namespace llvm::MachO;
40	using namespace llvm::support;
41	using namespace llvm::support::endian;
42	using namespace lld;
43	using namespace lld::macho;
44
45	// Reads `len` bytes at data and writes the 32-byte SHA256 checksum to `output`.
46	static void sha256(const uint8_t data, size_t len, uint8_t output) {
47	#if defined(__APPLE__)
48	// FIXME: Make LLVM's SHA256 faster and use it unconditionally. See PR56121
49	// for some notes on this.
50	CC_SHA256(data, len, output);
51	#else
52	ArrayRef<uint8_t> block(data, len);
53	std::array<uint8_t, `32`> hash = SHA256::hash(Data: block);
54	static_assert(hash.size() == CodeSignatureSection::hashSize);
55	memcpy(dest: output, src: hash.data(), n: hash.size());
56	#endif
57	}
58
59	InStruct macho::in;
60	std::vector<SyntheticSection *> macho::syntheticSections;
61
62	SyntheticSection::SyntheticSection(const char segname, const* char *name)
63	: OutputSection (SyntheticKind, name) {
64	std::tie(args&: this->segname, args&: this->name) = maybeRenameSection(key: {segname, name});
65	isec = makeSyntheticInputSection(segName: segname, sectName: name);
66	isec->parent = this;
67	syntheticSections.push_back(x: this);
68	}
69
70	// dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
71	// from the beginning of the file (i.e. the header).
72	MachHeaderSection::MachHeaderSection()
73	: SyntheticSection (segment_names::text, section_names::header) {
74	// XXX: This is a hack. (See D97007)
75	// Setting the index to 1 to pretend that this section is the text
76	// section.
77	index = `1`;
78	isec->isFinal = true;
79	}
80
81	void MachHeaderSection::addLoadCommand(LoadCommand *lc) {
82	loadCommands.push_back(x: lc);
83	sizeOfCmds += lc->getSize();
84	}
85
86	uint64_t MachHeaderSection::getSize() const {
87	uint64_t size = target->headerSize + sizeOfCmds + config ->headerPad;
88	// If we are emitting an encryptable binary, our load commands must have a
89	// separate (non-encrypted) page to themselves.
90	if (config ->emitEncryptionInfo)
91	size = alignToPowerOf2(Value: size, Align: target->getPageSize());
92	return size;
93	}
94
95	static uint32_t cpuSubtype() {
96	uint32_t subtype = target->cpuSubtype;
97
98	if (config ->outputType == MH_EXECUTE && !config ->staticLink &&
99	target->cpuSubtype == CPU_SUBTYPE_X86_64_ALL &&
100	config ->platform() == PLATFORM_MACOS &&
101	config ->platformInfo.target.MinDeployment >= VersionTuple (`10`, `5`))
102	subtype \|= CPU_SUBTYPE_LIB64;
103
104	return subtype;
105	}
106
107	static bool hasWeakBinding() {
108	return config ->emitChainedFixups ? in.chainedFixups->hasWeakBinding()
109	: in.weakBinding->hasEntry();
110	}
111
112	static bool hasNonWeakDefinition() {
113	return config ->emitChainedFixups ? in.chainedFixups->hasNonWeakDefinition()
114	: in.weakBinding->hasNonWeakDefinition();
115	}
116
117	void MachHeaderSection::writeTo(uint8_t buf) const* {
118	auto hdr = reinterpret_cast<mach_header >(buf);
119	hdr->magic = target->magic;
120	hdr->cputype = target->cpuType;
121	hdr->cpusubtype = cpuSubtype();
122	hdr->filetype = config ->outputType;
123	hdr->ncmds = loadCommands.size();
124	hdr->sizeofcmds = sizeOfCmds;
125	hdr->flags = MH_DYLDLINK;
126
127	if (config ->namespaceKind == NamespaceKind::twolevel)
128	hdr->flags \|= MH_NOUNDEFS \| MH_TWOLEVEL;
129
130	if (config ->outputType == MH_DYLIB && !config ->hasReexports)
131	hdr->flags \|= MH_NO_REEXPORTED_DYLIBS;
132
133	if (config ->markDeadStrippableDylib)
134	hdr->flags \|= MH_DEAD_STRIPPABLE_DYLIB;
135
136	if (config ->outputType == MH_EXECUTE && config ->isPic)
137	hdr->flags \|= MH_PIE;
138
139	if (config ->outputType == MH_DYLIB && config ->applicationExtension)
140	hdr->flags \|= MH_APP_EXTENSION_SAFE;
141
142	if (in.exports->hasWeakSymbol \|\| hasNonWeakDefinition())
143	hdr->flags \|= MH_WEAK_DEFINES;
144
145	if (in.exports->hasWeakSymbol \|\| hasWeakBinding())
146	hdr->flags \|= MH_BINDS_TO_WEAK;
147
148	for (const OutputSegment *seg : outputSegments) {
149	for (const OutputSection *osec : seg->getSections()) {
150	if (isThreadLocalVariables(flags: osec->flags)) {
151	hdr->flags \|= MH_HAS_TLV_DESCRIPTORS;
152	break;
153	}
154	}
155	}
156
157	uint8_t p = reinterpret_cast<uint8_t >(hdr) + target->headerSize;
158	for (const LoadCommand *lc : loadCommands) {
159	lc->writeTo(buf: p);
160	p += lc->getSize();
161	}
162	}
163
164	PageZeroSection::PageZeroSection()
165	: SyntheticSection (segment_names::pageZero, section_names::pageZero) {}
166
167	RebaseSection::RebaseSection()
168	: LinkEditSection (segment_names::linkEdit, section_names::rebase) {}
169
170	namespace {
171	struct RebaseState {
172	uint64_t sequenceLength;
173	uint64_t skipLength;
174	};
175	} // namespace
176
177	static void emitIncrement(uint64_t incr, raw_svector_ostream &os) {
178	assert(incr != `0`);
179
180	if ((incr >> target->p2WordSize) <= REBASE_IMMEDIATE_MASK &&
181	(incr % target->wordSize) == `0`) {
182	os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_IMM_SCALED \|
183	(incr >> target->p2WordSize));
184	} else {
185	os << static_cast<uint8_t>(REBASE_OPCODE_ADD_ADDR_ULEB);
186	encodeULEB128(Value: incr, OS&: os);
187	}
188	}
189
190	static void flushRebase(const RebaseState &state, raw_svector_ostream &os) {
191	assert(state.sequenceLength > `0`);
192
193	if (state.skipLength == target->wordSize) {
194	if (state.sequenceLength <= REBASE_IMMEDIATE_MASK) {
195	os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_IMM_TIMES \|
196	state.sequenceLength);
197	} else {
198	os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ULEB_TIMES);
199	encodeULEB128(Value: state.sequenceLength, OS&: os);
200	}
201	} else if (state.sequenceLength == `1`) {
202	os << static_cast<uint8_t>(REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB);
203	encodeULEB128(Value: state.skipLength - target->wordSize, OS&: os);
204	} else {
205	os << static_cast<uint8_t>(
206	REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB);
207	encodeULEB128(Value: state.sequenceLength, OS&: os);
208	encodeULEB128(Value: state.skipLength - target->wordSize, OS&: os);
209	}
210	}
211
212	// Rebases are communicated to dyld using a bytecode, whose opcodes cause the
213	// memory location at a specific address to be rebased and/or the address to be
214	// incremented.
215	//
216	// Opcode REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB is the most generic
217	// one, encoding a series of evenly spaced addresses. This algorithm works by
218	// splitting up the sorted list of addresses into such chunks. If the locations
219	// are consecutive or the sequence consists of a single location, flushRebase
220	// will use a smaller, more specialized encoding.
221	static void encodeRebases(const OutputSegment *seg,
222	MutableArrayRef<Location> locations,
223	raw_svector_ostream &os) {
224	// dyld operates on segments. Translate section offsets into segment offsets.
225	for (Location &loc : locations)
226	loc.offset =
227	loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(off: loc.offset);
228	// The algorithm assumes that locations are unique.
229	Location *end =
230	llvm::unique(R&: locations, P: [](const Location &a, const Location &b) {
231	return a.offset == b.offset;
232	});
233	size_t count = end - locations.begin();
234
235	os << static_cast<uint8_t>(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB \|
236	seg->index);
237	assert(!locations.empty());
238	uint64_t offset = locations [`0`].offset;
239	encodeULEB128(Value: offset, OS&: os);
240
241	RebaseState state{.sequenceLength: `1`, .skipLength: target->wordSize};
242
243	for (size_t i = `1`; i < count; ++i) {
244	offset = locations [i].offset;
245
246	uint64_t skip = offset - locations [i - `1`].offset;
247	assert(skip != `0` && "duplicate locations should have been weeded out");
248
249	if (skip == state.skipLength) {
250	++state.sequenceLength;
251	} else if (state.sequenceLength == `1`) {
252	++state.sequenceLength;
253	state.skipLength = skip;
254	} else if (skip < state.skipLength) {
255	// The address is lower than what the rebase pointer would be if the last
256	// location would be part of a sequence. We start a new sequence from the
257	// previous location.
258	--state.sequenceLength;
259	flushRebase(state, os);
260
261	state.sequenceLength = `2`;
262	state.skipLength = skip;
263	} else {
264	// The address is at some positive offset from the rebase pointer. We
265	// start a new sequence which begins with the current location.
266	flushRebase(state, os);
267	emitIncrement(incr: skip - state.skipLength, os);
268	state.sequenceLength = `1`;
269	state.skipLength = target->wordSize;
270	}
271	}
272	flushRebase(state, os);
273	}
274
275	void RebaseSection::finalizeContents() {
276	if (locations.empty())
277	return;
278
279	raw_svector_ostream os{contents};
280	os << static_cast<uint8_t>(REBASE_OPCODE_SET_TYPE_IMM \| REBASE_TYPE_POINTER);
281
282	llvm::sort(C&: locations, Comp: [](const Location &a, const Location &b) {
283	return a.isec->getVA(off: a.offset) < b.isec->getVA(off: b.offset);
284	});
285
286	for (size_t i = `0`, count = locations.size(); i < count;) {
287	const OutputSegment *seg = locations [i].isec->parent->parent;
288	size_t j = i + `1`;
289	while (j < count && locations [j].isec->parent->parent == seg)
290	++j;
291	encodeRebases(seg, locations: {locations.data() + i, locations.data() + j}, os);
292	i = j;
293	}
294	os << static_cast<uint8_t>(REBASE_OPCODE_DONE);
295	}
296
297	void RebaseSection::writeTo(uint8_t buf) const* {
298	memcpy(dest: buf, src: contents.data(), n: contents.size());
299	}
300
301	NonLazyPointerSectionBase::NonLazyPointerSectionBase(const char *segname,
302	const char *name)
303	: SyntheticSection (segname, name) {
304	align = target->wordSize;
305	}
306
307	void macho::addNonLazyBindingEntries(const Symbol *sym,
308	const InputSection *isec, uint64_t offset,
309	int64_t addend) {
310	if (config ->emitChainedFixups) {
311	if (needsBinding(sym))
312	in.chainedFixups->addBinding(dysym: sym, isec, offset, addend);
313	else if (isa<Defined>(Val: sym))
314	in.chainedFixups->addRebase(isec, offset);
315	else
316	llvm_unreachable("cannot bind to an undefined symbol");
317	return;
318	}
319
320	if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
321	in.binding->addEntry(dysym, isec, offset, addend);
322	if (dysym->isWeakDef())
323	in.weakBinding->addEntry(symbol: sym, isec, offset, addend);
324	} else if (const auto *defined = dyn_cast<Defined>(Val: sym)) {
325	in.rebase->addEntry(isec, offset);
326	if (defined->isExternalWeakDef())
327	in.weakBinding->addEntry(symbol: sym, isec, offset, addend);
328	else if (defined->interposable)
329	in.binding->addEntry(dysym: sym, isec, offset, addend);
330	} else {
331	// Undefined symbols are filtered out in scanRelocations(); we should never
332	// get here
333	llvm_unreachable("cannot bind to an undefined symbol");
334	}
335	}
336
337	void NonLazyPointerSectionBase::addEntry(Symbol *sym) {
338	if (entries.insert(X: sym)) {
339	assert(!sym->isInGot());
340	sym->gotIndex = entries.size() - `1`;
341
342	addNonLazyBindingEntries(sym, isec, offset: sym->gotIndex * target->wordSize);
343	}
344	}
345
346	void macho::writeChainedRebase(uint8_t *buf, uint64_t targetVA) {
347	assert(config ->emitChainedFixups);
348	assert(target->wordSize == `8` && "Only 64-bit platforms are supported");
349	auto rebase = reinterpret_cast<dyld_chained_ptr_64_rebase >(buf);
350	rebase->target = targetVA & `0xf'ffff'ffff`;
351	rebase->high8 = (targetVA >> `56`);
352	rebase->reserved = `0`;
353	rebase->next = `0`;
354	rebase->bind = `0`;
355
356	// The fixup format places a 64 GiB limit on the output's size.
357	// Should we handle this gracefully?
358	uint64_t encodedVA = rebase->target \| ((uint64_t)rebase->high8 << `56`);
359	if (encodedVA != targetVA)
360	error(msg: "rebase target address 0x" + Twine::utohexstr(Val: targetVA) +
361	" does not fit into chained fixup. Re-link with -no_fixup_chains");
362	}
363
364	static void writeChainedBind(uint8_t buf, const* Symbol *sym, int64_t addend) {
365	assert(config ->emitChainedFixups);
366	assert(target->wordSize == `8` && "Only 64-bit platforms are supported");
367	auto bind = reinterpret_cast<dyld_chained_ptr_64_bind >(buf);
368	auto [ordinal, inlineAddend] = in.chainedFixups->getBinding(sym, addend);
369	bind->ordinal = ordinal;
370	bind->addend = inlineAddend;
371	bind->reserved = `0`;
372	bind->next = `0`;
373	bind->bind = `1`;
374	}
375
376	void macho::writeChainedFixup(uint8_t buf, const* Symbol *sym, int64_t addend) {
377	if (needsBinding(sym))
378	writeChainedBind(buf, sym, addend);
379	else
380	writeChainedRebase(buf, targetVA: sym->getVA() + addend);
381	}
382
383	void NonLazyPointerSectionBase::writeTo(uint8_t buf) const* {
384	if (config ->emitChainedFixups) {
385	for (const auto &[i, entry] : llvm::enumerate(First: entries))
386	writeChainedFixup(buf: &buf[i * target->wordSize], sym: entry, addend: `0`);
387	} else {
388	for (const auto &[i, entry] : llvm::enumerate(First: entries))
389	if (auto *defined = dyn_cast<Defined>(Val: entry))
390	write64le(P: &buf[i * target->wordSize], V: defined->getVA());
391	}
392	}
393
394	GotSection::GotSection()
395	: NonLazyPointerSectionBase (segment_names::data, section_names::got) {
396	flags = S_NON_LAZY_SYMBOL_POINTERS;
397	}
398
399	TlvPointerSection::TlvPointerSection()
400	: NonLazyPointerSectionBase (segment_names::data,
401	section_names::threadPtrs) {
402	flags = S_THREAD_LOCAL_VARIABLE_POINTERS;
403	}
404
405	BindingSection::BindingSection()
406	: LinkEditSection (segment_names::linkEdit, section_names::binding) {}
407
408	namespace {
409	struct Binding {
410	OutputSegment segment = nullptr*;
411	uint64_t offset = `0`;
412	int64_t addend = `0`;
413	};
414	struct BindIR {
415	// Default value of 0xF0 is not valid opcode and should make the program
416	// scream instead of accidentally writing "valid" values.
417	uint8_t opcode = `0xF0`;
418	uint64_t data = `0`;
419	uint64_t consecutiveCount = `0`;
420	};
421	} // namespace
422
423	// Encode a sequence of opcodes that tell dyld to write the address of symbol +
424	// addend at osec->addr + outSecOff.
425	//
426	// The bind opcode "interpreter" remembers the values of each binding field, so
427	// we only need to encode the differences between bindings. Hence the use of
428	// lastBinding.
429	static void encodeBinding(const OutputSection *osec, uint64_t outSecOff,
430	int64_t addend, Binding &lastBinding,
431	std::vector<BindIR> &opcodes) {
432	OutputSegment *seg = osec->parent;
433	uint64_t offset = osec->getSegmentOffset() + outSecOff;
434	if (lastBinding.segment != seg) {
435	opcodes.push_back(
436	x: {.opcode: static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB \|
437	seg->index),
438	.data: offset});
439	lastBinding.segment = seg;
440	lastBinding.offset = offset;
441	} else if (lastBinding.offset != offset) {
442	opcodes.push_back(x: {.opcode: BIND_OPCODE_ADD_ADDR_ULEB, .data: offset - lastBinding.offset});
443	lastBinding.offset = offset;
444	}
445
446	if (lastBinding.addend != addend) {
447	opcodes.push_back(
448	x: {.opcode: BIND_OPCODE_SET_ADDEND_SLEB, .data: static_cast<uint64_t>(addend)});
449	lastBinding.addend = addend;
450	}
451
452	opcodes.push_back(x: {.opcode: BIND_OPCODE_DO_BIND, .data: `0`});
453	// DO_BIND causes dyld to both perform the binding and increment the offset
454	lastBinding.offset += target->wordSize;
455	}
456
457	static void optimizeOpcodes(std::vector<BindIR> &opcodes) {
458	// Pass 1: Combine bind/add pairs
459	size_t i;
460	int pWrite = `0`;
461	for (i = `1`; i < opcodes.size(); ++i, ++pWrite) {
462	if ((opcodes [i].opcode == BIND_OPCODE_ADD_ADDR_ULEB) &&
463	(opcodes [i - `1`].opcode == BIND_OPCODE_DO_BIND)) {
464	opcodes [pWrite].opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB;
465	opcodes [pWrite].data = opcodes [i].data;
466	++i;
467	} else {
468	opcodes [pWrite] = opcodes [i - `1`];
469	}
470	}
471	if (i == opcodes.size())
472	opcodes [pWrite] = opcodes [i - `1`];
473	opcodes.resize(new_size: pWrite + `1`);
474
475	// Pass 2: Compress two or more bind_add opcodes
476	pWrite = `0`;
477	for (i = `1`; i < opcodes.size(); ++i, ++pWrite) {
478	if ((opcodes [i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
479	(opcodes [i - `1`].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
480	(opcodes [i].data == opcodes [i - `1`].data)) {
481	opcodes [pWrite].opcode = BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB;
482	opcodes [pWrite].consecutiveCount = `2`;
483	opcodes [pWrite].data = opcodes [i].data;
484	++i;
485	while (i < opcodes.size() &&
486	(opcodes [i].opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
487	(opcodes [i].data == opcodes [i - `1`].data)) {
488	opcodes [pWrite].consecutiveCount++;
489	++i;
490	}
491	} else {
492	opcodes [pWrite] = opcodes [i - `1`];
493	}
494	}
495	if (i == opcodes.size())
496	opcodes [pWrite] = opcodes [i - `1`];
497	opcodes.resize(new_size: pWrite + `1`);
498
499	// Pass 3: Use immediate encodings
500	// Every binding is the size of one pointer. If the next binding is a
501	// multiple of wordSize away that is within BIND_IMMEDIATE_MASK, the
502	// opcode can be scaled by wordSize into a single byte and dyld will
503	// expand it to the correct address.
504	for (auto &p : opcodes) {
505	// It's unclear why the check needs to be less than BIND_IMMEDIATE_MASK,
506	// but ld64 currently does this. This could be a potential bug, but
507	// for now, perform the same behavior to prevent mysterious bugs.
508	if ((p.opcode == BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB) &&
509	((p.data / target->wordSize) < BIND_IMMEDIATE_MASK) &&
510	((p.data % target->wordSize) == `0`)) {
511	p.opcode = BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED;
512	p.data /= target->wordSize;
513	}
514	}
515	}
516
517	static void flushOpcodes(const BindIR &op, raw_svector_ostream &os) {
518	uint8_t opcode = op.opcode & BIND_OPCODE_MASK;
519	switch (opcode) {
520	case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
521	case BIND_OPCODE_ADD_ADDR_ULEB:
522	case BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
523	os << op.opcode;
524	encodeULEB128(Value: op.data, OS&: os);
525	break;
526	case BIND_OPCODE_SET_ADDEND_SLEB:
527	os << op.opcode;
528	encodeSLEB128(Value: static_cast<int64_t>(op.data), OS&: os);
529	break;
530	case BIND_OPCODE_DO_BIND:
531	os << op.opcode;
532	break;
533	case BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
534	os << op.opcode;
535	encodeULEB128(Value: op.consecutiveCount, OS&: os);
536	encodeULEB128(Value: op.data, OS&: os);
537	break;
538	case BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED:
539	os << static_cast<uint8_t>(op.opcode \| op.data);
540	break;
541	default:
542	llvm_unreachable("cannot bind to an unrecognized symbol");
543	}
544	}
545
546	static bool needsWeakBind(const Symbol &sym) {
547	if (auto *dysym = dyn_cast<DylibSymbol>(Val: &sym))
548	return dysym->isWeakDef();
549	if (auto *defined = dyn_cast<Defined>(Val: &sym))
550	return defined->isExternalWeakDef();
551	return false;
552	}
553
554	// Non-weak bindings need to have their dylib ordinal encoded as well.
555	static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) {
556	if (config ->namespaceKind == NamespaceKind::flat \|\| dysym.isDynamicLookup())
557	return static_cast<int16_t>(BIND_SPECIAL_DYLIB_FLAT_LOOKUP);
558	assert(dysym.getFile()->isReferenced());
559	return dysym.getFile()->ordinal;
560	}
561
562	static int16_t ordinalForSymbol(const Symbol &sym) {
563	if (config ->emitChainedFixups && needsWeakBind(sym))
564	return BIND_SPECIAL_DYLIB_WEAK_LOOKUP;
565	if (const auto *dysym = dyn_cast<DylibSymbol>(Val: &sym))
566	return ordinalForDylibSymbol(dysym: *dysym);
567	assert(cast<Defined>(&sym)->interposable);
568	return BIND_SPECIAL_DYLIB_FLAT_LOOKUP;
569	}
570
571	static void encodeDylibOrdinal(int16_t ordinal, raw_svector_ostream &os) {
572	if (ordinal <= `0`) {
573	os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM \|
574	(ordinal & BIND_IMMEDIATE_MASK));
575	} else if (ordinal <= BIND_IMMEDIATE_MASK) {
576	os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM \| ordinal);
577	} else {
578	os << static_cast<uint8_t>(BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
579	encodeULEB128(Value: ordinal, OS&: os);
580	}
581	}
582
583	static void encodeWeakOverride(const Defined *defined,
584	raw_svector_ostream &os) {
585	os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM \|
586	BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION)
587	<< defined->getName() << `'\0'`;
588	}
589
590	// Organize the bindings so we can encoded them with fewer opcodes.
591	//
592	// First, all bindings for a given symbol should be grouped together.
593	// BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM is the largest opcode (since it
594	// has an associated symbol string), so we only want to emit it once per symbol.
595	//
596	// Within each group, we sort the bindings by address. Since bindings are
597	// delta-encoded, sorting them allows for a more compact result. Note that
598	// sorting by address alone ensures that bindings for the same segment / section
599	// are located together, minimizing the number of times we have to emit
600	// BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB.
601	//
602	// Finally, we sort the symbols by the address of their first binding, again
603	// to facilitate the delta-encoding process.
604	template <class Sym>
605	std::vector<std::pair<const Sym *, std::vector<BindingEntry>>>
606	sortBindings(const BindingsMap<const Sym *> &bindingsMap) {
607	std::vector<std::pair<const Sym *, std::vector<BindingEntry>>> bindingsVec(
608	bindingsMap.begin(), bindingsMap.end());
609	for (auto &p : bindingsVec) {
610	std::vector<BindingEntry> &bindings = p.second;
611	llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
612	return a.target.getVA() < b.target.getVA();
613	});
614	}
615	llvm::sort(bindingsVec, [](const auto &a, const auto &b) {
616	return a.second[`0`].target.getVA() < b.second[`0`].target.getVA();
617	});
618	return bindingsVec;
619	}
620
621	// Emit bind opcodes, which are a stream of byte-sized opcodes that dyld
622	// interprets to update a record with the following fields:
623	// segment index (of the segment to write the symbol addresses to, typically*
624	// the __DATA_CONST segment which contains the GOT)
625	// offset within the segment, indicating the next location to write a binding*
626	// symbol type*
627	// symbol library ordinal (the index of its library's LC_LOAD_DYLIB command)*
628	// symbol name*
629	// addend*
630	// When dyld sees BIND_OPCODE_DO_BIND, it uses the current record state to bind
631	// a symbol in the GOT, and increments the segment offset to point to the next
632	// entry. It does not* clear the record state after doing the bind, so*
633	// subsequent opcodes only need to encode the differences between bindings.
634	void BindingSection::finalizeContents() {
635	raw_svector_ostream os{contents};
636	Binding lastBinding;
637	int16_t lastOrdinal = `0`;
638
639	for (auto &p : sortBindings(bindingsMap)) {
640	const Symbol *sym = p.first;
641	std::vector<BindingEntry> &bindings = p.second;
642	uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
643	if (sym->isWeakRef())
644	flags \|= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
645	os << flags << sym->getName() << `'\0'`
646	<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM \| BIND_TYPE_POINTER);
647	int16_t ordinal = ordinalForSymbol(sym: *sym);
648	if (ordinal != lastOrdinal) {
649	encodeDylibOrdinal(ordinal, os);
650	lastOrdinal = ordinal;
651	}
652	std::vector<BindIR> opcodes;
653	for (const BindingEntry &b : bindings)
654	encodeBinding(osec: b.target.isec->parent,
655	outSecOff: b.target.isec->getOffset(off: b.target.offset), addend: b.addend,
656	lastBinding, opcodes);
657	if (config ->optimize > `1`)
658	optimizeOpcodes(opcodes);
659	for (const auto &op : opcodes)
660	flushOpcodes(op, os);
661	}
662	if (!bindingsMap.empty())
663	os << static_cast<uint8_t>(BIND_OPCODE_DONE);
664	}
665
666	void BindingSection::writeTo(uint8_t buf) const* {
667	memcpy(dest: buf, src: contents.data(), n: contents.size());
668	}
669
670	WeakBindingSection::WeakBindingSection()
671	: LinkEditSection (segment_names::linkEdit, section_names::weakBinding) {}
672
673	void WeakBindingSection::finalizeContents() {
674	raw_svector_ostream os{contents};
675	Binding lastBinding;
676
677	for (const Defined *defined : definitions)
678	encodeWeakOverride(defined, os);
679
680	for (auto &p : sortBindings(bindingsMap)) {
681	const Symbol *sym = p.first;
682	std::vector<BindingEntry> &bindings = p.second;
683	os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
684	<< sym->getName() << `'\0'`
685	<< static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM \| BIND_TYPE_POINTER);
686	std::vector<BindIR> opcodes;
687	for (const BindingEntry &b : bindings)
688	encodeBinding(osec: b.target.isec->parent,
689	outSecOff: b.target.isec->getOffset(off: b.target.offset), addend: b.addend,
690	lastBinding, opcodes);
691	if (config ->optimize > `1`)
692	optimizeOpcodes(opcodes);
693	for (const auto &op : opcodes)
694	flushOpcodes(op, os);
695	}
696	if (!bindingsMap.empty() \|\| !definitions.empty())
697	os << static_cast<uint8_t>(BIND_OPCODE_DONE);
698	}
699
700	void WeakBindingSection::writeTo(uint8_t buf) const* {
701	memcpy(dest: buf, src: contents.data(), n: contents.size());
702	}
703
704	StubsSection::StubsSection()
705	: SyntheticSection (segment_names::text, section_names::stubs) {
706	flags = S_SYMBOL_STUBS \| S_ATTR_SOME_INSTRUCTIONS \| S_ATTR_PURE_INSTRUCTIONS;
707	// The stubs section comprises machine instructions, which are aligned to
708	// 4 bytes on the archs we care about.
709	align = `4`;
710	reserved2 = target->stubSize;
711	}
712
713	uint64_t StubsSection::getSize() const {
714	return entries.size() * target->stubSize;
715	}
716
717	void StubsSection::writeTo(uint8_t buf) const* {
718	size_t off = `0`;
719	for (const Symbol *sym : entries) {
720	uint64_t pointerVA =
721	config ->emitChainedFixups ? sym->getGotVA() : sym->getLazyPtrVA();
722	target->writeStub(buf: buf + off, *sym, pointerVA);
723	off += target->stubSize;
724	}
725	}
726
727	void StubsSection::finalize() { isFinal = true; }
728
729	static void addBindingsForStub(Symbol *sym) {
730	assert(!config ->emitChainedFixups);
731	if (auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
732	if (sym->isWeakDef()) {
733	in.binding->addEntry(dysym, isec: in.lazyPointers->isec,
734	offset: sym->stubsIndex * target->wordSize);
735	in.weakBinding->addEntry(symbol: sym, isec: in.lazyPointers->isec,
736	offset: sym->stubsIndex * target->wordSize);
737	} else {
738	in.lazyBinding->addEntry(dysym);
739	}
740	} else if (auto *defined = dyn_cast<Defined>(Val: sym)) {
741	if (defined->isExternalWeakDef()) {
742	in.rebase->addEntry(isec: in.lazyPointers->isec,
743	offset: sym->stubsIndex * target->wordSize);
744	in.weakBinding->addEntry(symbol: sym, isec: in.lazyPointers->isec,
745	offset: sym->stubsIndex * target->wordSize);
746	} else if (defined->interposable) {
747	in.lazyBinding->addEntry(dysym: sym);
748	} else {
749	llvm_unreachable("invalid stub target");
750	}
751	} else {
752	llvm_unreachable("invalid stub target symbol type");
753	}
754	}
755
756	void StubsSection::addEntry(Symbol *sym) {
757	bool inserted = entries.insert(X: sym);
758	if (inserted) {
759	sym->stubsIndex = entries.size() - `1`;
760
761	if (config ->emitChainedFixups)
762	in.got->addEntry(sym);
763	else
764	addBindingsForStub(sym);
765	}
766	}
767
768	StubHelperSection::StubHelperSection()
769	: SyntheticSection (segment_names::text, section_names::stubHelper) {
770	flags = S_ATTR_SOME_INSTRUCTIONS \| S_ATTR_PURE_INSTRUCTIONS;
771	align = `4`; // This section comprises machine instructions
772	}
773
774	uint64_t StubHelperSection::getSize() const {
775	return target->stubHelperHeaderSize +
776	in.lazyBinding->getEntries().size() * target->stubHelperEntrySize;
777	}
778
779	bool StubHelperSection::isNeeded() const { return in.lazyBinding->isNeeded(); }
780
781	void StubHelperSection::writeTo(uint8_t buf) const* {
782	target->writeStubHelperHeader(buf);
783	size_t off = target->stubHelperHeaderSize;
784	for (const Symbol *sym : in.lazyBinding->getEntries()) {
785	target->writeStubHelperEntry(buf: buf + off, *sym, entryAddr: addr + off);
786	off += target->stubHelperEntrySize;
787	}
788	}
789
790	void StubHelperSection::setUp() {
791	Symbol binder = symtab ->addUndefined(name: "dyld_stub_binder", /file=/*nullptr,
792	/isWeakRef=/false);
793	if (auto *undefined = dyn_cast<Undefined>(Val: binder))
794	treatUndefinedSymbol(*undefined,
795	source: "lazy binding (normally in libSystem.dylib)");
796
797	// treatUndefinedSymbol() can replace binder with a DylibSymbol; re-check.
798	stubBinder = dyn_cast_or_null<DylibSymbol>(Val: binder);
799	if (stubBinder == nullptr)
800	return;
801
802	in.got->addEntry(sym: stubBinder);
803
804	in.imageLoaderCache->parent =
805	ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
806	addInputSection(inputSection: in.imageLoaderCache);
807	// Since this isn't in the symbol table or in any input file, the noDeadStrip
808	// argument doesn't matter.
809	dyldPrivate =
810	make<Defined>(args: "__dyld_private", args: nullptr, args&: in.imageLoaderCache, args: `0`, args: `0`,
811	/isWeakDef=/args: false,
812	/isExternal=/args: false, /isPrivateExtern=/args: false,
813	/includeInSymtab=/args: true,
814	/isReferencedDynamically=/args: false,
815	/noDeadStrip=/args: false);
816	dyldPrivate->used = true;
817	}
818
819	llvm::DenseMap<llvm::CachedHashStringRef, ConcatInputSection *>
820	ObjCSelRefsHelper::methnameToSelref;
821	void ObjCSelRefsHelper::initialize() {
822	// Do not fold selrefs without ICF.
823	if (config ->icfLevel == ICFLevel::none)
824	return;
825
826	// Search methnames already referenced in __objc_selrefs
827	// Map the name to the corresponding selref entry
828	// which we will reuse when creating objc stubs.
829	for (ConcatInputSection *isec : inputSections) {
830	if (isec->shouldOmitFromOutput())
831	continue;
832	if (isec->getName() != section_names::objcSelrefs)
833	continue;
834	// We expect a single relocation per selref entry to __objc_methname that
835	// might be aggregated.
836	assert(isec->relocs.size() == `1`);
837	auto Reloc = isec->relocs [`0`];
838	if (const auto sym = Reloc.referent.dyn_cast<Symbol >()) {
839	if (const auto *d = dyn_cast<Defined>(Val: sym)) {
840	auto *cisec = cast<CStringInputSection>(Val: d->isec());
841	auto methname = cisec->getStringRefAtOffset(off: d->value);
842	methnameToSelref [CachedHashStringRef (methname)] = isec;
843	}
844	}
845	}
846	}
847
848	void ObjCSelRefsHelper::cleanup() { methnameToSelref.clear(); }
849
850	ConcatInputSection *ObjCSelRefsHelper::makeSelRef(StringRef methname) {
851	auto methnameOffset =
852	in.objcMethnameSection->getStringOffset(str: methname).outSecOff;
853
854	size_t wordSize = target->wordSize;
855	uint8_t *selrefData = bAlloc().Allocate<uint8_t>(Num: wordSize);
856	write64le(P: selrefData, V: methnameOffset);
857	ConcatInputSection *objcSelref =
858	makeSyntheticInputSection(segName: segment_names::data, sectName: section_names::objcSelrefs,
859	flags: S_LITERAL_POINTERS \| S_ATTR_NO_DEAD_STRIP,
860	data: ArrayRef<uint8_t>{selrefData, wordSize},
861	/align=/wordSize);
862	assert(objcSelref->live);
863	objcSelref->relocs.push_back(x: {/type=/target->unsignedRelocType,
864	/pcrel=/false, /length=/`3`,
865	/offset=/`0`,
866	/addend=/static_cast<int64_t>(methnameOffset),
867	/referent=/in.objcMethnameSection->isec});
868	objcSelref->parent = ConcatOutputSection::getOrCreateForInput(objcSelref);
869	addInputSection(inputSection: objcSelref);
870	objcSelref->isFinal = true;
871	methnameToSelref [CachedHashStringRef (methname)] = objcSelref;
872	return objcSelref;
873	}
874
875	ConcatInputSection *ObjCSelRefsHelper::getSelRef(StringRef methname) {
876	auto it = methnameToSelref.find(Val: CachedHashStringRef (methname));
877	if (it == methnameToSelref.end())
878	return nullptr;
879	return it ->second;
880	}
881
882	ObjCStubsSection::ObjCStubsSection()
883	: SyntheticSection (segment_names::text, section_names::objcStubs) {
884	flags = S_ATTR_SOME_INSTRUCTIONS \| S_ATTR_PURE_INSTRUCTIONS;
885	align = config ->objcStubsMode == ObjCStubsMode::fast
886	? target->objcStubsFastAlignment
887	: target->objcStubsSmallAlignment;
888	}
889
890	bool ObjCStubsSection::isObjCStubSymbol(Symbol *sym) {
891	return sym->getName().starts_with(Prefix: symbolPrefix);
892	}
893
894	StringRef ObjCStubsSection::getMethname(Symbol *sym) {
895	assert(isObjCStubSymbol(sym) && "not an objc stub");
896	auto name = sym->getName();
897	StringRef methname = name.drop_front(N: symbolPrefix.size());
898	return methname;
899	}
900
901	void ObjCStubsSection::addEntry(Symbol *sym) {
902	StringRef methname = getMethname(sym);
903	// We create a selref entry for each unique methname.
904	if (!ObjCSelRefsHelper::getSelRef(methname))
905	ObjCSelRefsHelper::makeSelRef(methname);
906
907	auto stubSize = config ->objcStubsMode == ObjCStubsMode::fast
908	? target->objcStubsFastSize
909	: target->objcStubsSmallSize;
910	Defined *newSym = replaceSymbol<Defined>(
911	s: sym, arg: sym->getName(), arg: nullptr, arg&: isec,
912	/value=/arg: symbols.size() * stubSize,
913	/size=/arg&: stubSize,
914	/isWeakDef=/arg: false, /isExternal=/arg: true, /isPrivateExtern=/arg: true,
915	/includeInSymtab=/arg: true, /isReferencedDynamically=/arg: false,
916	/noDeadStrip=/arg: false);
917	symbols.push_back(x: newSym);
918	}
919
920	void ObjCStubsSection::setUp() {
921	objcMsgSend = symtab ->addUndefined(name: "_objc_msgSend", /file=/nullptr,
922	/isWeakRef=/false);
923	if (auto *undefined = dyn_cast<Undefined>(Val: objcMsgSend))
924	treatUndefinedSymbol(*undefined,
925	source: "lazy binding (normally in libobjc.dylib)");
926	objcMsgSend->used = true;
927	if (config ->objcStubsMode == ObjCStubsMode::fast) {
928	in.got->addEntry(sym: objcMsgSend);
929	assert(objcMsgSend->isInGot());
930	} else {
931	assert(config ->objcStubsMode == ObjCStubsMode::small);
932	// In line with ld64's behavior, when objc_msgSend is a direct symbol,
933	// we directly reference it.
934	// In other cases, typically when binding in libobjc.dylib,
935	// we generate a stub to invoke objc_msgSend.
936	if (!isa<Defined>(Val: objcMsgSend))
937	in.stubs->addEntry(sym: objcMsgSend);
938	}
939	}
940
941	uint64_t ObjCStubsSection::getSize() const {
942	auto stubSize = config ->objcStubsMode == ObjCStubsMode::fast
943	? target->objcStubsFastSize
944	: target->objcStubsSmallSize;
945	return stubSize * symbols.size();
946	}
947
948	void ObjCStubsSection::writeTo(uint8_t buf) const* {
949	uint64_t stubOffset = `0`;
950	for (size_t i = `0`, n = symbols.size(); i < n; ++i) {
951	Defined *sym = symbols [i];
952
953	auto methname = getMethname(sym);
954	InputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
955	assert(selRef != nullptr && "no selref for methname");
956	auto selrefAddr = selRef->getVA(off: `0`);
957	target->writeObjCMsgSendStub(buf: buf + stubOffset, sym, stubsAddr: in.objcStubs->addr,
958	stubOffset, selrefVA: selrefAddr, objcMsgSend);
959	}
960	}
961
962	LazyPointerSection::LazyPointerSection()
963	: SyntheticSection (segment_names::data, section_names::lazySymbolPtr) {
964	align = target->wordSize;
965	flags = S_LAZY_SYMBOL_POINTERS;
966	}
967
968	uint64_t LazyPointerSection::getSize() const {
969	return in.stubs->getEntries().size() * target->wordSize;
970	}
971
972	bool LazyPointerSection::isNeeded() const {
973	return !in.stubs->getEntries().empty();
974	}
975
976	void LazyPointerSection::writeTo(uint8_t buf) const* {
977	size_t off = `0`;
978	for (const Symbol *sym : in.stubs->getEntries()) {
979	if (const auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
980	if (dysym->hasStubsHelper()) {
981	uint64_t stubHelperOffset =
982	target->stubHelperHeaderSize +
983	dysym->stubsHelperIndex * target->stubHelperEntrySize;
984	write64le(P: buf + off, V: in.stubHelper->addr + stubHelperOffset);
985	}
986	} else {
987	write64le(P: buf + off, V: sym->getVA());
988	}
989	off += target->wordSize;
990	}
991	}
992
993	LazyBindingSection::LazyBindingSection()
994	: LinkEditSection (segment_names::linkEdit, section_names::lazyBinding) {}
995
996	void LazyBindingSection::finalizeContents() {
997	// TODO: Just precompute output size here instead of writing to a temporary
998	// buffer
999	for (Symbol *sym : entries)
1000	sym->lazyBindOffset = encode(*sym);
1001	}
1002
1003	void LazyBindingSection::writeTo(uint8_t buf) const* {
1004	memcpy(dest: buf, src: contents.data(), n: contents.size());
1005	}
1006
1007	void LazyBindingSection::addEntry(Symbol *sym) {
1008	assert(!config ->emitChainedFixups && "Chained fixups always bind eagerly");
1009	if (entries.insert(X: sym)) {
1010	sym->stubsHelperIndex = entries.size() - `1`;
1011	in.rebase->addEntry(isec: in.lazyPointers->isec,
1012	offset: sym->stubsIndex * target->wordSize);
1013	}
1014	}
1015
1016	// Unlike the non-lazy binding section, the bind opcodes in this section aren't
1017	// interpreted all at once. Rather, dyld will start interpreting opcodes at a
1018	// given offset, typically only binding a single symbol before it finds a
1019	// BIND_OPCODE_DONE terminator. As such, unlike in the non-lazy-binding case,
1020	// we cannot encode just the differences between symbols; we have to emit the
1021	// complete bind information for each symbol.
1022	uint32_t LazyBindingSection::encode(const Symbol &sym) {
1023	uint32_t opstreamOffset = contents.size();
1024	OutputSegment *dataSeg = in.lazyPointers->parent;
1025	os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB \|
1026	dataSeg->index);
1027	uint64_t offset =
1028	in.lazyPointers->addr - dataSeg->addr + sym.stubsIndex * target->wordSize;
1029	encodeULEB128(Value: offset, OS&: os);
1030	encodeDylibOrdinal(ordinal: ordinalForSymbol(sym), os);
1031
1032	uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
1033	if (sym.isWeakRef())
1034	flags \|= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
1035
1036	os << flags << sym.getName() << `'\0'`
1037	<< static_cast<uint8_t>(BIND_OPCODE_DO_BIND)
1038	<< static_cast<uint8_t>(BIND_OPCODE_DONE);
1039	return opstreamOffset;
1040	}
1041
1042	ExportSection::ExportSection()
1043	: LinkEditSection (segment_names::linkEdit, section_names::export_) {}
1044
1045	void ExportSection::finalizeContents() {
1046	trieBuilder.setImageBase(in.header->addr);
1047	for (const Symbol *sym : symtab ->getSymbols()) {
1048	if (const auto *defined = dyn_cast<Defined>(Val: sym)) {
1049	if (defined->privateExtern \|\| !defined->isLive())
1050	continue;
1051	trieBuilder.addSymbol(sym: *defined);
1052	hasWeakSymbol = hasWeakSymbol \|\| sym->isWeakDef();
1053	} else if (auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
1054	if (dysym->shouldReexport)
1055	trieBuilder.addSymbol(sym: *dysym);
1056	}
1057	}
1058	size = trieBuilder.build();
1059	}
1060
1061	void ExportSection::writeTo(uint8_t buf) const* { trieBuilder.writeTo(buf); }
1062
1063	DataInCodeSection::DataInCodeSection()
1064	: LinkEditSection (segment_names::linkEdit, section_names::dataInCode) {}
1065
1066	template <class LP>
1067	static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() {
1068	std::vector<MachO::data_in_code_entry> dataInCodeEntries;
1069	for (const InputFile *inputFile : inputFiles) {
1070	if (!isa<ObjFile>(Val: inputFile))
1071	continue;
1072	const ObjFile *objFile = cast<ObjFile>(Val: inputFile);
1073	ArrayRef<MachO::data_in_code_entry> entries = objFile->getDataInCode();
1074	if (entries.empty())
1075	continue;
1076
1077	std::vector<MachO::data_in_code_entry> sortedEntries;
1078	sortedEntries.assign(first: entries.begin(), last: entries.end());
1079	llvm::sort(sortedEntries, [](const data_in_code_entry &lhs,
1080	const data_in_code_entry &rhs) {
1081	return lhs.offset < rhs.offset;
1082	});
1083
1084	// For each code subsection find 'data in code' entries residing in it.
1085	// Compute the new offset values as
1086	// <offset within subsection> + <subsection address> - <__TEXT address>.
1087	for (const Section *section : objFile->sections) {
1088	for (const Subsection &subsec : section->subsections) {
1089	const InputSection *isec = subsec.isec;
1090	if (!isCodeSection(isec))
1091	continue;
1092	if (cast<ConcatInputSection>(Val: isec)->shouldOmitFromOutput())
1093	continue;
1094	const uint64_t beginAddr = section->addr + subsec.offset;
1095	auto it = llvm::lower_bound(
1096	sortedEntries, beginAddr,
1097	[](const MachO::data_in_code_entry &entry, uint64_t addr) {
1098	return entry.offset < addr;
1099	});
1100	const uint64_t endAddr = beginAddr + isec->getSize();
1101	for (const auto end = sortedEntries.end();
1102	it != end && it->offset + it->length <= endAddr; ++it)
1103	dataInCodeEntries.push_back(
1104	{static_cast<uint32_t>(isec->getVA(off: it->offset - beginAddr) -
1105	in.header->addr),
1106	it->length, it->kind});
1107	}
1108	}
1109	}
1110
1111	// ld64 emits the table in sorted order too.
1112	llvm::sort(dataInCodeEntries,
1113	[](const data_in_code_entry &lhs, const data_in_code_entry &rhs) {
1114	return lhs.offset < rhs.offset;
1115	});
1116	return dataInCodeEntries;
1117	}
1118
1119	void DataInCodeSection::finalizeContents() {
1120	entries = target->wordSize == `8` ? collectDataInCodeEntries<LP64>()
1121	: collectDataInCodeEntries<ILP32>();
1122	}
1123
1124	void DataInCodeSection::writeTo(uint8_t buf) const* {
1125	if (!entries.empty())
1126	memcpy(dest: buf, src: entries.data(), n: getRawSize());
1127	}
1128
1129	FunctionStartsSection::FunctionStartsSection()
1130	: LinkEditSection (segment_names::linkEdit, section_names::functionStarts) {}
1131
1132	void FunctionStartsSection::finalizeContents() {
1133	raw_svector_ostream os{contents};
1134	std::vector<uint64_t> addrs;
1135	for (const InputFile *file : inputFiles) {
1136	if (auto *objFile = dyn_cast<ObjFile>(Val: file)) {
1137	for (const Symbol *sym : objFile->symbols) {
1138	if (const auto *defined = dyn_cast_or_null<Defined>(Val: sym)) {
1139	if (!defined->isec() \|\| !isCodeSection(defined->isec()) \|\|
1140	!defined->isLive())
1141	continue;
1142	addrs.push_back(x: defined->getVA());
1143	}
1144	}
1145	}
1146	}
1147	llvm::sort(C&: addrs);
1148	uint64_t addr = in.header->addr;
1149	for (uint64_t nextAddr : addrs) {
1150	uint64_t delta = nextAddr - addr;
1151	if (delta == `0`)
1152	continue;
1153	encodeULEB128(Value: delta, OS&: os);
1154	addr = nextAddr;
1155	}
1156	os << `'\0'`;
1157	}
1158
1159	void FunctionStartsSection::writeTo(uint8_t buf) const* {
1160	memcpy(dest: buf, src: contents.data(), n: contents.size());
1161	}
1162
1163	SymtabSection::SymtabSection(StringTableSection &stringTableSection)
1164	: LinkEditSection (segment_names::linkEdit, section_names::symbolTable),
1165	stringTableSection(stringTableSection) {}
1166
1167	void SymtabSection::emitBeginSourceStab(StringRef sourceFile) {
1168	StabsEntry stab(N_SO);
1169	stab.strx = stringTableSection.addString(saver().save(S: sourceFile));
1170	stabs.emplace_back(args: std::move(stab));
1171	}
1172
1173	void SymtabSection::emitEndSourceStab() {
1174	StabsEntry stab(N_SO);
1175	stab.sect = `1`;
1176	stabs.emplace_back(args: std::move(stab));
1177	}
1178
1179	void SymtabSection::emitObjectFileStab(ObjFile *file) {
1180	StabsEntry stab(N_OSO);
1181	stab.sect = target->cpuSubtype;
1182	SmallString<`261`> path(!file->archiveName.empty() ? file->archiveName
1183	: file->getName());
1184	std::error_code ec = sys::fs::make_absolute(path);
1185	if (ec)
1186	fatal(msg: "failed to get absolute path for " + path);
1187
1188	if (!file->archiveName.empty())
1189	path.append(Refs: {"(", file->getName(), ")"});
1190
1191	StringRef adjustedPath = saver().save(S: path.str());
1192	adjustedPath.consume_front(Prefix: config ->osoPrefix);
1193
1194	stab.strx = stringTableSection.addString(adjustedPath);
1195	stab.desc = `1`;
1196	stab.value = file->modTime;
1197	stabs.emplace_back(args: std::move(stab));
1198	}
1199
1200	void SymtabSection::emitEndFunStab(Defined *defined) {
1201	StabsEntry stab(N_FUN);
1202	stab.value = defined->size;
1203	stabs.emplace_back(args: std::move(stab));
1204	}
1205
1206	void SymtabSection::emitStabs() {
1207	if (config ->omitDebugInfo)
1208	return;
1209
1210	for (const std::string &s : config ->astPaths) {
1211	StabsEntry astStab(N_AST);
1212	astStab.strx = stringTableSection.addString(s);
1213	stabs.emplace_back(args: std::move(astStab));
1214	}
1215
1216	// Cache the file ID for each symbol in an std::pair for faster sorting.
1217	using SortingPair = std::pair<Defined , int*>;
1218	std::vector<SortingPair> symbolsNeedingStabs;
1219	for (const SymtabEntry &entry :
1220	concat<SymtabEntry>(Ranges&: localSymbols, Ranges&: externalSymbols)) {
1221	Symbol *sym = entry.sym;
1222	assert(sym->isLive() &&
1223	"dead symbols should not be in localSymbols, externalSymbols");
1224	if (auto *defined = dyn_cast<Defined>(Val: sym)) {
1225	// Excluded symbols should have been filtered out in finalizeContents().
1226	assert(defined->includeInSymtab);
1227
1228	if (defined->isAbsolute())
1229	continue;
1230
1231	// Constant-folded symbols go in the executable's symbol table, but don't
1232	// get a stabs entry unless --keep-icf-stabs flag is specified.
1233	if (!config ->keepICFStabs &&
1234	defined->identicalCodeFoldingKind != Symbol::ICFFoldKind::None)
1235	continue;
1236
1237	ObjFile *file = defined->getObjectFile();
1238	if (!file \|\| !file->compileUnit)
1239	continue;
1240
1241	// We use the symbol's original InputSection to get the file id,
1242	// even for ICF folded symbols, to ensure STABS entries point to the
1243	// correct object file where the symbol was originally defined
1244	symbolsNeedingStabs.emplace_back(args&: defined,
1245	args: defined->originalIsec->getFile()->id);
1246	}
1247	}
1248
1249	llvm::stable_sort(Range&: symbolsNeedingStabs, C: llvm::less_second ());
1250
1251	// Emit STABS symbols so that dsymutil and/or the debugger can map address
1252	// regions in the final binary to the source and object files from which they
1253	// originated.
1254	InputFile lastFile = nullptr*;
1255	for (SortingPair &pair : symbolsNeedingStabs) {
1256	Defined *defined = pair.first;
1257	// When emitting STABS entries for a symbol, always use the original
1258	// InputSection of the defined symbol, not the section of the function body
1259	// (which might be a different function entirely if ICF folded this
1260	// function). This ensures STABS entries point back to the original object
1261	// file.
1262	InputSection *isec = defined->originalIsec;
1263	ObjFile *file = cast<ObjFile>(Val: isec->getFile());
1264
1265	if (lastFile == nullptr \|\| lastFile != file) {
1266	if (lastFile != nullptr)
1267	emitEndSourceStab();
1268	lastFile = file;
1269
1270	emitBeginSourceStab(sourceFile: file->sourceFile());
1271	emitObjectFileStab(file);
1272	}
1273
1274	StabsEntry symStab;
1275	symStab.sect = isec->parent->index;
1276	symStab.strx = stringTableSection.addString(defined->getName());
1277
1278	// When using --keep-icf-stabs, we need to use the VA of the actual function
1279	// body that the linker will place in the binary. This is the function that
1280	// the symbol refers to after ICF folding.
1281	if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
1282	// For thunks, we need to get the function they point to
1283	Defined *target = getBodyForThunkFoldedSym(foldedSym: defined);
1284	symStab.value = target->getVA();
1285	} else {
1286	symStab.value = defined->getVA();
1287	}
1288
1289	if (isCodeSection(isec)) {
1290	symStab.type = N_FUN;
1291	stabs.emplace_back(args: std::move(symStab));
1292	// For the end function marker in STABS, we need to use the size of the
1293	// actual function body that exists in the output binary
1294	if (defined->identicalCodeFoldingKind == Symbol::ICFFoldKind::Thunk) {
1295	// For thunks, we use the target's size
1296	Defined *target = getBodyForThunkFoldedSym(foldedSym: defined);
1297	emitEndFunStab(defined: target);
1298	} else {
1299	emitEndFunStab(defined);
1300	}
1301	} else {
1302	symStab.type = defined->isExternal() ? N_GSYM : N_STSYM;
1303	stabs.emplace_back(args: std::move(symStab));
1304	}
1305	}
1306
1307	if (!stabs.empty())
1308	emitEndSourceStab();
1309	}
1310
1311	void SymtabSection::finalizeContents() {
1312	auto addSymbol = [&](std::vector<SymtabEntry> &symbols, Symbol *sym) {
1313	uint32_t strx = stringTableSection.addString(sym->getName());
1314	symbols.push_back(x: {.sym: sym, .strx: strx});
1315	};
1316
1317	std::function<void(Symbol *)> localSymbolsHandler;
1318	switch (config ->localSymbolsPresence) {
1319	case SymtabPresence::All:
1320	localSymbolsHandler = [&](Symbol *sym) { addSymbol (localSymbols, sym); };
1321	break;
1322	case SymtabPresence::None:
1323	localSymbolsHandler = [&](Symbol ) { /* Do nothing/ };
1324	break;
1325	case SymtabPresence::SelectivelyIncluded:
1326	localSymbolsHandler = [&](Symbol *sym) {
1327	if (config ->localSymbolPatterns.match(symbolName: sym->getName()))
1328	addSymbol (localSymbols, sym);
1329	};
1330	break;
1331	case SymtabPresence::SelectivelyExcluded:
1332	localSymbolsHandler = [&](Symbol *sym) {
1333	if (!config ->localSymbolPatterns.match(symbolName: sym->getName()))
1334	addSymbol (localSymbols, sym);
1335	};
1336	break;
1337	}
1338
1339	// Local symbols aren't in the SymbolTable, so we walk the list of object
1340	// files to gather them.
1341	// But if `-x` is set, then we don't need to. localSymbolsHandler() will do
1342	// the right thing regardless, but this check is a perf optimization because
1343	// iterating through all the input files and their symbols is expensive.
1344	if (config ->localSymbolsPresence != SymtabPresence::None) {
1345	for (const InputFile *file : inputFiles) {
1346	if (auto *objFile = dyn_cast<ObjFile>(Val: file)) {
1347	for (Symbol *sym : objFile->symbols) {
1348	if (auto *defined = dyn_cast_or_null<Defined>(Val: sym)) {
1349	if (defined->isExternal() \|\| !defined->isLive() \|\|
1350	!defined->includeInSymtab)
1351	continue;
1352	localSymbolsHandler (sym);
1353	}
1354	}
1355	}
1356	}
1357	}
1358
1359	// __dyld_private is a local symbol too. It's linker-created and doesn't
1360	// exist in any object file.
1361	if (in.stubHelper && in.stubHelper->dyldPrivate)
1362	localSymbolsHandler (in.stubHelper->dyldPrivate);
1363
1364	for (Symbol *sym : symtab ->getSymbols()) {
1365	if (!sym->isLive())
1366	continue;
1367	if (auto *defined = dyn_cast<Defined>(Val: sym)) {
1368	if (!defined->includeInSymtab)
1369	continue;
1370	assert(defined->isExternal());
1371	if (defined->privateExtern)
1372	localSymbolsHandler (defined);
1373	else
1374	addSymbol (externalSymbols, defined);
1375	} else if (auto *dysym = dyn_cast<DylibSymbol>(Val: sym)) {
1376	if (dysym->isReferenced())
1377	addSymbol (undefinedSymbols, sym);
1378	}
1379	}
1380
1381	emitStabs();
1382	uint32_t symtabIndex = stabs.size();
1383	for (const SymtabEntry &entry :
1384	concat<SymtabEntry>(Ranges&: localSymbols, Ranges&: externalSymbols, Ranges&: undefinedSymbols)) {
1385	entry.sym->symtabIndex = symtabIndex++;
1386	}
1387	}
1388
1389	uint32_t SymtabSection::getNumSymbols() const {
1390	return stabs.size() + localSymbols.size() + externalSymbols.size() +
1391	undefinedSymbols.size();
1392	}
1393
1394	// This serves to hide (type-erase) the template parameter from SymtabSection.
1395	template <class LP> class SymtabSectionImpl final : public SymtabSection {
1396	public:
1397	SymtabSectionImpl(StringTableSection &stringTableSection)
1398	: SymtabSection(stringTableSection) {}
1399	uint64_t getRawSize() const override;
1400	void writeTo(uint8_t buf) const* override;
1401	};
1402
1403	template <class LP> uint64_t SymtabSectionImpl<LP>::getRawSize() const {
1404	return getNumSymbols() * sizeof(typename LP::nlist);
1405	}
1406
1407	template <class LP> void SymtabSectionImpl<LP>::writeTo(uint8_t buf) const* {
1408	auto nList = reinterpret_cast<typename* LP::nlist *>(buf);
1409	// Emit the stabs entries before the "real" symbols. We cannot emit them
1410	// after as that would render Symbol::symtabIndex inaccurate.
1411	for (const StabsEntry &entry : stabs) {
1412	nList->n_strx = entry.strx;
1413	nList->n_type = entry.type;
1414	nList->n_sect = entry.sect;
1415	nList->n_desc = entry.desc;
1416	nList->n_value = entry.value;
1417	++nList;
1418	}
1419
1420	for (const SymtabEntry &entry : concat<const SymtabEntry>(
1421	localSymbols, externalSymbols, undefinedSymbols)) {
1422	nList->n_strx = entry.strx;
1423	// TODO populate n_desc with more flags
1424	if (auto *defined = dyn_cast<Defined>(Val: entry.sym)) {
1425	uint8_t scope = `0`;
1426	if (defined->privateExtern) {
1427	// Private external -- dylib scoped symbol.
1428	// Promote to non-external at link time.
1429	scope = N_PEXT;
1430	} else if (defined->isExternal()) {
1431	// Normal global symbol.
1432	scope = N_EXT;
1433	} else {
1434	// TU-local symbol from localSymbols.
1435	scope = `0`;
1436	}
1437
1438	if (defined->isAbsolute()) {
1439	nList->n_type = scope \| N_ABS;
1440	nList->n_sect = NO_SECT;
1441	nList->n_value = defined->value;
1442	} else {
1443	nList->n_type = scope \| N_SECT;
1444	nList->n_sect = defined->isec()->parent->index;
1445	// For the N_SECT symbol type, n_value is the address of the symbol
1446	nList->n_value = defined->getVA();
1447	}
1448	nList->n_desc \|= defined->isExternalWeakDef() ? N_WEAK_DEF : `0`;
1449	nList->n_desc \|=
1450	defined->referencedDynamically ? REFERENCED_DYNAMICALLY : `0`;
1451	} else if (auto *dysym = dyn_cast<DylibSymbol>(Val: entry.sym)) {
1452	uint16_t n_desc = nList->n_desc;
1453	int16_t ordinal = ordinalForDylibSymbol(dysym: *dysym);
1454	if (ordinal == BIND_SPECIAL_DYLIB_FLAT_LOOKUP)
1455	SET_LIBRARY_ORDINAL(n_desc, ordinal: DYNAMIC_LOOKUP_ORDINAL);
1456	else if (ordinal == BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE)
1457	SET_LIBRARY_ORDINAL(n_desc, ordinal: EXECUTABLE_ORDINAL);
1458	else {
1459	assert(ordinal > `0`);
1460	SET_LIBRARY_ORDINAL(n_desc, ordinal: static_cast<uint8_t>(ordinal));
1461	}
1462
1463	nList->n_type = N_EXT;
1464	n_desc \|= dysym->isWeakDef() ? N_WEAK_DEF : `0`;
1465	n_desc \|= dysym->isWeakRef() ? N_WEAK_REF : `0`;
1466	nList->n_desc = n_desc;
1467	}
1468	++nList;
1469	}
1470	}
1471
1472	template <class LP>
1473	SymtabSection *
1474	macho::makeSymtabSection(StringTableSection &stringTableSection) {
1475	return make<SymtabSectionImpl<LP>>(stringTableSection);
1476	}
1477
1478	IndirectSymtabSection::IndirectSymtabSection()
1479	: LinkEditSection (segment_names::linkEdit,
1480	section_names::indirectSymbolTable) {}
1481
1482	uint32_t IndirectSymtabSection::getNumSymbols() const {
1483	uint32_t size = in.got->getEntries().size() +
1484	in.tlvPointers->getEntries().size() +
1485	in.stubs->getEntries().size();
1486	if (!config ->emitChainedFixups)
1487	size += in.stubs->getEntries().size();
1488	return size;
1489	}
1490
1491	bool IndirectSymtabSection::isNeeded() const {
1492	return in.got->isNeeded() \|\| in.tlvPointers->isNeeded() \|\|
1493	in.stubs->isNeeded();
1494	}
1495
1496	void IndirectSymtabSection::finalizeContents() {
1497	uint32_t off = `0`;
1498	in.got->reserved1 = off;
1499	off += in.got->getEntries().size();
1500	in.tlvPointers->reserved1 = off;
1501	off += in.tlvPointers->getEntries().size();
1502	in.stubs->reserved1 = off;
1503	if (in.lazyPointers) {
1504	off += in.stubs->getEntries().size();
1505	in.lazyPointers->reserved1 = off;
1506	}
1507	}
1508
1509	static uint32_t indirectValue(const Symbol *sym) {
1510	if (sym->symtabIndex == UINT32_MAX \|\| !needsBinding(sym))
1511	return INDIRECT_SYMBOL_LOCAL;
1512	return sym->symtabIndex;
1513	}
1514
1515	void IndirectSymtabSection::writeTo(uint8_t buf) const* {
1516	uint32_t off = `0`;
1517	for (const Symbol *sym : in.got->getEntries()) {
1518	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1519	++off;
1520	}
1521	for (const Symbol *sym : in.tlvPointers->getEntries()) {
1522	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1523	++off;
1524	}
1525	for (const Symbol *sym : in.stubs->getEntries()) {
1526	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1527	++off;
1528	}
1529
1530	if (in.lazyPointers) {
1531	// There is a 1:1 correspondence between stubs and LazyPointerSection
1532	// entries. But giving __stubs and __la_symbol_ptr the same reserved1
1533	// (the offset into the indirect symbol table) so that they both refer
1534	// to the same range of offsets confuses `strip`, so write the stubs
1535	// symbol table offsets a second time.
1536	for (const Symbol *sym : in.stubs->getEntries()) {
1537	write32le(P: buf + off * sizeof(uint32_t), V: indirectValue(sym));
1538	++off;
1539	}
1540	}
1541	}
1542
1543	StringTableSection::StringTableSection()
1544	: LinkEditSection (segment_names::linkEdit, section_names::stringTable) {}
1545
1546	uint32_t StringTableSection::addString(StringRef str) {
1547	uint32_t strx = size;
1548	if (config ->dedupSymbolStrings) {
1549	llvm::CachedHashStringRef hashedStr(str);
1550	auto [it, inserted] = stringMap.try_emplace(Key: hashedStr, Args&: strx);
1551	if (!inserted)
1552	return it ->second;
1553	}
1554
1555	strings.push_back(x: str);
1556	size += str.size() + `1`; // account for null terminator
1557	return strx;
1558	}
1559
1560	void StringTableSection::writeTo(uint8_t buf) const* {
1561	uint32_t off = `0`;
1562	for (StringRef str : strings) {
1563	memcpy(dest: buf + off, src: str.data(), n: str.size());
1564	off += str.size() + `1`; // account for null terminator
1565	}
1566	}
1567
1568	static_assert((CodeSignatureSection::blobHeadersSize % `8`) == `0`);
1569	static_assert((CodeSignatureSection::fixedHeadersSize % `8`) == `0`);
1570
1571	CodeSignatureSection::CodeSignatureSection()
1572	: LinkEditSection (segment_names::linkEdit, section_names::codeSignature) {
1573	align = `16`; // required by libstuff
1574
1575	// XXX: This mimics LD64, where it uses the install-name as codesign
1576	// identifier, if available.
1577	if (!config ->installName.empty())
1578	fileName = config ->installName;
1579	else
1580	// FIXME: Consider using finalOutput instead of outputFile.
1581	fileName = config ->outputFile;
1582
1583	size_t slashIndex = fileName.rfind(Str: "/");
1584	if (slashIndex != std::string::npos)
1585	fileName = fileName.drop_front(N: slashIndex + `1`);
1586
1587	// NOTE: Any changes to these calculations should be repeated
1588	// in llvm-objcopy's MachOLayoutBuilder::layoutTail.
1589	allHeadersSize = alignTo<`16`>(Value: fixedHeadersSize + fileName.size() + `1`);
1590	fileNamePad = allHeadersSize - fixedHeadersSize - fileName.size();
1591	}
1592
1593	uint32_t CodeSignatureSection::getBlockCount() const {
1594	return (fileOff + blockSize - `1`) / blockSize;
1595	}
1596
1597	uint64_t CodeSignatureSection::getRawSize() const {
1598	return allHeadersSize + getBlockCount() * hashSize;
1599	}
1600
1601	void CodeSignatureSection::writeHashes(uint8_t buf) const* {
1602	// NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1603	// MachOWriter::writeSignatureData.
1604	uint8_t *hashes = buf + fileOff + allHeadersSize;
1605	parallelFor(Begin: `0`, End: getBlockCount(), Fn: [&](size_t i) {
1606	sha256(data: buf + i * blockSize,
1607	len: std::min(a: static_cast<size_t>(fileOff - i * blockSize), b: blockSize),
1608	output: hashes + i * hashSize);
1609	});
1610	#if defined(__APPLE__)
1611	// This is macOS-specific work-around and makes no sense for any
1612	// other host OS. See https://openradar.appspot.com/FB8914231
1613	//
1614	// The macOS kernel maintains a signature-verification cache to
1615	// quickly validate applications at time of execve(2). The trouble
1616	// is that for the kernel creates the cache entry at the time of the
1617	// mmap(2) call, before we have a chance to write either the code to
1618	// sign or the signature header+hashes. The fix is to invalidate
1619	// all cached data associated with the output file, thus discarding
1620	// the bogus prematurely-cached signature.
1621	msync(buf, fileOff + getSize(), MS_INVALIDATE);
1622	#endif
1623	}
1624
1625	void CodeSignatureSection::writeTo(uint8_t buf) const* {
1626	// NOTE: Changes to this functionality should be repeated in llvm-objcopy's
1627	// MachOWriter::writeSignatureData.
1628	uint32_t signatureSize = static_cast<uint32_t>(getSize());
1629	auto superBlob = reinterpret_cast<CS_SuperBlob >(buf);
1630	write32be(P: &superBlob->magic, V: CSMAGIC_EMBEDDED_SIGNATURE);
1631	write32be(P: &superBlob->length, V: signatureSize);
1632	write32be(P: &superBlob->count, V: `1`);
1633	auto blobIndex = reinterpret_cast<CS_BlobIndex >(&superBlob[`1`]);
1634	write32be(P: &blobIndex->type, V: CSSLOT_CODEDIRECTORY);
1635	write32be(P: &blobIndex->offset, V: blobHeadersSize);
1636	auto *codeDirectory =
1637	reinterpret_cast<CS_CodeDirectory *>(buf + blobHeadersSize);
1638	write32be(P: &codeDirectory->magic, V: CSMAGIC_CODEDIRECTORY);
1639	write32be(P: &codeDirectory->length, V: signatureSize - blobHeadersSize);
1640	write32be(P: &codeDirectory->version, V: CS_SUPPORTSEXECSEG);
1641	write32be(P: &codeDirectory->flags, V: CS_ADHOC \| CS_LINKER_SIGNED);
1642	write32be(P: &codeDirectory->hashOffset,
1643	V: sizeof(CS_CodeDirectory) + fileName.size() + fileNamePad);
1644	write32be(P: &codeDirectory->identOffset, V: sizeof(CS_CodeDirectory));
1645	codeDirectory->nSpecialSlots = `0`;
1646	write32be(P: &codeDirectory->nCodeSlots, V: getBlockCount());
1647	write32be(P: &codeDirectory->codeLimit, V: fileOff);
1648	codeDirectory->hashSize = static_cast<uint8_t>(hashSize);
1649	codeDirectory->hashType = kSecCodeSignatureHashSHA256;
1650	codeDirectory->platform = `0`;
1651	codeDirectory->pageSize = blockSizeShift;
1652	codeDirectory->spare2 = `0`;
1653	codeDirectory->scatterOffset = `0`;
1654	codeDirectory->teamOffset = `0`;
1655	codeDirectory->spare3 = `0`;
1656	codeDirectory->codeLimit64 = `0`;
1657	OutputSegment *textSeg = getOrCreateOutputSegment(name: segment_names::text);
1658	write64be(P: &codeDirectory->execSegBase, V: textSeg->fileOff);
1659	write64be(P: &codeDirectory->execSegLimit, V: textSeg->fileSize);
1660	write64be(P: &codeDirectory->execSegFlags,
1661	V: config ->outputType == MH_EXECUTE ? CS_EXECSEG_MAIN_BINARY : `0`);
1662	auto id = reinterpret_cast<char* *>(&codeDirectory[`1`]);
1663	memcpy(dest: id, src: fileName.begin(), n: fileName.size());
1664	memset(s: id + fileName.size(), c: `0`, n: fileNamePad);
1665	}
1666
1667	CStringSection::CStringSection(const char *name)
1668	: SyntheticSection (segment_names::text, name) {
1669	flags = S_CSTRING_LITERALS;
1670	}
1671
1672	void CStringSection::addInput(CStringInputSection *isec) {
1673	isec->parent = this;
1674	inputs.push_back(x: isec);
1675	if (isec->align > align)
1676	align = isec->align;
1677	}
1678
1679	void CStringSection::writeTo(uint8_t buf) const* {
1680	for (const CStringInputSection *isec : inputs) {
1681	for (const auto &[i, piece] : llvm::enumerate(First: isec->pieces)) {
1682	if (!piece.live)
1683	continue;
1684	StringRef string = isec->getStringRef(i);
1685	memcpy(dest: buf + piece.outSecOff, src: string.data(), n: string.size());
1686	}
1687	}
1688	}
1689
1690	void CStringSection::finalizeContents() {
1691	uint64_t offset = `0`;
1692	for (CStringInputSection *isec : inputs) {
1693	for (const auto &[i, piece] : llvm::enumerate(First&: isec->pieces)) {
1694	if (!piece.live)
1695	continue;
1696	// See comment above DeduplicatedCStringSection for how alignment is
1697	// handled.
1698	uint32_t pieceAlign = `1`
1699	<< llvm::countr_zero(Val: isec->align \| piece.inSecOff);
1700	offset = alignToPowerOf2(Value: offset, Align: pieceAlign);
1701	piece.outSecOff = offset;
1702	isec->isFinal = true;
1703	StringRef string = isec->getStringRef(i);
1704	offset += string.size() + `1`; // account for null terminator
1705	}
1706	}
1707	size = offset;
1708	}
1709
1710	// Mergeable cstring literals are found under the __TEXT,__cstring section. In
1711	// contrast to ELF, which puts strings that need different alignments into
1712	// different sections, clang's Mach-O backend puts them all in one section.
1713	// Strings that need to be aligned have the .p2align directive emitted before
1714	// them, which simply translates into zero padding in the object file. In other
1715	// words, we have to infer the desired alignment of these cstrings from their
1716	// addresses.
1717	//
1718	// We differ slightly from ld64 in how we've chosen to align these cstrings.
1719	// Both LLD and ld64 preserve the number of trailing zeros in each cstring's
1720	// address in the input object files. When deduplicating identical cstrings,
1721	// both linkers pick the cstring whose address has more trailing zeros, and
1722	// preserve the alignment of that address in the final binary. However, ld64
1723	// goes a step further and also preserves the offset of the cstring from the
1724	// last section-aligned address. I.e. if a cstring is at offset 18 in the
1725	// input, with a section alignment of 16, then both LLD and ld64 will ensure the
1726	// final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also
1727	// ensure that the final address is of the form 16 k + 2 for some k.*
1728	//
1729	// Note that ld64's heuristic means that a dedup'ed cstring's final address is
1730	// dependent on the order of the input object files. E.g. if in addition to the
1731	// cstring at offset 18 above, we have a duplicate one in another file with a
1732	// `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick
1733	// the cstring from the object file earlier on the command line (since both have
1734	// the same number of trailing zeros in their address). So the final cstring may
1735	// either be at some address `16 k + 2` or at some address `2 * k`.*
1736	//
1737	// I've opted not to follow this behavior primarily for implementation
1738	// simplicity, and secondarily to save a few more bytes. It's not clear to me
1739	// that preserving the section alignment + offset is ever necessary, and there
1740	// are many cases that are clearly redundant. In particular, if an x86_64 object
1741	// file contains some strings that are accessed via SIMD instructions, then the
1742	// .cstring section in the object file will be 16-byte-aligned (since SIMD
1743	// requires its operand addresses to be 16-byte aligned). However, there will
1744	// typically also be other cstrings in the same file that aren't used via SIMD
1745	// and don't need this alignment. They will be emitted at some arbitrary address
1746	// `A`, but ld64 will treat them as being 16-byte aligned with an offset of `16
1747	// % A`.
1748	void DeduplicatedCStringSection::finalizeContents() {
1749	// Find the largest alignment required for each string.
1750	for (const CStringInputSection *isec : inputs) {
1751	for (const auto &[i, piece] : llvm::enumerate(First: isec->pieces)) {
1752	if (!piece.live)
1753	continue;
1754	auto s = isec->getCachedHashStringRef(i);
1755	assert(isec->align != `0`);
1756	uint8_t trailingZeros = llvm::countr_zero(Val: isec->align \| piece.inSecOff);
1757	auto it = stringOffsetMap.insert(
1758	KV: std::make_pair(x&: s, y: StringOffset (trailingZeros)));
1759	if (!it.second && it.first ->second.trailingZeros < trailingZeros)
1760	it.first ->second.trailingZeros = trailingZeros;
1761	}
1762	}
1763
1764	// Sort the strings for performance and compression size win, and then
1765	// assign an offset for each string and save it to the corresponding
1766	// StringPieces for easy access.
1767	for (auto &[isec, i] : priorityBuilder.buildCStringPriorities(inputs)) {
1768	auto &piece = isec->pieces [i];
1769	auto s = isec->getCachedHashStringRef(i);
1770	auto it = stringOffsetMap.find(Val: s);
1771	assert(it != stringOffsetMap.end());
1772	lld::macho::DeduplicatedCStringSection::StringOffset &offsetInfo =
1773	it ->second;
1774	if (offsetInfo.outSecOff == UINT64_MAX) {
1775	offsetInfo.outSecOff =
1776	alignToPowerOf2(Value: size, Align: `1ULL` << offsetInfo.trailingZeros);
1777	size = offsetInfo.outSecOff + s.size() + `1`; // account for null terminator
1778	}
1779	piece.outSecOff = offsetInfo.outSecOff;
1780	}
1781	for (CStringInputSection *isec : inputs)
1782	isec->isFinal = true;
1783	}
1784
1785	void DeduplicatedCStringSection::writeTo(uint8_t buf) const* {
1786	for (const auto &p : stringOffsetMap) {
1787	StringRef data = p.first.val();
1788	uint64_t off = p.second.outSecOff;
1789	if (!data.empty())
1790	memcpy(dest: buf + off, src: data.data(), n: data.size());
1791	}
1792	}
1793
1794	DeduplicatedCStringSection::StringOffset
1795	DeduplicatedCStringSection::getStringOffset(StringRef str) const {
1796	// StringPiece uses 31 bits to store the hashes, so we replicate that
1797	uint32_t hash = xxh3_64bits(data: str) & `0x7fffffff`;
1798	auto offset = stringOffsetMap.find(Val: CachedHashStringRef (str, hash));
1799	assert(offset != stringOffsetMap.end() &&
1800	"Looked-up strings should always exist in section");
1801	return offset ->second;
1802	}
1803
1804	// This section is actually emitted as __TEXT,__const by ld64, but clang may
1805	// emit input sections of that name, and LLD doesn't currently support mixing
1806	// synthetic and concat-type OutputSections. To work around this, I've given
1807	// our merged-literals section a different name.
1808	WordLiteralSection::WordLiteralSection()
1809	: SyntheticSection (segment_names::text, section_names::literals) {
1810	align = `16`;
1811	}
1812
1813	void WordLiteralSection::addInput(WordLiteralInputSection *isec) {
1814	isec->parent = this;
1815	inputs.push_back(x: isec);
1816	}
1817
1818	void WordLiteralSection::finalizeContents() {
1819	for (WordLiteralInputSection *isec : inputs) {
1820	// We do all processing of the InputSection here, so it will be effectively
1821	// finalized.
1822	isec->isFinal = true;
1823	const uint8_t *buf = isec->data.data();
1824	switch (sectionType(flags: isec->getFlags())) {
1825	case S_4BYTE_LITERALS: {
1826	for (size_t off = `0`, e = isec->data.size(); off < e; off += `4`) {
1827	if (!isec->isLive(off))
1828	continue;
1829	uint32_t value = *reinterpret_cast<const uint32_t *>(buf + off);
1830	literal4Map.emplace(args&: value, args: literal4Map.size());
1831	}
1832	break;
1833	}
1834	case S_8BYTE_LITERALS: {
1835	for (size_t off = `0`, e = isec->data.size(); off < e; off += `8`) {
1836	if (!isec->isLive(off))
1837	continue;
1838	uint64_t value = *reinterpret_cast<const uint64_t *>(buf + off);
1839	literal8Map.emplace(args&: value, args: literal8Map.size());
1840	}
1841	break;
1842	}
1843	case S_16BYTE_LITERALS: {
1844	for (size_t off = `0`, e = isec->data.size(); off < e; off += `16`) {
1845	if (!isec->isLive(off))
1846	continue;
1847	UInt128 value = *reinterpret_cast<const UInt128 *>(buf + off);
1848	literal16Map.emplace(args&: value, args: literal16Map.size());
1849	}
1850	break;
1851	}
1852	default:
1853	llvm_unreachable("invalid literal section type");
1854	}
1855	}
1856	}
1857
1858	void WordLiteralSection::writeTo(uint8_t buf) const* {
1859	// Note that we don't attempt to do any endianness conversion in addInput(),
1860	// so we don't do it here either -- just write out the original value,
1861	// byte-for-byte.
1862	for (const auto &p : literal16Map)
1863	memcpy(dest: buf + p.second * `16`, src: &p.first, n: `16`);
1864	buf += literal16Map.size() * `16`;
1865
1866	for (const auto &p : literal8Map)
1867	memcpy(dest: buf + p.second * `8`, src: &p.first, n: `8`);
1868	buf += literal8Map.size() * `8`;
1869
1870	for (const auto &p : literal4Map)
1871	memcpy(dest: buf + p.second * `4`, src: &p.first, n: `4`);
1872	}
1873
1874	ObjCImageInfoSection::ObjCImageInfoSection()
1875	: SyntheticSection (segment_names::data, section_names::objCImageInfo) {}
1876
1877	ObjCImageInfoSection::ImageInfo
1878	ObjCImageInfoSection::parseImageInfo(const InputFile *file) {
1879	ImageInfo info;
1880	ArrayRef<uint8_t> data = file->objCImageInfo;
1881	// The image info struct has the following layout:
1882	// struct {
1883	// uint32_t version;
1884	// uint32_t flags;
1885	// };
1886	if (data.size() < `8`) {
1887	warn(msg: toString(file) + ": invalid __objc_imageinfo size");
1888	return info;
1889	}
1890
1891	auto buf = reinterpret_cast<const* uint32_t *>(data.data());
1892	if (read32le(P: buf) != `0`) {
1893	warn(msg: toString(file) + ": invalid __objc_imageinfo version");
1894	return info;
1895	}
1896
1897	uint32_t flags = read32le(P: buf + `1`);
1898	info.swiftVersion = (flags >> `8`) & `0xff`;
1899	info.hasCategoryClassProperties = flags & `0x40`;
1900	return info;
1901	}
1902
1903	static std::string swiftVersionString(uint8_t version) {
1904	switch (version) {
1905	case `1`:
1906	return "1.0";
1907	case `2`:
1908	return "1.1";
1909	case `3`:
1910	return "2.0";
1911	case `4`:
1912	return "3.0";
1913	case `5`:
1914	return "4.0";
1915	default:
1916	return ("0x" + Twine::utohexstr(Val: version)).str();
1917	}
1918	}
1919
1920	// Validate each object file's __objc_imageinfo and use them to generate the
1921	// image info for the output binary. Only two pieces of info are relevant:
1922	// 1. The Swift version (should be identical across inputs)
1923	// 2. `bool hasCategoryClassProperties` (true only if true for all inputs)
1924	void ObjCImageInfoSection::finalizeContents() {
1925	assert(files.size() != `0`); // should have already been checked via isNeeded()
1926
1927	info.hasCategoryClassProperties = true;
1928	const InputFile *firstFile;
1929	for (const InputFile *file : files) {
1930	ImageInfo inputInfo = parseImageInfo(file);
1931	info.hasCategoryClassProperties &= inputInfo.hasCategoryClassProperties;
1932
1933	// swiftVersion 0 means no Swift is present, so no version checking required
1934	if (inputInfo.swiftVersion == `0`)
1935	continue;
1936
1937	if (info.swiftVersion != `0` && info.swiftVersion != inputInfo.swiftVersion) {
1938	error(msg: "Swift version mismatch: " + toString(file: firstFile) + " has version " +
1939	swiftVersionString(version: info.swiftVersion) + " but " + toString(file) +
1940	" has version " + swiftVersionString(version: inputInfo.swiftVersion));
1941	} else {
1942	info.swiftVersion = inputInfo.swiftVersion;
1943	firstFile = file;
1944	}
1945	}
1946	}
1947
1948	void ObjCImageInfoSection::writeTo(uint8_t buf) const* {
1949	uint32_t flags = info.hasCategoryClassProperties ? `0x40` : `0x0`;
1950	flags \|= info.swiftVersion << `8`;
1951	write32le(P: buf + `4`, V: flags);
1952	}
1953
1954	InitOffsetsSection::InitOffsetsSection()
1955	: SyntheticSection (segment_names::text, section_names::initOffsets) {
1956	flags = S_INIT_FUNC_OFFSETS;
1957	align = `4`; // This section contains 32-bit integers.
1958	}
1959
1960	uint64_t InitOffsetsSection::getSize() const {
1961	size_t count = `0`;
1962	for (const ConcatInputSection *isec : sections)
1963	count += isec->relocs.size();
1964	return count * sizeof(uint32_t);
1965	}
1966
1967	void InitOffsetsSection::writeTo(uint8_t buf) const* {
1968	// FIXME: Add function specified by -init when that argument is implemented.
1969	for (ConcatInputSection *isec : sections) {
1970	for (const Reloc &rel : isec->relocs) {
1971	const Symbol referent = cast<Symbol >(Val: rel.referent);
1972	assert(referent && "section relocation should have been rejected");
1973	uint64_t offset = referent->getVA() - in.header->addr;
1974	// FIXME: Can we handle this gracefully?
1975	if (offset > UINT32_MAX)
1976	fatal(msg: isec->getLocation(off: rel.offset) + ": offset to initializer " +
1977	referent->getName() + " (" + utohexstr(X: offset) +
1978	") does not fit in 32 bits");
1979
1980	// Entries need to be added in the order they appear in the section, but
1981	// relocations aren't guaranteed to be sorted.
1982	size_t index = rel.offset >> target->p2WordSize;
1983	write32le(P: &buf[index * sizeof(uint32_t)], V: offset);
1984	}
1985	buf += isec->relocs.size() * sizeof(uint32_t);
1986	}
1987	}
1988
1989	// The inputs are __mod_init_func sections, which contain pointers to
1990	// initializer functions, therefore all relocations should be of the UNSIGNED
1991	// type. InitOffsetsSection stores offsets, so if the initializer's address is
1992	// not known at link time, stub-indirection has to be used.
1993	void InitOffsetsSection::setUp() {
1994	for (const ConcatInputSection *isec : sections) {
1995	for (const Reloc &rel : isec->relocs) {
1996	RelocAttrs attrs = target->getRelocAttrs(type: rel.type);
1997	if (!attrs.hasAttr(b: RelocAttrBits::UNSIGNED))
1998	error(msg: isec->getLocation(off: rel.offset) +
1999	": unsupported relocation type: " + attrs.name);
2000	if (rel.addend != `0`)
2001	error(msg: isec->getLocation(off: rel.offset) +
2002	": relocation addend is not representable in __init_offsets");
2003	if (isa<InputSection *>(Val: rel.referent))
2004	error(msg: isec->getLocation(off: rel.offset) +
2005	": unexpected section relocation");
2006
2007	Symbol sym = rel.referent.dyn_cast<Symbol >();
2008	if (auto *undefined = dyn_cast<Undefined>(Val: sym))
2009	treatUndefinedSymbol(*undefined, isec, offset: rel.offset);
2010	if (needsBinding(sym))
2011	in.stubs->addEntry(sym);
2012	}
2013	}
2014	}
2015
2016	ObjCMethListSection::ObjCMethListSection()
2017	: SyntheticSection (segment_names::text, section_names::objcMethList) {
2018	flags = S_ATTR_NO_DEAD_STRIP;
2019	align = relativeOffsetSize;
2020	}
2021
2022	// Go through all input method lists and ensure that we have selrefs for all
2023	// their method names. The selrefs will be needed later by ::writeTo. We need to
2024	// create them early on here to ensure they are processed correctly by the lld
2025	// pipeline.
2026	void ObjCMethListSection::setUp() {
2027	for (const ConcatInputSection *isec : inputs) {
2028	uint32_t structSizeAndFlags = `0`, structCount = `0`;
2029	readMethodListHeader(buf: isec->data.data(), structSizeAndFlags, structCount);
2030	uint32_t originalStructSize = structSizeAndFlags & structSizeMask;
2031	// Method name is immediately after header
2032	uint32_t methodNameOff = methodListHeaderSize;
2033
2034	// Loop through all methods, and ensure a selref for each of them exists.
2035	while (methodNameOff < isec->data.size()) {
2036	const Reloc *reloc = isec->getRelocAt(off: methodNameOff);
2037	assert(reloc && "Relocation expected at method list name slot");
2038
2039	StringRef methname = reloc->getReferentString();
2040	if (!ObjCSelRefsHelper::getSelRef(methname))
2041	ObjCSelRefsHelper::makeSelRef(methname);
2042
2043	// Jump to method name offset in next struct
2044	methodNameOff += originalStructSize;
2045	}
2046	}
2047	}
2048
2049	// Calculate section size and final offsets for where InputSection's need to be
2050	// written.
2051	void ObjCMethListSection::finalize() {
2052	// sectionSize will be the total size of the __objc_methlist section
2053	sectionSize = `0`;
2054	for (ConcatInputSection *isec : inputs) {
2055	// We can also use sectionSize as write offset for isec
2056	assert(sectionSize == alignToPowerOf2(sectionSize, relativeOffsetSize) &&
2057	"expected __objc_methlist to be aligned by default with the "
2058	"required section alignment");
2059	isec->outSecOff = sectionSize;
2060
2061	isec->isFinal = true;
2062	uint32_t relativeListSize =
2063	computeRelativeMethodListSize(absoluteMethodListSize: isec->data.size());
2064	sectionSize += relativeListSize;
2065
2066	// If encoding the method list in relative offset format shrinks the size,
2067	// then we also need to adjust symbol sizes to match the new size. Note that
2068	// on 32bit platforms the size of the method list will remain the same when
2069	// encoded in relative offset format.
2070	if (relativeListSize != isec->data.size()) {
2071	for (Symbol *sym : isec->symbols) {
2072	assert(isa<Defined>(sym) &&
2073	"Unexpected undefined symbol in ObjC method list");
2074	auto *def = cast<Defined>(Val: sym);
2075	// There can be 0-size symbols, check if this is the case and ignore
2076	// them.
2077	if (def->size) {
2078	assert(
2079	def->size == isec->data.size() &&
2080	"Invalid ObjC method list symbol size: expected symbol size to "
2081	"match isec size");
2082	def->size = relativeListSize;
2083	}
2084	}
2085	}
2086	}
2087	}
2088
2089	void ObjCMethListSection::writeTo(uint8_t bufStart) const* {
2090	uint8_t *buf = bufStart;
2091	for (const ConcatInputSection *isec : inputs) {
2092	assert(buf - bufStart == std::ptrdiff_t(isec->outSecOff) &&
2093	"Writing at unexpected offset");
2094	uint32_t writtenSize = writeRelativeMethodList(isec, buf);
2095	buf += writtenSize;
2096	}
2097	assert(buf - bufStart == std::ptrdiff_t(sectionSize) &&
2098	"Written size does not match expected section size");
2099	}
2100
2101	// Check if an InputSection is a method list. To do this we scan the
2102	// InputSection for any symbols who's names match the patterns we expect clang
2103	// to generate for method lists.
2104	bool ObjCMethListSection::isMethodList(const ConcatInputSection *isec) {
2105	const char *symPrefixes[] = {objc::symbol_names::classMethods,
2106	objc::symbol_names::instanceMethods,
2107	objc::symbol_names::categoryInstanceMethods,
2108	objc::symbol_names::categoryClassMethods};
2109	if (!isec)
2110	return false;
2111	for (const Symbol *sym : isec->symbols) {
2112	auto *def = dyn_cast_or_null<Defined>(Val: sym);
2113	if (!def)
2114	continue;
2115	for (const char *prefix : symPrefixes) {
2116	if (def->getName().starts_with(Prefix: prefix)) {
2117	assert(def->size == isec->data.size() &&
2118	"Invalid ObjC method list symbol size: expected symbol size to "
2119	"match isec size");
2120	assert(def->value == `0` &&
2121	"Offset of ObjC method list symbol must be 0");
2122	return true;
2123	}
2124	}
2125	}
2126
2127	return false;
2128	}
2129
2130	// Encode a single relative offset value. The input is the data/symbol at
2131	// (&isec->data[inSecOff]). The output is written to (&buf[outSecOff]).
2132	// 'createSelRef' indicates that we should not directly use the specified
2133	// symbol, but instead get the selRef for the symbol and use that instead.
2134	void ObjCMethListSection::writeRelativeOffsetForIsec(
2135	const ConcatInputSection isec, uint8_t buf, uint32_t &inSecOff,
2136	uint32_t &outSecOff, bool useSelRef) const {
2137	const Reloc *reloc = isec->getRelocAt(off: inSecOff);
2138	assert(reloc && "Relocation expected at __objc_methlist Offset");
2139
2140	uint32_t symVA = `0`;
2141	if (useSelRef) {
2142	StringRef methname = reloc->getReferentString();
2143	ConcatInputSection *selRef = ObjCSelRefsHelper::getSelRef(methname);
2144	assert(selRef && "Expected all selector names to already be already be "
2145	"present in __objc_selrefs");
2146	symVA = selRef->getVA();
2147	assert(selRef->data.size() == target->wordSize &&
2148	"Expected one selref per ConcatInputSection");
2149	} else if (auto sym = dyn_cast<Symbol >(Val: reloc->referent)) {
2150	auto *def = dyn_cast_or_null<Defined>(Val: sym);
2151	assert(def && "Expected all syms in __objc_methlist to be defined");
2152	symVA = def->getVA();
2153	} else {
2154	auto isec = cast<InputSection >(Val: reloc->referent);
2155	symVA = isec->getVA(off: reloc->addend);
2156	}
2157
2158	uint32_t currentVA = isec->getVA() + outSecOff;
2159	uint32_t delta = symVA - currentVA;
2160	write32le(P: buf + outSecOff, V: delta);
2161
2162	// Move one pointer forward in the absolute method list
2163	inSecOff += target->wordSize;
2164	// Move one relative offset forward in the relative method list (32 bits)
2165	outSecOff += relativeOffsetSize;
2166	}
2167
2168	// Write a relative method list to buf, return the size of the written
2169	// information
2170	uint32_t
2171	ObjCMethListSection::writeRelativeMethodList(const ConcatInputSection *isec,
2172	uint8_t buf) const* {
2173	// Copy over the header, and add the "this is a relative method list" magic
2174	// value flag
2175	uint32_t structSizeAndFlags = `0`, structCount = `0`;
2176	readMethodListHeader(buf: isec->data.data(), structSizeAndFlags, structCount);
2177	// Set the struct size for the relative method list
2178	uint32_t relativeStructSizeAndFlags =
2179	(relativeOffsetSize * pointersPerStruct) & structSizeMask;
2180	// Carry over the old flags from the input struct
2181	relativeStructSizeAndFlags \|= structSizeAndFlags & structFlagsMask;
2182	// Set the relative method list flag
2183	relativeStructSizeAndFlags \|= relMethodHeaderFlag;
2184
2185	writeMethodListHeader(buf, structSizeAndFlags: relativeStructSizeAndFlags, structCount);
2186
2187	assert(methodListHeaderSize +
2188	(structCount * pointersPerStruct * target->wordSize) ==
2189	isec->data.size() &&
2190	"Invalid computed ObjC method list size");
2191
2192	uint32_t inSecOff = methodListHeaderSize;
2193	uint32_t outSecOff = methodListHeaderSize;
2194
2195	// Go through the method list and encode input absolute pointers as relative
2196	// offsets. writeRelativeOffsetForIsec will be incrementing inSecOff and
2197	// outSecOff
2198	for (uint32_t i = `0`; i < structCount; i++) {
2199	// Write the name of the method
2200	writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, useSelRef: true);
2201	// Write the type of the method
2202	writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, useSelRef: false);
2203	// Write reference to the selector of the method
2204	writeRelativeOffsetForIsec(isec, buf, inSecOff, outSecOff, useSelRef: false);
2205	}
2206
2207	// Expecting to have read all the data in the isec
2208	assert(inSecOff == isec->data.size() &&
2209	"Invalid actual ObjC method list size");
2210	assert(
2211	outSecOff == computeRelativeMethodListSize(inSecOff) &&
2212	"Mismatch between input & output size when writing relative method list");
2213	return outSecOff;
2214	}
2215
2216	// Given the size of an ObjC method list InputSection, return the size of the
2217	// method list when encoded in relative offsets format. We can do this without
2218	// decoding the actual data, as it can be directly inferred from the size of the
2219	// isec.
2220	uint32_t ObjCMethListSection::computeRelativeMethodListSize(
2221	uint32_t absoluteMethodListSize) const {
2222	uint32_t oldPointersSize = absoluteMethodListSize - methodListHeaderSize;
2223	uint32_t pointerCount = oldPointersSize / target->wordSize;
2224	assert(((pointerCount % pointersPerStruct) == `0`) &&
2225	"__objc_methlist expects method lists to have multiple-of-3 pointers");
2226
2227	uint32_t newPointersSize = pointerCount * relativeOffsetSize;
2228	uint32_t newTotalSize = methodListHeaderSize + newPointersSize;
2229
2230	assert((newTotalSize <= absoluteMethodListSize) &&
2231	"Expected relative method list size to be smaller or equal than "
2232	"original size");
2233	return newTotalSize;
2234	}
2235
2236	// Read a method list header from buf
2237	void ObjCMethListSection::readMethodListHeader(const uint8_t *buf,
2238	uint32_t &structSizeAndFlags,
2239	uint32_t &structCount) const {
2240	structSizeAndFlags = read32le(P: buf);
2241	structCount = read32le(P: buf + sizeof(uint32_t));
2242	}
2243
2244	// Write a method list header to buf
2245	void ObjCMethListSection::writeMethodListHeader(uint8_t *buf,
2246	uint32_t structSizeAndFlags,
2247	uint32_t structCount) const {
2248	write32le(P: buf, V: structSizeAndFlags);
2249	write32le(P: buf + sizeof(structSizeAndFlags), V: structCount);
2250	}
2251
2252	void macho::createSyntheticSymbols() {
2253	auto addHeaderSymbol = [](const char *name) {
2254	symtab ->addSynthetic(name, in.header->isec, /value=/`0`,
2255	/isPrivateExtern=/true, /includeInSymtab=/false,
2256	/referencedDynamically=/false);
2257	};
2258
2259	switch (config ->outputType) {
2260	// FIXME: Assign the right address value for these symbols
2261	// (rather than 0). But we need to do that after assignAddresses().
2262	case MH_EXECUTE:
2263	// If linking PIE, __mh_execute_header is a defined symbol in
2264	// __TEXT, __text)
2265	// Otherwise, it's an absolute symbol.
2266	if (config ->isPic)
2267	symtab ->addSynthetic(name: "__mh_execute_header", in.header->isec, /value=/`0`,
2268	/isPrivateExtern=/false, /includeInSymtab=/true,
2269	/referencedDynamically=/true);
2270	else
2271	symtab ->addSynthetic(name: "__mh_execute_header", /isec=/nullptr, /value=/`0`,
2272	/isPrivateExtern=/false, /includeInSymtab=/true,
2273	/referencedDynamically=/true);
2274	break;
2275
2276	// The following symbols are N_SECT symbols, even though the header is not
2277	// part of any section and that they are private to the bundle/dylib/object
2278	// they are part of.
2279	case MH_BUNDLE:
2280	addHeaderSymbol ("__mh_bundle_header");
2281	break;
2282	case MH_DYLIB:
2283	addHeaderSymbol ("__mh_dylib_header");
2284	break;
2285	case MH_DYLINKER:
2286	addHeaderSymbol ("__mh_dylinker_header");
2287	break;
2288	case MH_OBJECT:
2289	addHeaderSymbol ("__mh_object_header");
2290	break;
2291	default:
2292	llvm_unreachable("unexpected outputType");
2293	break;
2294	}
2295
2296	// The Itanium C++ ABI requires dylibs to pass a pointer to __cxa_atexit
2297	// which does e.g. cleanup of static global variables. The ABI document
2298	// says that the pointer can point to any address in one of the dylib's
2299	// segments, but in practice ld64 seems to set it to point to the header,
2300	// so that's what's implemented here.
2301	addHeaderSymbol ("___dso_handle");
2302	}
2303
2304	ChainedFixupsSection::ChainedFixupsSection()
2305	: LinkEditSection (segment_names::linkEdit, section_names::chainFixups) {}
2306
2307	bool ChainedFixupsSection::isNeeded() const {
2308	assert(config ->emitChainedFixups);
2309	// dyld always expects LC_DYLD_CHAINED_FIXUPS to point to a valid
2310	// dyld_chained_fixups_header, so we create this section even if there aren't
2311	// any fixups.
2312	return true;
2313	}
2314
2315	void ChainedFixupsSection::addBinding(const Symbol *sym,
2316	const InputSection *isec, uint64_t offset,
2317	int64_t addend) {
2318	locations.emplace_back(args&: isec, args&: offset);
2319	int64_t outlineAddend = (addend < `0` \|\| addend > `0xFF`) ? addend : `0`;
2320	auto [it, inserted] = bindings.insert(
2321	KV: {{sym, outlineAddend}, static_cast<uint32_t>(bindings.size())});
2322
2323	if (inserted) {
2324	symtabSize += sym->getName().size() + `1`;
2325	hasWeakBind = hasWeakBind \|\| needsWeakBind(sym: *sym);
2326	if (!isInt<`23`>(x: outlineAddend))
2327	needsLargeAddend = true;
2328	else if (outlineAddend != `0`)
2329	needsAddend = true;
2330	}
2331	}
2332
2333	std::pair<uint32_t, uint8_t>
2334	ChainedFixupsSection::getBinding(const Symbol sym, int64_t addend) const* {
2335	int64_t outlineAddend = (addend < `0` \|\| addend > `0xFF`) ? addend : `0`;
2336	auto it = bindings.find(Key: {sym, outlineAddend});
2337	assert(it != bindings.end() && "binding not found in the imports table");
2338	if (outlineAddend == `0`)
2339	return {it->second, addend};
2340	return {it->second, `0`};
2341	}
2342
2343	static size_t writeImport(uint8_t buf, int* format, int16_t libOrdinal,
2344	bool weakRef, uint32_t nameOffset, int64_t addend) {
2345	switch (format) {
2346	case DYLD_CHAINED_IMPORT: {
2347	auto import = reinterpret_cast<dyld_chained_import >(buf);
2348	import->lib_ordinal = libOrdinal;
2349	import->weak_import = weakRef;
2350	import->name_offset = nameOffset;
2351	return sizeof(dyld_chained_import);
2352	}
2353	case DYLD_CHAINED_IMPORT_ADDEND: {
2354	auto import = reinterpret_cast<dyld_chained_import_addend >(buf);
2355	import->lib_ordinal = libOrdinal;
2356	import->weak_import = weakRef;
2357	import->name_offset = nameOffset;
2358	import->addend = addend;
2359	return sizeof(dyld_chained_import_addend);
2360	}
2361	case DYLD_CHAINED_IMPORT_ADDEND64: {
2362	auto import = reinterpret_cast<dyld_chained_import_addend64 >(buf);
2363	import->lib_ordinal = libOrdinal;
2364	import->weak_import = weakRef;
2365	import->name_offset = nameOffset;
2366	import->addend = addend;
2367	return sizeof(dyld_chained_import_addend64);
2368	}
2369	default:
2370	llvm_unreachable("Unknown import format");
2371	}
2372	}
2373
2374	size_t ChainedFixupsSection::SegmentInfo::getSize() const {
2375	assert(pageStarts.size() > `0` && "SegmentInfo for segment with no fixups?");
2376	return alignTo<`8`>(Value: sizeof(dyld_chained_starts_in_segment) +
2377	pageStarts.back().first * sizeof(uint16_t));
2378	}
2379
2380	size_t ChainedFixupsSection::SegmentInfo::writeTo(uint8_t buf) const* {
2381	auto segInfo = reinterpret_cast<dyld_chained_starts_in_segment >(buf);
2382	segInfo->size = getSize();
2383	segInfo->page_size = target->getPageSize();
2384	// FIXME: Use DYLD_CHAINED_PTR_64_OFFSET on newer OS versions.
2385	segInfo->pointer_format = DYLD_CHAINED_PTR_64;
2386	segInfo->segment_offset = oseg->addr - in.header->addr;
2387	segInfo->max_valid_pointer = `0`; // not used on 64-bit
2388	segInfo->page_count = pageStarts.back().first + `1`;
2389
2390	uint16_t *starts = segInfo->page_start;
2391	for (size_t i = `0`; i < segInfo->page_count; ++i)
2392	starts[i] = DYLD_CHAINED_PTR_START_NONE;
2393
2394	for (auto [pageIdx, startAddr] : pageStarts)
2395	starts[pageIdx] = startAddr;
2396	return segInfo->size;
2397	}
2398
2399	static size_t importEntrySize(int format) {
2400	switch (format) {
2401	case DYLD_CHAINED_IMPORT:
2402	return sizeof(dyld_chained_import);
2403	case DYLD_CHAINED_IMPORT_ADDEND:
2404	return sizeof(dyld_chained_import_addend);
2405	case DYLD_CHAINED_IMPORT_ADDEND64:
2406	return sizeof(dyld_chained_import_addend64);
2407	default:
2408	llvm_unreachable("Unknown import format");
2409	}
2410	}
2411
2412	// This is step 3 of the algorithm described in the class comment of
2413	// ChainedFixupsSection.
2414	//
2415	// LC_DYLD_CHAINED_FIXUPS data consists of (in this order):
2416	// A dyld_chained_fixups_header*
2417	// A dyld_chained_starts_in_image*
2418	// One dyld_chained_starts_in_segment per segment*
2419	// List of all imports (dyld_chained_import, dyld_chained_import_addend, or*
2420	// dyld_chained_import_addend64)
2421	// Names of imported symbols*
2422	void ChainedFixupsSection::writeTo(uint8_t buf) const* {
2423	auto header = reinterpret_cast<dyld_chained_fixups_header >(buf);
2424	header->fixups_version = `0`;
2425	header->imports_count = bindings.size();
2426	header->imports_format = importFormat;
2427	header->symbols_format = `0`;
2428
2429	buf += alignTo<`8`>(Value: sizeof(*header));
2430
2431	auto curOffset = [&buf, &header]() -> uint32_t {
2432	return buf - reinterpret_cast<uint8_t *>(header);
2433	};
2434
2435	header->starts_offset = curOffset ();
2436
2437	auto imageInfo = reinterpret_cast<dyld_chained_starts_in_image >(buf);
2438	imageInfo->seg_count = outputSegments.size();
2439	uint32_t *segStarts = imageInfo->seg_info_offset;
2440
2441	// dyld_chained_starts_in_image ends in a flexible array member containing an
2442	// uint32_t for each segment. Leave room for it, and fill it via segStarts.
2443	buf += alignTo<`8`>(offsetof(dyld_chained_starts_in_image, seg_info_offset) +
2444	outputSegments.size() * sizeof(uint32_t));
2445
2446	// Initialize all offsets to 0, which indicates that the segment does not have
2447	// fixups. Those that do have them will be filled in below.
2448	for (size_t i = `0`; i < outputSegments.size(); ++i)
2449	segStarts[i] = `0`;
2450
2451	for (const SegmentInfo &seg : fixupSegments) {
2452	segStarts[seg.oseg->index] = curOffset () - header->starts_offset;
2453	buf += seg.writeTo(buf);
2454	}
2455
2456	// Write imports table.
2457	header->imports_offset = curOffset ();
2458	uint64_t nameOffset = `0`;
2459	for (auto [import, idx] : bindings) {
2460	const Symbol &sym = *import.first;
2461	buf += writeImport(buf, format: importFormat, libOrdinal: ordinalForSymbol(sym),
2462	weakRef: sym.isWeakRef(), nameOffset, addend: import.second);
2463	nameOffset += sym.getName().size() + `1`;
2464	}
2465
2466	// Write imported symbol names.
2467	header->symbols_offset = curOffset ();
2468	for (auto [import, idx] : bindings) {
2469	StringRef name = import.first->getName();
2470	memcpy(dest: buf, src: name.data(), n: name.size());
2471	buf += name.size() + `1`; // account for null terminator
2472	}
2473
2474	assert(curOffset() == getRawSize());
2475	}
2476
2477	// This is step 2 of the algorithm described in the class comment of
2478	// ChainedFixupsSection.
2479	void ChainedFixupsSection::finalizeContents() {
2480	assert(target->wordSize == `8` && "Only 64-bit platforms are supported");
2481	assert(config ->emitChainedFixups);
2482
2483	if (!isUInt<`32`>(x: symtabSize))
2484	error(msg: "cannot encode chained fixups: imported symbols table size " +
2485	Twine(symtabSize) + " exceeds 4 GiB");
2486
2487	bool needsLargeOrdinal = any_of(Range&: bindings, P: [](const auto &p) {
2488	// 0xF1 - 0xFF are reserved for special ordinals in the 8-bit encoding.
2489	return ordinalForSymbol(*p.first.first) > `0xF0`;
2490	});
2491
2492	if (needsLargeAddend \|\| !isUInt<`23`>(x: symtabSize) \|\| needsLargeOrdinal)
2493	importFormat = DYLD_CHAINED_IMPORT_ADDEND64;
2494	else if (needsAddend)
2495	importFormat = DYLD_CHAINED_IMPORT_ADDEND;
2496	else
2497	importFormat = DYLD_CHAINED_IMPORT;
2498
2499	for (Location &loc : locations)
2500	loc.offset =
2501	loc.isec->parent->getSegmentOffset() + loc.isec->getOffset(off: loc.offset);
2502
2503	llvm::sort(C&: locations, Comp: [](const Location &a, const Location &b) {
2504	const OutputSegment *segA = a.isec->parent->parent;
2505	const OutputSegment *segB = b.isec->parent->parent;
2506	if (segA == segB)
2507	return a.offset < b.offset;
2508	return segA->addr < segB->addr;
2509	});
2510
2511	auto sameSegment = [](const Location &a, const Location &b) {
2512	return a.isec->parent->parent == b.isec->parent->parent;
2513	};
2514
2515	const uint64_t pageSize = target->getPageSize();
2516	for (size_t i = `0`, count = locations.size(); i < count;) {
2517	const Location &firstLoc = locations [i];
2518	fixupSegments.emplace_back(Args&: firstLoc.isec->parent->parent);
2519	while (i < count && sameSegment (locations [i], firstLoc)) {
2520	uint32_t pageIdx = locations [i].offset / pageSize;
2521	fixupSegments.back().pageStarts.emplace_back(
2522	Args&: pageIdx, Args: locations [i].offset % pageSize);
2523	++i;
2524	while (i < count && sameSegment (locations [i], firstLoc) &&
2525	locations [i].offset / pageSize == pageIdx)
2526	++i;
2527	}
2528	}
2529
2530	// Compute expected encoded size.
2531	size = alignTo<`8`>(Value: sizeof(dyld_chained_fixups_header));
2532	size += alignTo<`8`>(offsetof(dyld_chained_starts_in_image, seg_info_offset) +
2533	outputSegments.size() * sizeof(uint32_t));
2534	for (const SegmentInfo &seg : fixupSegments)
2535	size += seg.getSize();
2536	size += importEntrySize(format: importFormat) * bindings.size();
2537	size += symtabSize;
2538	}
2539
2540	template SymtabSection *macho::makeSymtabSection<LP64>(StringTableSection &);
2541	template SymtabSection *macho::makeSymtabSection<ILP32>(StringTableSection &);
2542

Provided by KDAB

Definitions

sha256
in
syntheticSections
SyntheticSection
MachHeaderSection
addLoadCommand
getSize
cpuSubtype
hasWeakBinding
hasNonWeakDefinition
writeTo
PageZeroSection
RebaseSection
RebaseState
emitIncrement
flushRebase
encodeRebases
finalizeContents
writeTo
NonLazyPointerSectionBase
addNonLazyBindingEntries
addEntry
writeChainedRebase
writeChainedBind
writeChainedFixup
writeTo
GotSection
TlvPointerSection
BindingSection
Binding
BindIR
encodeBinding
optimizeOpcodes
flushOpcodes
needsWeakBind
ordinalForDylibSymbol
ordinalForSymbol
encodeDylibOrdinal
encodeWeakOverride
sortBindings
finalizeContents
writeTo
WeakBindingSection
finalizeContents
writeTo
StubsSection
getSize
writeTo
finalize
addBindingsForStub
addEntry
StubHelperSection
getSize
isNeeded
writeTo
setUp
methnameToSelref
initialize
cleanup
makeSelRef
getSelRef
ObjCStubsSection
isObjCStubSymbol
getMethname
addEntry
setUp
getSize
writeTo
LazyPointerSection
getSize
isNeeded
writeTo
LazyBindingSection
finalizeContents
writeTo
addEntry
encode
ExportSection
finalizeContents
writeTo
DataInCodeSection
collectDataInCodeEntries
finalizeContents
writeTo
FunctionStartsSection
finalizeContents
writeTo
SymtabSection
emitBeginSourceStab
emitEndSourceStab
emitObjectFileStab
emitEndFunStab
emitStabs
finalizeContents
getNumSymbols
SymtabSectionImpl
SymtabSectionImpl
getRawSize
writeTo
makeSymtabSection
IndirectSymtabSection
getNumSymbols
isNeeded
finalizeContents
indirectValue
writeTo
StringTableSection
addString
writeTo
CodeSignatureSection
getBlockCount
getRawSize
writeHashes
writeTo
CStringSection
addInput
writeTo
finalizeContents
finalizeContents
writeTo
getStringOffset
WordLiteralSection
addInput
finalizeContents
writeTo
ObjCImageInfoSection
parseImageInfo
swiftVersionString
finalizeContents
writeTo
InitOffsetsSection
getSize
writeTo
setUp
ObjCMethListSection
setUp
finalize
writeTo
isMethodList
writeRelativeOffsetForIsec
writeRelativeMethodList
computeRelativeMethodListSize
readMethodListHeader
writeMethodListHeader
createSyntheticSymbols
ChainedFixupsSection
isNeeded
addBinding
getBinding
writeImport
getSize
writeTo
importEntrySize
writeTo

Update your C++ knowledge – Modern C++11/14/17 Training

Find out more

Definitions

source code of lld/MachO/SyntheticSections.cpp