ICF.cpp source code [lld/MachO/ICF.cpp]

1	//===- ICF.cpp ------------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "ICF.h"
10	#include "ConcatOutputSection.h"
11	#include "Config.h"
12	#include "InputSection.h"
13	#include "SymbolTable.h"
14	#include "Symbols.h"
15	#include "UnwindInfoSection.h"
16
17	#include "lld/Common/CommonLinkerContext.h"
18	#include "llvm/Support/LEB128.h"
19	#include "llvm/Support/Parallel.h"
20	#include "llvm/Support/TimeProfiler.h"
21	#include "llvm/Support/xxhash.h"
22
23	#include <atomic>
24
25	using namespace llvm;
26	using namespace lld;
27	using namespace lld::macho;
28
29	static constexpr bool verboseDiagnostics = false;
30
31	class ICF {
32	public:
33	ICF(std::vector<ConcatInputSection *> &inputs);
34	void run();
35
36	using EqualsFn = bool (ICF::)(const* ConcatInputSection *,
37	const ConcatInputSection *);
38	void segregate(size_t begin, size_t end, EqualsFn);
39	size_t findBoundary(size_t begin, size_t end);
40	void forEachClassRange(size_t begin, size_t end,
41	llvm::function_ref<void(size_t, size_t)> func);
42	void forEachClass(llvm::function_ref<void(size_t, size_t)> func);
43
44	bool equalsConstant(const ConcatInputSection *ia,
45	const ConcatInputSection *ib);
46	bool equalsVariable(const ConcatInputSection *ia,
47	const ConcatInputSection *ib);
48
49	// ICF needs a copy of the inputs vector because its equivalence-class
50	// segregation algorithm destroys the proper sequence.
51	std::vector<ConcatInputSection *> icfInputs;
52
53	unsigned icfPass = `0`;
54	std::atomic<bool> icfRepeat{false};
55	std::atomic<uint64_t> equalsConstantCount{`0`};
56	std::atomic<uint64_t> equalsVariableCount{`0`};
57	};
58
59	ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
60	icfInputs.assign(first: inputs.begin(), last: inputs.end());
61	}
62
63	// ICF = Identical Code Folding
64	//
65	// We only fold __TEXT,__text, so this is really "code" folding, and not
66	// "COMDAT" folding. String and scalar constant literals are deduplicated
67	// elsewhere.
68	//
69	// Summary of segments & sections:
70	//
71	// The __TEXT segment is readonly at the MMU. Some sections are already
72	// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal) and some are*
73	// synthetic and inherently free of duplicates (__TEXT,__stubs &
74	// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
75	// because doing so induces many test failures.
76	//
77	// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
78	// thus ineligible for ICF.
79	//
80	// The __DATA_CONST segment is read/write at the MMU, but is logically const to
81	// the application after dyld applies fixups to pointer data. We currently
82	// fold only the __DATA_CONST,__cfstring section.
83	//
84	// The __DATA segment is read/write at the MMU, and as application-writeable
85	// data, none of its sections are eligible for ICF.
86	//
87	// Please see the large block comment in lld/ELF/ICF.cpp for an explanation
88	// of the segregation algorithm.
89	//
90	// FIXME(gkm): implement keep-unique attributes
91	// FIXME(gkm): implement address-significance tables for MachO object files
92
93	// Compare "non-moving" parts of two ConcatInputSections, namely everything
94	// except references to other ConcatInputSections.
95	bool ICF::equalsConstant(const ConcatInputSection *ia,
96	const ConcatInputSection *ib) {
97	if (verboseDiagnostics)
98	++equalsConstantCount;
99	// We can only fold within the same OutputSection.
100	if (ia->parent != ib->parent)
101	return false;
102	if (ia->data.size() != ib->data.size())
103	return false;
104	if (ia->data != ib->data)
105	return false;
106	if (ia->relocs.size() != ib->relocs.size())
107	return false;
108	auto f = [](const Reloc &ra, const Reloc &rb) {
109	if (ra.type != rb.type)
110	return false;
111	if (ra.pcrel != rb.pcrel)
112	return false;
113	if (ra.length != rb.length)
114	return false;
115	if (ra.offset != rb.offset)
116	return false;
117	if (ra.referent.is<Symbol >() != rb.referent.is<Symbol >())
118	return false;
119
120	InputSection isecA, isecB;
121
122	uint64_t valueA = `0`;
123	uint64_t valueB = `0`;
124	if (ra.referent.is<Symbol *>()) {
125	const auto sa = ra.referent.get<Symbol >();
126	const auto sb = rb.referent.get<Symbol >();
127	if (sa->kind() != sb->kind())
128	return false;
129	// ICF runs before Undefineds are treated (and potentially converted into
130	// DylibSymbols).
131	if (isa<DylibSymbol>(Val: sa) \|\| isa<Undefined>(Val: sa))
132	return sa == sb && ra.addend == rb.addend;
133	assert(isa<Defined>(sa));
134	const auto *da = cast<Defined>(Val: sa);
135	const auto *db = cast<Defined>(Val: sb);
136	if (!da->isec() \|\| !db->isec()) {
137	assert(da->isAbsolute() && db->isAbsolute());
138	return da->value + ra.addend == db->value + rb.addend;
139	}
140	isecA = da->isec();
141	valueA = da->value;
142	isecB = db->isec();
143	valueB = db->value;
144	} else {
145	isecA = ra.referent.get<InputSection *>();
146	isecB = rb.referent.get<InputSection *>();
147	}
148
149	if (isecA->parent != isecB->parent)
150	return false;
151	// Sections with identical parents should be of the same kind.
152	assert(isecA->kind() == isecB->kind());
153	// We will compare ConcatInputSection contents in equalsVariable.
154	if (isa<ConcatInputSection>(Val: isecA))
155	return ra.addend == rb.addend;
156	// Else we have two literal sections. References to them are equal iff their
157	// offsets in the output section are equal.
158	if (ra.referent.is<Symbol *>())
159	// For symbol relocs, we compare the contents at the symbol address. We
160	// don't do `getOffset(value + addend)` because value + addend may not be
161	// a valid offset in the literal section.
162	return isecA->getOffset(off: valueA) == isecB->getOffset(off: valueB) &&
163	ra.addend == rb.addend;
164	else {
165	assert(valueA == `0` && valueB == `0`);
166	// For section relocs, we compare the content at the section offset.
167	return isecA->getOffset(off: ra.addend) == isecB->getOffset(off: rb.addend);
168	}
169	};
170	return std::equal(first1: ia->relocs.begin(), last1: ia->relocs.end(), first2: ib->relocs.begin(),
171	binary_pred: f);
172	}
173
174	// Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
175	// handled by equalsConstant().
176	bool ICF::equalsVariable(const ConcatInputSection *ia,
177	const ConcatInputSection *ib) {
178	if (verboseDiagnostics)
179	++equalsVariableCount;
180	assert(ia->relocs.size() == ib->relocs.size());
181	auto f = [this](const Reloc &ra, const Reloc &rb) {
182	// We already filtered out mismatching values/addends in equalsConstant.
183	if (ra.referent == rb.referent)
184	return true;
185	const ConcatInputSection isecA, isecB;
186	if (ra.referent.is<Symbol *>()) {
187	// Matching DylibSymbols are already filtered out by the
188	// identical-referent check above. Non-matching DylibSymbols were filtered
189	// out in equalsConstant(). So we can safely cast to Defined here.
190	const auto da = cast<Defined>(Val: ra.referent.get<Symbol >());
191	const auto db = cast<Defined>(Val: rb.referent.get<Symbol >());
192	if (da->isAbsolute())
193	return true;
194	isecA = dyn_cast<ConcatInputSection>(Val: da->isec());
195	if (!isecA)
196	return true; // literal sections were checked in equalsConstant.
197	isecB = cast<ConcatInputSection>(Val: db->isec());
198	} else {
199	const auto sa = ra.referent.get<InputSection >();
200	const auto sb = rb.referent.get<InputSection >();
201	isecA = dyn_cast<ConcatInputSection>(Val: sa);
202	if (!isecA)
203	return true;
204	isecB = cast<ConcatInputSection>(Val: sb);
205	}
206	return isecA->icfEqClass[icfPass % `2`] == isecB->icfEqClass[icfPass % `2`];
207	};
208	if (!std::equal(first1: ia->relocs.begin(), last1: ia->relocs.end(), first2: ib->relocs.begin(), binary_pred: f))
209	return false;
210
211	// If there are symbols with associated unwind info, check that the unwind
212	// info matches. For simplicity, we only handle the case where there are only
213	// symbols at offset zero within the section (which is typically the case with
214	// .subsections_via_symbols.)
215	auto hasUnwind = [](Defined d) { return* d->unwindEntry() != nullptr; };
216	const auto *itA = llvm::find_if(Range: ia->symbols, P: hasUnwind);
217	const auto *itB = llvm::find_if(Range: ib->symbols, P: hasUnwind);
218	if (itA == ia->symbols.end())
219	return itB == ib->symbols.end();
220	if (itB == ib->symbols.end())
221	return false;
222	const Defined da = itA;
223	const Defined db = itB;
224	if (da->unwindEntry()->icfEqClass[icfPass % `2`] !=
225	db->unwindEntry()->icfEqClass[icfPass % `2`] \|\|
226	da->value != `0` \|\| db->value != `0`)
227	return false;
228	auto isZero = [](Defined d) { return* d->value == `0`; };
229	return std::find_if_not(first: std::next(x: itA), last: ia->symbols.end(), pred: isZero) ==
230	ia->symbols.end() &&
231	std::find_if_not(first: std::next(x: itB), last: ib->symbols.end(), pred: isZero) ==
232	ib->symbols.end();
233	}
234
235	// Find the first InputSection after BEGIN whose equivalence class differs
236	size_t ICF::findBoundary(size_t begin, size_t end) {
237	uint64_t beginHash = icfInputs [begin]->icfEqClass[icfPass % `2`];
238	for (size_t i = begin + `1`; i < end; ++i)
239	if (beginHash != icfInputs [i]->icfEqClass[icfPass % `2`])
240	return i;
241	return end;
242	}
243
244	// Invoke FUNC on subranges with matching equivalence class
245	void ICF::forEachClassRange(size_t begin, size_t end,
246	llvm::function_ref<void(size_t, size_t)> func) {
247	while (begin < end) {
248	size_t mid = findBoundary(begin, end);
249	func (begin, mid);
250	begin = mid;
251	}
252	}
253
254	// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
255	// with matching equivalence class
256	void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
257	// Only use threads when the benefits outweigh the overhead.
258	const size_t threadingThreshold = `1024`;
259	if (icfInputs.size() < threadingThreshold) {
260	forEachClassRange(begin: `0`, end: icfInputs.size(), func);
261	++icfPass;
262	return;
263	}
264
265	// Shard into non-overlapping intervals, and call FUNC in parallel. The
266	// sharding must be completed before any calls to FUNC are made so that FUNC
267	// can modify the InputSection in its shard without causing data races.
268	const size_t shards = `256`;
269	size_t step = icfInputs.size() / shards;
270	size_t boundaries[shards + `1`];
271	boundaries[`0`] = `0`;
272	boundaries[shards] = icfInputs.size();
273	parallelFor(Begin: `1`, End: shards, Fn: [&](size_t i) {
274	boundaries[i] = findBoundary(begin: (i - `1`) * step, end: icfInputs.size());
275	});
276	parallelFor(Begin: `1`, End: shards + `1`, Fn: [&](size_t i) {
277	if (boundaries[i - `1`] < boundaries[i]) {
278	forEachClassRange(begin: boundaries[i - `1`], end: boundaries[i], func);
279	}
280	});
281	++icfPass;
282	}
283
284	void ICF::run() {
285	// Into each origin-section hash, combine all reloc referent section hashes.
286	for (icfPass = `0`; icfPass < `2`; ++icfPass) {
287	parallelForEach(R&: icfInputs, Fn: [&](ConcatInputSection *isec) {
288	uint32_t hash = isec->icfEqClass[icfPass % `2`];
289	for (const Reloc &r : isec->relocs) {
290	if (auto sym = r.referent.dyn_cast<Symbol >()) {
291	if (auto *defined = dyn_cast<Defined>(Val: sym)) {
292	if (defined->isec()) {
293	if (auto *referentIsec =
294	dyn_cast<ConcatInputSection>(Val: defined->isec()))
295	hash += defined->value + referentIsec->icfEqClass[icfPass % `2`];
296	else
297	hash += defined->isec()->kind() +
298	defined->isec()->getOffset(off: defined->value);
299	} else {
300	hash += defined->value;
301	}
302	} else {
303	// ICF runs before Undefined diags
304	assert(isa<Undefined>(sym) \|\| isa<DylibSymbol>(sym));
305	}
306	}
307	}
308	// Set MSB to 1 to avoid collisions with non-hashed classes.
309	isec->icfEqClass[(icfPass + `1`) % `2`] = hash \| (`1ull` << `31`);
310	});
311	}
312
313	llvm::stable_sort(
314	Range&: icfInputs, C: [](const ConcatInputSection a, const* ConcatInputSection *b) {
315	return a->icfEqClass[`0`] < b->icfEqClass[`0`];
316	});
317	forEachClass(func: [&](size_t begin, size_t end) {
318	segregate(begin, end, &ICF::equalsConstant);
319	});
320
321	// Split equivalence groups by comparing relocations until convergence
322	do {
323	icfRepeat = false;
324	forEachClass(func: [&](size_t begin, size_t end) {
325	segregate(begin, end, &ICF::equalsVariable);
326	});
327	} while (icfRepeat);
328	log(msg: "ICF needed " + Twine (icfPass) + " iterations");
329	if (verboseDiagnostics) {
330	log(msg: "equalsConstant() called " + Twine (equalsConstantCount) + " times");
331	log(msg: "equalsVariable() called " + Twine (equalsVariableCount) + " times");
332	}
333
334	// Fold sections within equivalence classes
335	forEachClass(func: [&](size_t begin, size_t end) {
336	if (end - begin < `2`)
337	return;
338	ConcatInputSection *beginIsec = icfInputs [begin];
339	for (size_t i = begin + `1`; i < end; ++i)
340	beginIsec->foldIdentical(redundant: icfInputs [i]);
341	});
342	}
343
344	// Split an equivalence class into smaller classes.
345	void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
346	while (begin < end) {
347	// Divide [begin, end) into two. Let mid be the start index of the
348	// second group.
349	auto bound = std::stable_partition(
350	first: icfInputs.begin() + begin + `1`, last: icfInputs.begin() + end,
351	pred: [&](ConcatInputSection *isec) {
352	return (this->*equals)(icfInputs [begin], isec);
353	});
354	size_t mid = bound - icfInputs.begin();
355
356	// Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
357	// equivalence class ID because every group ends with a unique index.
358	for (size_t i = begin; i < mid; ++i)
359	icfInputs [i]->icfEqClass[(icfPass + `1`) % `2`] = mid;
360
361	// If we created a group, we need to iterate the main loop again.
362	if (mid != end)
363	icfRepeat = true;
364
365	begin = mid;
366	}
367	}
368
369	void macho::markSymAsAddrSig(Symbol *s) {
370	if (auto *d = dyn_cast_or_null<Defined>(Val: s))
371	if (d->isec())
372	d->isec()->keepUnique = true;
373	}
374
375	void macho::markAddrSigSymbols() {
376	TimeTraceScope timeScope("Mark addrsig symbols");
377	for (InputFile *file : inputFiles) {
378	ObjFile *obj = dyn_cast<ObjFile>(Val: file);
379	if (!obj)
380	continue;
381
382	Section *addrSigSection = obj->addrSigSection;
383	if (!addrSigSection)
384	continue;
385	assert(addrSigSection->subsections.size() == `1`);
386
387	const InputSection *isec = addrSigSection->subsections [`0`].isec;
388
389	for (const Reloc &r : isec->relocs) {
390	if (auto sym = r.referent.dyn_cast<Symbol >())
391	markSymAsAddrSig(s: sym);
392	else
393	error(msg: toString(isec) + ": unexpected section relocation");
394	}
395	}
396	}
397
398	void macho::foldIdenticalSections(bool onlyCfStrings) {
399	TimeTraceScope timeScope("Fold Identical Code Sections");
400	// The ICF equivalence-class segregation algorithm relies on pre-computed
401	// hashes of InputSection::data for the ConcatOutputSection::inputs and all
402	// sections referenced by their relocs. We could recursively traverse the
403	// relocs to find every referenced InputSection, but that precludes easy
404	// parallelization. Therefore, we hash every InputSection here where we have
405	// them all accessible as simple vectors.
406
407	// If an InputSection is ineligible for ICF, we give it a unique ID to force
408	// it into an unfoldable singleton equivalence class. Begin the unique-ID
409	// space at inputSections.size(), so that it will never intersect with
410	// equivalence-class IDs which begin at 0. Since hashes & unique IDs never
411	// coexist with equivalence-class IDs, this is not necessary, but might help
412	// someone keep the numbers straight in case we ever need to debug the
413	// ICF::segregate()
414	std::vector<ConcatInputSection *> foldable;
415	uint64_t icfUniqueID = inputSections.size();
416	for (ConcatInputSection *isec : inputSections) {
417	bool isFoldableWithAddendsRemoved = isCfStringSection(isec) \|\|
418	isClassRefsSection(isec) \|\|
419	isSelRefsSection(isec);
420	// NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we
421	// can still fold it.
422	bool hasFoldableFlags = (isSelRefsSection(isec) \|\|
423	sectionType(flags: isec->getFlags()) == MachO::S_REGULAR);
424	// FIXME: consider non-code __text sections as foldable?
425	bool isFoldable = (!onlyCfStrings \|\| isCfStringSection(isec)) &&
426	(isCodeSection(isec) \|\| isFoldableWithAddendsRemoved \|\|
427	isGccExceptTabSection(isec)) &&
428	!isec->keepUnique && !isec->hasAltEntry &&
429	!isec->shouldOmitFromOutput() && hasFoldableFlags;
430	if (isFoldable) {
431	foldable.push_back(x: isec);
432	for (Defined *d : isec->symbols)
433	if (d->unwindEntry())
434	foldable.push_back(x: d->unwindEntry());
435
436	// Some sections have embedded addends that foil ICF's hashing / equality
437	// checks. (We can ignore embedded addends when doing ICF because the same
438	// information gets recorded in our Reloc structs.) We therefore create a
439	// mutable copy of the section data and zero out the embedded addends
440	// before performing any hashing / equality checks.
441	if (isFoldableWithAddendsRemoved) {
442	// We have to do this copying serially as the BumpPtrAllocator is not
443	// thread-safe. FIXME: Make a thread-safe allocator.
444	MutableArrayRef<uint8_t> copy = isec->data.copy(A&: bAlloc());
445	for (const Reloc &r : isec->relocs)
446	target->relocateOne(loc: copy.data() + r.offset, r, /va=/`0`,
447	/relocVA=/`0`);
448	isec->data = copy;
449	}
450	} else if (!isEhFrameSection(isec)) {
451	// EH frames are gathered as foldables from unwindEntry above; give a
452	// unique ID to everything else.
453	isec->icfEqClass[`0`] = ++icfUniqueID;
454	}
455	}
456	parallelForEach(R&: foldable, Fn: [](ConcatInputSection *isec) {
457	assert(isec->icfEqClass[`0`] == `0`); // don't overwrite a unique ID!
458	// Turn-on the top bit to guarantee that valid hashes have no collisions
459	// with the small-integer unique IDs for ICF-ineligible sections
460	isec->icfEqClass[`0`] = xxh3_64bits(data: isec->data) \| (`1ull` << `31`);
461	});
462	// Now that every input section is either hashed or marked as unique, run the
463	// segregation algorithm to detect foldable subsections.
464	ICF (foldable).run();
465	}
466

source code of lld/MachO/ICF.cpp