1 | //===- ICF.cpp ------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "ICF.h" |
10 | #include "ConcatOutputSection.h" |
11 | #include "Config.h" |
12 | #include "InputSection.h" |
13 | #include "SymbolTable.h" |
14 | #include "Symbols.h" |
15 | #include "UnwindInfoSection.h" |
16 | |
17 | #include "lld/Common/CommonLinkerContext.h" |
18 | #include "llvm/Support/LEB128.h" |
19 | #include "llvm/Support/Parallel.h" |
20 | #include "llvm/Support/TimeProfiler.h" |
21 | #include "llvm/Support/xxhash.h" |
22 | |
23 | #include <atomic> |
24 | |
25 | using namespace llvm; |
26 | using namespace lld; |
27 | using namespace lld::macho; |
28 | |
29 | static constexpr bool verboseDiagnostics = false; |
30 | |
31 | class ICF { |
32 | public: |
33 | ICF(std::vector<ConcatInputSection *> &inputs); |
34 | void run(); |
35 | |
36 | using EqualsFn = bool (ICF::*)(const ConcatInputSection *, |
37 | const ConcatInputSection *); |
38 | void segregate(size_t begin, size_t end, EqualsFn); |
39 | size_t findBoundary(size_t begin, size_t end); |
40 | void forEachClassRange(size_t begin, size_t end, |
41 | llvm::function_ref<void(size_t, size_t)> func); |
42 | void forEachClass(llvm::function_ref<void(size_t, size_t)> func); |
43 | |
44 | bool equalsConstant(const ConcatInputSection *ia, |
45 | const ConcatInputSection *ib); |
46 | bool equalsVariable(const ConcatInputSection *ia, |
47 | const ConcatInputSection *ib); |
48 | |
49 | // ICF needs a copy of the inputs vector because its equivalence-class |
50 | // segregation algorithm destroys the proper sequence. |
51 | std::vector<ConcatInputSection *> icfInputs; |
52 | |
53 | unsigned icfPass = 0; |
54 | std::atomic<bool> icfRepeat{false}; |
55 | std::atomic<uint64_t> equalsConstantCount{0}; |
56 | std::atomic<uint64_t> equalsVariableCount{0}; |
57 | }; |
58 | |
59 | ICF::ICF(std::vector<ConcatInputSection *> &inputs) { |
60 | icfInputs.assign(first: inputs.begin(), last: inputs.end()); |
61 | } |
62 | |
63 | // ICF = Identical Code Folding |
64 | // |
65 | // We only fold __TEXT,__text, so this is really "code" folding, and not |
66 | // "COMDAT" folding. String and scalar constant literals are deduplicated |
67 | // elsewhere. |
68 | // |
69 | // Summary of segments & sections: |
70 | // |
71 | // The __TEXT segment is readonly at the MMU. Some sections are already |
72 | // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are |
73 | // synthetic and inherently free of duplicates (__TEXT,__stubs & |
74 | // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const, |
75 | // because doing so induces many test failures. |
76 | // |
77 | // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and |
78 | // thus ineligible for ICF. |
79 | // |
80 | // The __DATA_CONST segment is read/write at the MMU, but is logically const to |
81 | // the application after dyld applies fixups to pointer data. We currently |
82 | // fold only the __DATA_CONST,__cfstring section. |
83 | // |
84 | // The __DATA segment is read/write at the MMU, and as application-writeable |
85 | // data, none of its sections are eligible for ICF. |
86 | // |
87 | // Please see the large block comment in lld/ELF/ICF.cpp for an explanation |
88 | // of the segregation algorithm. |
89 | // |
90 | // FIXME(gkm): implement keep-unique attributes |
91 | // FIXME(gkm): implement address-significance tables for MachO object files |
92 | |
93 | // Compare "non-moving" parts of two ConcatInputSections, namely everything |
94 | // except references to other ConcatInputSections. |
95 | bool ICF::equalsConstant(const ConcatInputSection *ia, |
96 | const ConcatInputSection *ib) { |
97 | if (verboseDiagnostics) |
98 | ++equalsConstantCount; |
99 | // We can only fold within the same OutputSection. |
100 | if (ia->parent != ib->parent) |
101 | return false; |
102 | if (ia->data.size() != ib->data.size()) |
103 | return false; |
104 | if (ia->data != ib->data) |
105 | return false; |
106 | if (ia->relocs.size() != ib->relocs.size()) |
107 | return false; |
108 | auto f = [](const Reloc &ra, const Reloc &rb) { |
109 | if (ra.type != rb.type) |
110 | return false; |
111 | if (ra.pcrel != rb.pcrel) |
112 | return false; |
113 | if (ra.length != rb.length) |
114 | return false; |
115 | if (ra.offset != rb.offset) |
116 | return false; |
117 | if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>()) |
118 | return false; |
119 | |
120 | InputSection *isecA, *isecB; |
121 | |
122 | uint64_t valueA = 0; |
123 | uint64_t valueB = 0; |
124 | if (ra.referent.is<Symbol *>()) { |
125 | const auto *sa = ra.referent.get<Symbol *>(); |
126 | const auto *sb = rb.referent.get<Symbol *>(); |
127 | if (sa->kind() != sb->kind()) |
128 | return false; |
129 | // ICF runs before Undefineds are treated (and potentially converted into |
130 | // DylibSymbols). |
131 | if (isa<DylibSymbol>(Val: sa) || isa<Undefined>(Val: sa)) |
132 | return sa == sb && ra.addend == rb.addend; |
133 | assert(isa<Defined>(sa)); |
134 | const auto *da = cast<Defined>(Val: sa); |
135 | const auto *db = cast<Defined>(Val: sb); |
136 | if (!da->isec() || !db->isec()) { |
137 | assert(da->isAbsolute() && db->isAbsolute()); |
138 | return da->value + ra.addend == db->value + rb.addend; |
139 | } |
140 | isecA = da->isec(); |
141 | valueA = da->value; |
142 | isecB = db->isec(); |
143 | valueB = db->value; |
144 | } else { |
145 | isecA = ra.referent.get<InputSection *>(); |
146 | isecB = rb.referent.get<InputSection *>(); |
147 | } |
148 | |
149 | if (isecA->parent != isecB->parent) |
150 | return false; |
151 | // Sections with identical parents should be of the same kind. |
152 | assert(isecA->kind() == isecB->kind()); |
153 | // We will compare ConcatInputSection contents in equalsVariable. |
154 | if (isa<ConcatInputSection>(Val: isecA)) |
155 | return ra.addend == rb.addend; |
156 | // Else we have two literal sections. References to them are equal iff their |
157 | // offsets in the output section are equal. |
158 | if (ra.referent.is<Symbol *>()) |
159 | // For symbol relocs, we compare the contents at the symbol address. We |
160 | // don't do `getOffset(value + addend)` because value + addend may not be |
161 | // a valid offset in the literal section. |
162 | return isecA->getOffset(off: valueA) == isecB->getOffset(off: valueB) && |
163 | ra.addend == rb.addend; |
164 | else { |
165 | assert(valueA == 0 && valueB == 0); |
166 | // For section relocs, we compare the content at the section offset. |
167 | return isecA->getOffset(off: ra.addend) == isecB->getOffset(off: rb.addend); |
168 | } |
169 | }; |
170 | return std::equal(first1: ia->relocs.begin(), last1: ia->relocs.end(), first2: ib->relocs.begin(), |
171 | binary_pred: f); |
172 | } |
173 | |
174 | // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not |
175 | // handled by equalsConstant(). |
176 | bool ICF::equalsVariable(const ConcatInputSection *ia, |
177 | const ConcatInputSection *ib) { |
178 | if (verboseDiagnostics) |
179 | ++equalsVariableCount; |
180 | assert(ia->relocs.size() == ib->relocs.size()); |
181 | auto f = [this](const Reloc &ra, const Reloc &rb) { |
182 | // We already filtered out mismatching values/addends in equalsConstant. |
183 | if (ra.referent == rb.referent) |
184 | return true; |
185 | const ConcatInputSection *isecA, *isecB; |
186 | if (ra.referent.is<Symbol *>()) { |
187 | // Matching DylibSymbols are already filtered out by the |
188 | // identical-referent check above. Non-matching DylibSymbols were filtered |
189 | // out in equalsConstant(). So we can safely cast to Defined here. |
190 | const auto *da = cast<Defined>(Val: ra.referent.get<Symbol *>()); |
191 | const auto *db = cast<Defined>(Val: rb.referent.get<Symbol *>()); |
192 | if (da->isAbsolute()) |
193 | return true; |
194 | isecA = dyn_cast<ConcatInputSection>(Val: da->isec()); |
195 | if (!isecA) |
196 | return true; // literal sections were checked in equalsConstant. |
197 | isecB = cast<ConcatInputSection>(Val: db->isec()); |
198 | } else { |
199 | const auto *sa = ra.referent.get<InputSection *>(); |
200 | const auto *sb = rb.referent.get<InputSection *>(); |
201 | isecA = dyn_cast<ConcatInputSection>(Val: sa); |
202 | if (!isecA) |
203 | return true; |
204 | isecB = cast<ConcatInputSection>(Val: sb); |
205 | } |
206 | return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2]; |
207 | }; |
208 | if (!std::equal(first1: ia->relocs.begin(), last1: ia->relocs.end(), first2: ib->relocs.begin(), binary_pred: f)) |
209 | return false; |
210 | |
211 | // If there are symbols with associated unwind info, check that the unwind |
212 | // info matches. For simplicity, we only handle the case where there are only |
213 | // symbols at offset zero within the section (which is typically the case with |
214 | // .subsections_via_symbols.) |
215 | auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; }; |
216 | const auto *itA = llvm::find_if(Range: ia->symbols, P: hasUnwind); |
217 | const auto *itB = llvm::find_if(Range: ib->symbols, P: hasUnwind); |
218 | if (itA == ia->symbols.end()) |
219 | return itB == ib->symbols.end(); |
220 | if (itB == ib->symbols.end()) |
221 | return false; |
222 | const Defined *da = *itA; |
223 | const Defined *db = *itB; |
224 | if (da->unwindEntry()->icfEqClass[icfPass % 2] != |
225 | db->unwindEntry()->icfEqClass[icfPass % 2] || |
226 | da->value != 0 || db->value != 0) |
227 | return false; |
228 | auto isZero = [](Defined *d) { return d->value == 0; }; |
229 | return std::find_if_not(first: std::next(x: itA), last: ia->symbols.end(), pred: isZero) == |
230 | ia->symbols.end() && |
231 | std::find_if_not(first: std::next(x: itB), last: ib->symbols.end(), pred: isZero) == |
232 | ib->symbols.end(); |
233 | } |
234 | |
235 | // Find the first InputSection after BEGIN whose equivalence class differs |
236 | size_t ICF::findBoundary(size_t begin, size_t end) { |
237 | uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2]; |
238 | for (size_t i = begin + 1; i < end; ++i) |
239 | if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2]) |
240 | return i; |
241 | return end; |
242 | } |
243 | |
244 | // Invoke FUNC on subranges with matching equivalence class |
245 | void ICF::forEachClassRange(size_t begin, size_t end, |
246 | llvm::function_ref<void(size_t, size_t)> func) { |
247 | while (begin < end) { |
248 | size_t mid = findBoundary(begin, end); |
249 | func(begin, mid); |
250 | begin = mid; |
251 | } |
252 | } |
253 | |
254 | // Split icfInputs into shards, then parallelize invocation of FUNC on subranges |
255 | // with matching equivalence class |
256 | void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) { |
257 | // Only use threads when the benefits outweigh the overhead. |
258 | const size_t threadingThreshold = 1024; |
259 | if (icfInputs.size() < threadingThreshold) { |
260 | forEachClassRange(begin: 0, end: icfInputs.size(), func); |
261 | ++icfPass; |
262 | return; |
263 | } |
264 | |
265 | // Shard into non-overlapping intervals, and call FUNC in parallel. The |
266 | // sharding must be completed before any calls to FUNC are made so that FUNC |
267 | // can modify the InputSection in its shard without causing data races. |
268 | const size_t shards = 256; |
269 | size_t step = icfInputs.size() / shards; |
270 | size_t boundaries[shards + 1]; |
271 | boundaries[0] = 0; |
272 | boundaries[shards] = icfInputs.size(); |
273 | parallelFor(Begin: 1, End: shards, Fn: [&](size_t i) { |
274 | boundaries[i] = findBoundary(begin: (i - 1) * step, end: icfInputs.size()); |
275 | }); |
276 | parallelFor(Begin: 1, End: shards + 1, Fn: [&](size_t i) { |
277 | if (boundaries[i - 1] < boundaries[i]) { |
278 | forEachClassRange(begin: boundaries[i - 1], end: boundaries[i], func); |
279 | } |
280 | }); |
281 | ++icfPass; |
282 | } |
283 | |
284 | void ICF::run() { |
285 | // Into each origin-section hash, combine all reloc referent section hashes. |
286 | for (icfPass = 0; icfPass < 2; ++icfPass) { |
287 | parallelForEach(R&: icfInputs, Fn: [&](ConcatInputSection *isec) { |
288 | uint32_t hash = isec->icfEqClass[icfPass % 2]; |
289 | for (const Reloc &r : isec->relocs) { |
290 | if (auto *sym = r.referent.dyn_cast<Symbol *>()) { |
291 | if (auto *defined = dyn_cast<Defined>(Val: sym)) { |
292 | if (defined->isec()) { |
293 | if (auto *referentIsec = |
294 | dyn_cast<ConcatInputSection>(Val: defined->isec())) |
295 | hash += defined->value + referentIsec->icfEqClass[icfPass % 2]; |
296 | else |
297 | hash += defined->isec()->kind() + |
298 | defined->isec()->getOffset(off: defined->value); |
299 | } else { |
300 | hash += defined->value; |
301 | } |
302 | } else { |
303 | // ICF runs before Undefined diags |
304 | assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym)); |
305 | } |
306 | } |
307 | } |
308 | // Set MSB to 1 to avoid collisions with non-hashed classes. |
309 | isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31); |
310 | }); |
311 | } |
312 | |
313 | llvm::stable_sort( |
314 | Range&: icfInputs, C: [](const ConcatInputSection *a, const ConcatInputSection *b) { |
315 | return a->icfEqClass[0] < b->icfEqClass[0]; |
316 | }); |
317 | forEachClass(func: [&](size_t begin, size_t end) { |
318 | segregate(begin, end, &ICF::equalsConstant); |
319 | }); |
320 | |
321 | // Split equivalence groups by comparing relocations until convergence |
322 | do { |
323 | icfRepeat = false; |
324 | forEachClass(func: [&](size_t begin, size_t end) { |
325 | segregate(begin, end, &ICF::equalsVariable); |
326 | }); |
327 | } while (icfRepeat); |
328 | log(msg: "ICF needed " + Twine(icfPass) + " iterations" ); |
329 | if (verboseDiagnostics) { |
330 | log(msg: "equalsConstant() called " + Twine(equalsConstantCount) + " times" ); |
331 | log(msg: "equalsVariable() called " + Twine(equalsVariableCount) + " times" ); |
332 | } |
333 | |
334 | // Fold sections within equivalence classes |
335 | forEachClass(func: [&](size_t begin, size_t end) { |
336 | if (end - begin < 2) |
337 | return; |
338 | ConcatInputSection *beginIsec = icfInputs[begin]; |
339 | for (size_t i = begin + 1; i < end; ++i) |
340 | beginIsec->foldIdentical(redundant: icfInputs[i]); |
341 | }); |
342 | } |
343 | |
344 | // Split an equivalence class into smaller classes. |
345 | void ICF::segregate(size_t begin, size_t end, EqualsFn equals) { |
346 | while (begin < end) { |
347 | // Divide [begin, end) into two. Let mid be the start index of the |
348 | // second group. |
349 | auto bound = std::stable_partition( |
350 | first: icfInputs.begin() + begin + 1, last: icfInputs.begin() + end, |
351 | pred: [&](ConcatInputSection *isec) { |
352 | return (this->*equals)(icfInputs[begin], isec); |
353 | }); |
354 | size_t mid = bound - icfInputs.begin(); |
355 | |
356 | // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an |
357 | // equivalence class ID because every group ends with a unique index. |
358 | for (size_t i = begin; i < mid; ++i) |
359 | icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid; |
360 | |
361 | // If we created a group, we need to iterate the main loop again. |
362 | if (mid != end) |
363 | icfRepeat = true; |
364 | |
365 | begin = mid; |
366 | } |
367 | } |
368 | |
369 | void macho::markSymAsAddrSig(Symbol *s) { |
370 | if (auto *d = dyn_cast_or_null<Defined>(Val: s)) |
371 | if (d->isec()) |
372 | d->isec()->keepUnique = true; |
373 | } |
374 | |
375 | void macho::markAddrSigSymbols() { |
376 | TimeTraceScope timeScope("Mark addrsig symbols" ); |
377 | for (InputFile *file : inputFiles) { |
378 | ObjFile *obj = dyn_cast<ObjFile>(Val: file); |
379 | if (!obj) |
380 | continue; |
381 | |
382 | Section *addrSigSection = obj->addrSigSection; |
383 | if (!addrSigSection) |
384 | continue; |
385 | assert(addrSigSection->subsections.size() == 1); |
386 | |
387 | const InputSection *isec = addrSigSection->subsections[0].isec; |
388 | |
389 | for (const Reloc &r : isec->relocs) { |
390 | if (auto *sym = r.referent.dyn_cast<Symbol *>()) |
391 | markSymAsAddrSig(s: sym); |
392 | else |
393 | error(msg: toString(isec) + ": unexpected section relocation" ); |
394 | } |
395 | } |
396 | } |
397 | |
398 | void macho::foldIdenticalSections(bool onlyCfStrings) { |
399 | TimeTraceScope timeScope("Fold Identical Code Sections" ); |
400 | // The ICF equivalence-class segregation algorithm relies on pre-computed |
401 | // hashes of InputSection::data for the ConcatOutputSection::inputs and all |
402 | // sections referenced by their relocs. We could recursively traverse the |
403 | // relocs to find every referenced InputSection, but that precludes easy |
404 | // parallelization. Therefore, we hash every InputSection here where we have |
405 | // them all accessible as simple vectors. |
406 | |
407 | // If an InputSection is ineligible for ICF, we give it a unique ID to force |
408 | // it into an unfoldable singleton equivalence class. Begin the unique-ID |
409 | // space at inputSections.size(), so that it will never intersect with |
410 | // equivalence-class IDs which begin at 0. Since hashes & unique IDs never |
411 | // coexist with equivalence-class IDs, this is not necessary, but might help |
412 | // someone keep the numbers straight in case we ever need to debug the |
413 | // ICF::segregate() |
414 | std::vector<ConcatInputSection *> foldable; |
415 | uint64_t icfUniqueID = inputSections.size(); |
416 | for (ConcatInputSection *isec : inputSections) { |
417 | bool isFoldableWithAddendsRemoved = isCfStringSection(isec) || |
418 | isClassRefsSection(isec) || |
419 | isSelRefsSection(isec); |
420 | // NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we |
421 | // can still fold it. |
422 | bool hasFoldableFlags = (isSelRefsSection(isec) || |
423 | sectionType(flags: isec->getFlags()) == MachO::S_REGULAR); |
424 | // FIXME: consider non-code __text sections as foldable? |
425 | bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) && |
426 | (isCodeSection(isec) || isFoldableWithAddendsRemoved || |
427 | isGccExceptTabSection(isec)) && |
428 | !isec->keepUnique && !isec->hasAltEntry && |
429 | !isec->shouldOmitFromOutput() && hasFoldableFlags; |
430 | if (isFoldable) { |
431 | foldable.push_back(x: isec); |
432 | for (Defined *d : isec->symbols) |
433 | if (d->unwindEntry()) |
434 | foldable.push_back(x: d->unwindEntry()); |
435 | |
436 | // Some sections have embedded addends that foil ICF's hashing / equality |
437 | // checks. (We can ignore embedded addends when doing ICF because the same |
438 | // information gets recorded in our Reloc structs.) We therefore create a |
439 | // mutable copy of the section data and zero out the embedded addends |
440 | // before performing any hashing / equality checks. |
441 | if (isFoldableWithAddendsRemoved) { |
442 | // We have to do this copying serially as the BumpPtrAllocator is not |
443 | // thread-safe. FIXME: Make a thread-safe allocator. |
444 | MutableArrayRef<uint8_t> copy = isec->data.copy(A&: bAlloc()); |
445 | for (const Reloc &r : isec->relocs) |
446 | target->relocateOne(loc: copy.data() + r.offset, r, /*va=*/0, |
447 | /*relocVA=*/0); |
448 | isec->data = copy; |
449 | } |
450 | } else if (!isEhFrameSection(isec)) { |
451 | // EH frames are gathered as foldables from unwindEntry above; give a |
452 | // unique ID to everything else. |
453 | isec->icfEqClass[0] = ++icfUniqueID; |
454 | } |
455 | } |
456 | parallelForEach(R&: foldable, Fn: [](ConcatInputSection *isec) { |
457 | assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID! |
458 | // Turn-on the top bit to guarantee that valid hashes have no collisions |
459 | // with the small-integer unique IDs for ICF-ineligible sections |
460 | isec->icfEqClass[0] = xxh3_64bits(data: isec->data) | (1ull << 31); |
461 | }); |
462 | // Now that every input section is either hashed or marked as unique, run the |
463 | // segregation algorithm to detect foldable subsections. |
464 | ICF(foldable).run(); |
465 | } |
466 | |