1 | //===- ICF.cpp ------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "ICF.h" |
10 | #include "ConcatOutputSection.h" |
11 | #include "Config.h" |
12 | #include "InputSection.h" |
13 | #include "SymbolTable.h" |
14 | #include "Symbols.h" |
15 | |
16 | #include "lld/Common/CommonLinkerContext.h" |
17 | #include "llvm/Support/Parallel.h" |
18 | #include "llvm/Support/TimeProfiler.h" |
19 | #include "llvm/Support/xxhash.h" |
20 | |
21 | #include <atomic> |
22 | |
23 | using namespace llvm; |
24 | using namespace lld; |
25 | using namespace lld::macho; |
26 | |
27 | static constexpr bool verboseDiagnostics = false; |
28 | // This counter is used to generate unique thunk names. |
29 | static uint64_t icfThunkCounter = 0; |
30 | |
31 | class ICF { |
32 | public: |
33 | ICF(std::vector<ConcatInputSection *> &inputs); |
34 | void run(); |
35 | |
36 | using EqualsFn = bool (ICF::*)(const ConcatInputSection *, |
37 | const ConcatInputSection *); |
38 | void segregate(size_t begin, size_t end, EqualsFn); |
39 | size_t findBoundary(size_t begin, size_t end); |
40 | void forEachClassRange(size_t begin, size_t end, |
41 | llvm::function_ref<void(size_t, size_t)> func); |
42 | void forEachClass(llvm::function_ref<void(size_t, size_t)> func); |
43 | |
44 | bool equalsConstant(const ConcatInputSection *ia, |
45 | const ConcatInputSection *ib); |
46 | bool equalsVariable(const ConcatInputSection *ia, |
47 | const ConcatInputSection *ib); |
48 | void applySafeThunksToRange(size_t begin, size_t end); |
49 | |
50 | // ICF needs a copy of the inputs vector because its equivalence-class |
51 | // segregation algorithm destroys the proper sequence. |
52 | std::vector<ConcatInputSection *> icfInputs; |
53 | |
54 | unsigned icfPass = 0; |
55 | std::atomic<bool> icfRepeat{false}; |
56 | std::atomic<uint64_t> equalsConstantCount{0}; |
57 | std::atomic<uint64_t> equalsVariableCount{0}; |
58 | }; |
59 | |
60 | ICF::ICF(std::vector<ConcatInputSection *> &inputs) { |
61 | icfInputs.assign(first: inputs.begin(), last: inputs.end()); |
62 | } |
63 | |
64 | // ICF = Identical Code Folding |
65 | // |
66 | // We only fold __TEXT,__text, so this is really "code" folding, and not |
67 | // "COMDAT" folding. String and scalar constant literals are deduplicated |
68 | // elsewhere. |
69 | // |
70 | // Summary of segments & sections: |
71 | // |
72 | // The __TEXT segment is readonly at the MMU. Some sections are already |
73 | // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are |
74 | // synthetic and inherently free of duplicates (__TEXT,__stubs & |
75 | // __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const, |
76 | // because doing so induces many test failures. |
77 | // |
78 | // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and |
79 | // thus ineligible for ICF. |
80 | // |
81 | // The __DATA_CONST segment is read/write at the MMU, but is logically const to |
82 | // the application after dyld applies fixups to pointer data. We currently |
83 | // fold only the __DATA_CONST,__cfstring section. |
84 | // |
85 | // The __DATA segment is read/write at the MMU, and as application-writeable |
86 | // data, none of its sections are eligible for ICF. |
87 | // |
88 | // Please see the large block comment in lld/ELF/ICF.cpp for an explanation |
89 | // of the segregation algorithm. |
90 | // |
91 | // FIXME(gkm): implement keep-unique attributes |
92 | // FIXME(gkm): implement address-significance tables for MachO object files |
93 | |
94 | // Compare "non-moving" parts of two ConcatInputSections, namely everything |
95 | // except references to other ConcatInputSections. |
96 | bool ICF::equalsConstant(const ConcatInputSection *ia, |
97 | const ConcatInputSection *ib) { |
98 | if (verboseDiagnostics) |
99 | ++equalsConstantCount; |
100 | // We can only fold within the same OutputSection. |
101 | if (ia->parent != ib->parent) |
102 | return false; |
103 | if (ia->data.size() != ib->data.size()) |
104 | return false; |
105 | if (ia->data != ib->data) |
106 | return false; |
107 | if (ia->relocs.size() != ib->relocs.size()) |
108 | return false; |
109 | auto f = [](const Reloc &ra, const Reloc &rb) { |
110 | if (ra.type != rb.type) |
111 | return false; |
112 | if (ra.pcrel != rb.pcrel) |
113 | return false; |
114 | if (ra.length != rb.length) |
115 | return false; |
116 | if (ra.offset != rb.offset) |
117 | return false; |
118 | if (isa<Symbol *>(Val: ra.referent) != isa<Symbol *>(Val: rb.referent)) |
119 | return false; |
120 | |
121 | InputSection *isecA, *isecB; |
122 | |
123 | uint64_t valueA = 0; |
124 | uint64_t valueB = 0; |
125 | if (isa<Symbol *>(Val: ra.referent)) { |
126 | const auto *sa = cast<Symbol *>(Val: ra.referent); |
127 | const auto *sb = cast<Symbol *>(Val: rb.referent); |
128 | if (sa->kind() != sb->kind()) |
129 | return false; |
130 | // ICF runs before Undefineds are treated (and potentially converted into |
131 | // DylibSymbols). |
132 | if (isa<DylibSymbol>(Val: sa) || isa<Undefined>(Val: sa)) |
133 | return sa == sb && ra.addend == rb.addend; |
134 | assert(isa<Defined>(sa)); |
135 | const auto *da = cast<Defined>(Val: sa); |
136 | const auto *db = cast<Defined>(Val: sb); |
137 | if (!da->isec() || !db->isec()) { |
138 | assert(da->isAbsolute() && db->isAbsolute()); |
139 | return da->value + ra.addend == db->value + rb.addend; |
140 | } |
141 | isecA = da->isec(); |
142 | valueA = da->value; |
143 | isecB = db->isec(); |
144 | valueB = db->value; |
145 | } else { |
146 | isecA = cast<InputSection *>(Val: ra.referent); |
147 | isecB = cast<InputSection *>(Val: rb.referent); |
148 | } |
149 | |
150 | // Typically, we should not encounter sections marked with `keepUnique` at |
151 | // this point as they would have resulted in different hashes and therefore |
152 | // no need for a full comparison. |
153 | // However, in `safe_thunks` mode, it's possible for two different |
154 | // relocations to reference identical `keepUnique` functions that will be |
155 | // distinguished later via thunks - so we need to handle this case |
156 | // explicitly. |
157 | if ((isecA != isecB) && ((isecA->keepUnique && isCodeSection(isecA)) || |
158 | (isecB->keepUnique && isCodeSection(isecB)))) |
159 | return false; |
160 | |
161 | if (isecA->parent != isecB->parent) |
162 | return false; |
163 | // Sections with identical parents should be of the same kind. |
164 | assert(isecA->kind() == isecB->kind()); |
165 | // We will compare ConcatInputSection contents in equalsVariable. |
166 | if (isa<ConcatInputSection>(Val: isecA)) |
167 | return ra.addend == rb.addend; |
168 | // Else we have two literal sections. References to them are equal iff their |
169 | // offsets in the output section are equal. |
170 | if (isa<Symbol *>(Val: ra.referent)) |
171 | // For symbol relocs, we compare the contents at the symbol address. We |
172 | // don't do `getOffset(value + addend)` because value + addend may not be |
173 | // a valid offset in the literal section. |
174 | return isecA->getOffset(off: valueA) == isecB->getOffset(off: valueB) && |
175 | ra.addend == rb.addend; |
176 | else { |
177 | assert(valueA == 0 && valueB == 0); |
178 | // For section relocs, we compare the content at the section offset. |
179 | return isecA->getOffset(off: ra.addend) == isecB->getOffset(off: rb.addend); |
180 | } |
181 | }; |
182 | return std::equal(first1: ia->relocs.begin(), last1: ia->relocs.end(), first2: ib->relocs.begin(), |
183 | binary_pred: f); |
184 | } |
185 | |
186 | // Compare the "moving" parts of two ConcatInputSections -- i.e. everything not |
187 | // handled by equalsConstant(). |
188 | bool ICF::equalsVariable(const ConcatInputSection *ia, |
189 | const ConcatInputSection *ib) { |
190 | if (verboseDiagnostics) |
191 | ++equalsVariableCount; |
192 | assert(ia->relocs.size() == ib->relocs.size()); |
193 | auto f = [this](const Reloc &ra, const Reloc &rb) { |
194 | // We already filtered out mismatching values/addends in equalsConstant. |
195 | if (ra.referent == rb.referent) |
196 | return true; |
197 | const ConcatInputSection *isecA, *isecB; |
198 | if (isa<Symbol *>(Val: ra.referent)) { |
199 | // Matching DylibSymbols are already filtered out by the |
200 | // identical-referent check above. Non-matching DylibSymbols were filtered |
201 | // out in equalsConstant(). So we can safely cast to Defined here. |
202 | const auto *da = cast<Defined>(Val: cast<Symbol *>(Val: ra.referent)); |
203 | const auto *db = cast<Defined>(Val: cast<Symbol *>(Val: rb.referent)); |
204 | if (da->isAbsolute()) |
205 | return true; |
206 | isecA = dyn_cast<ConcatInputSection>(Val: da->isec()); |
207 | if (!isecA) |
208 | return true; // literal sections were checked in equalsConstant. |
209 | isecB = cast<ConcatInputSection>(Val: db->isec()); |
210 | } else { |
211 | const auto *sa = cast<InputSection *>(Val: ra.referent); |
212 | const auto *sb = cast<InputSection *>(Val: rb.referent); |
213 | isecA = dyn_cast<ConcatInputSection>(Val: sa); |
214 | if (!isecA) |
215 | return true; |
216 | isecB = cast<ConcatInputSection>(Val: sb); |
217 | } |
218 | return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2]; |
219 | }; |
220 | if (!std::equal(first1: ia->relocs.begin(), last1: ia->relocs.end(), first2: ib->relocs.begin(), binary_pred: f)) |
221 | return false; |
222 | |
223 | // If there are symbols with associated unwind info, check that the unwind |
224 | // info matches. For simplicity, we only handle the case where there are only |
225 | // symbols at offset zero within the section (which is typically the case with |
226 | // .subsections_via_symbols.) |
227 | auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; }; |
228 | const auto *itA = llvm::find_if(Range: ia->symbols, P: hasUnwind); |
229 | const auto *itB = llvm::find_if(Range: ib->symbols, P: hasUnwind); |
230 | if (itA == ia->symbols.end()) |
231 | return itB == ib->symbols.end(); |
232 | if (itB == ib->symbols.end()) |
233 | return false; |
234 | const Defined *da = *itA; |
235 | const Defined *db = *itB; |
236 | if (da->unwindEntry()->icfEqClass[icfPass % 2] != |
237 | db->unwindEntry()->icfEqClass[icfPass % 2] || |
238 | da->value != 0 || db->value != 0) |
239 | return false; |
240 | auto isZero = [](Defined *d) { return d->value == 0; }; |
241 | return std::find_if_not(first: std::next(x: itA), last: ia->symbols.end(), pred: isZero) == |
242 | ia->symbols.end() && |
243 | std::find_if_not(first: std::next(x: itB), last: ib->symbols.end(), pred: isZero) == |
244 | ib->symbols.end(); |
245 | } |
246 | |
247 | // Find the first InputSection after BEGIN whose equivalence class differs |
248 | size_t ICF::findBoundary(size_t begin, size_t end) { |
249 | uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2]; |
250 | for (size_t i = begin + 1; i < end; ++i) |
251 | if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2]) |
252 | return i; |
253 | return end; |
254 | } |
255 | |
256 | // Invoke FUNC on subranges with matching equivalence class |
257 | void ICF::forEachClassRange(size_t begin, size_t end, |
258 | llvm::function_ref<void(size_t, size_t)> func) { |
259 | while (begin < end) { |
260 | size_t mid = findBoundary(begin, end); |
261 | func(begin, mid); |
262 | begin = mid; |
263 | } |
264 | } |
265 | |
266 | // Find or create a symbol at offset 0 in the given section |
267 | static Symbol *getThunkTargetSymbol(ConcatInputSection *isec) { |
268 | for (Symbol *sym : isec->symbols) |
269 | if (auto *d = dyn_cast<Defined>(Val: sym)) |
270 | if (d->value == 0) |
271 | return sym; |
272 | |
273 | std::string thunkName; |
274 | if (isec->symbols.size() == 0) |
275 | thunkName = isec->getName().str() + ".icf.0" ; |
276 | else |
277 | thunkName = isec->getName().str() + "icf.thunk.target" + |
278 | std::to_string(val: icfThunkCounter++); |
279 | |
280 | // If no symbol found at offset 0, create one |
281 | auto *sym = make<Defined>(args&: thunkName, /*file=*/args: nullptr, args&: isec, |
282 | /*value=*/args: 0, /*size=*/args: isec->getSize(), |
283 | /*isWeakDef=*/args: false, /*isExternal=*/args: false, |
284 | /*isPrivateExtern=*/args: false, /*isThumb=*/args: false, |
285 | /*isReferencedDynamically=*/args: false, |
286 | /*noDeadStrip=*/args: false); |
287 | isec->symbols.push_back(NewVal: sym); |
288 | return sym; |
289 | } |
290 | |
291 | // Given a range of identical icfInputs, replace address significant functions |
292 | // with a thunk that is just a direct branch to the first function in the |
293 | // series. This way we keep only one main body of the function but we still |
294 | // retain the address uniqueness of relevant functions by having them be a |
295 | // direct branch thunk rather than containing a full copy of the actual function |
296 | // body. |
297 | void ICF::applySafeThunksToRange(size_t begin, size_t end) { |
298 | // When creating a unique ICF thunk, use the first section as the section that |
299 | // all thunks will branch to. |
300 | ConcatInputSection *masterIsec = icfInputs[begin]; |
301 | |
302 | // If the first section is not address significant, sorting guarantees that |
303 | // there are no address significant functions. So we can skip this range. |
304 | if (!masterIsec->keepUnique) |
305 | return; |
306 | |
307 | // Skip anything that is not a code section. |
308 | if (!isCodeSection(masterIsec)) |
309 | return; |
310 | |
311 | // If the functions we're dealing with are smaller than the thunk size, then |
312 | // just leave them all as-is - creating thunks would be a net loss. |
313 | uint32_t thunkSize = target->getICFSafeThunkSize(); |
314 | if (masterIsec->data.size() <= thunkSize) |
315 | return; |
316 | |
317 | // Get the symbol that all thunks will branch to. |
318 | Symbol *masterSym = getThunkTargetSymbol(isec: masterIsec); |
319 | |
320 | for (size_t i = begin + 1; i < end; ++i) { |
321 | ConcatInputSection *isec = icfInputs[i]; |
322 | // When we're done processing keepUnique entries, we can stop. Sorting |
323 | // guaratees that all keepUnique will be at the front. |
324 | if (!isec->keepUnique) |
325 | break; |
326 | |
327 | ConcatInputSection *thunk = |
328 | makeSyntheticInputSection(segName: isec->getSegName(), sectName: isec->getName()); |
329 | addInputSection(inputSection: thunk); |
330 | |
331 | target->initICFSafeThunkBody(thunk, targetSym: masterSym); |
332 | thunk->foldIdentical(redundant: isec, foldKind: Symbol::ICFFoldKind::Thunk); |
333 | |
334 | // Since we're folding the target function into a thunk, we need to adjust |
335 | // the symbols that now got relocated from the target function to the thunk. |
336 | // Since the thunk is only one branch, we move all symbols to offset 0 and |
337 | // make sure that the size of all non-zero-size symbols is equal to the size |
338 | // of the branch. |
339 | for (auto *sym : thunk->symbols) { |
340 | sym->value = 0; |
341 | if (sym->size != 0) |
342 | sym->size = thunkSize; |
343 | } |
344 | } |
345 | } |
346 | |
347 | // Split icfInputs into shards, then parallelize invocation of FUNC on subranges |
348 | // with matching equivalence class |
349 | void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) { |
350 | // Only use threads when the benefits outweigh the overhead. |
351 | const size_t threadingThreshold = 1024; |
352 | if (icfInputs.size() < threadingThreshold) { |
353 | forEachClassRange(begin: 0, end: icfInputs.size(), func); |
354 | ++icfPass; |
355 | return; |
356 | } |
357 | |
358 | // Shard into non-overlapping intervals, and call FUNC in parallel. The |
359 | // sharding must be completed before any calls to FUNC are made so that FUNC |
360 | // can modify the InputSection in its shard without causing data races. |
361 | const size_t shards = 256; |
362 | size_t step = icfInputs.size() / shards; |
363 | size_t boundaries[shards + 1]; |
364 | boundaries[0] = 0; |
365 | boundaries[shards] = icfInputs.size(); |
366 | parallelFor(Begin: 1, End: shards, Fn: [&](size_t i) { |
367 | boundaries[i] = findBoundary(begin: (i - 1) * step, end: icfInputs.size()); |
368 | }); |
369 | parallelFor(Begin: 1, End: shards + 1, Fn: [&](size_t i) { |
370 | if (boundaries[i - 1] < boundaries[i]) { |
371 | forEachClassRange(begin: boundaries[i - 1], end: boundaries[i], func); |
372 | } |
373 | }); |
374 | ++icfPass; |
375 | } |
376 | |
377 | void ICF::run() { |
378 | // Into each origin-section hash, combine all reloc referent section hashes. |
379 | for (icfPass = 0; icfPass < 2; ++icfPass) { |
380 | parallelForEach(R&: icfInputs, Fn: [&](ConcatInputSection *isec) { |
381 | uint32_t hash = isec->icfEqClass[icfPass % 2]; |
382 | for (const Reloc &r : isec->relocs) { |
383 | if (auto *sym = r.referent.dyn_cast<Symbol *>()) { |
384 | if (auto *defined = dyn_cast<Defined>(Val: sym)) { |
385 | if (defined->isec()) { |
386 | if (auto *referentIsec = |
387 | dyn_cast<ConcatInputSection>(Val: defined->isec())) |
388 | hash += defined->value + referentIsec->icfEqClass[icfPass % 2]; |
389 | else |
390 | hash += defined->isec()->kind() + |
391 | defined->isec()->getOffset(off: defined->value); |
392 | } else { |
393 | hash += defined->value; |
394 | } |
395 | } else { |
396 | // ICF runs before Undefined diags |
397 | assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym)); |
398 | } |
399 | } |
400 | } |
401 | // Set MSB to 1 to avoid collisions with non-hashed classes. |
402 | isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31); |
403 | }); |
404 | } |
405 | |
406 | llvm::stable_sort( |
407 | Range&: icfInputs, C: [](const ConcatInputSection *a, const ConcatInputSection *b) { |
408 | // When using safe_thunks, ensure that we first sort by icfEqClass and |
409 | // then by keepUnique (descending). This guarantees that within an |
410 | // equivalence class, the keepUnique inputs are always first. |
411 | if (config->icfLevel == ICFLevel::safe_thunks) |
412 | if (a->icfEqClass[0] == b->icfEqClass[0]) |
413 | return a->keepUnique > b->keepUnique; |
414 | return a->icfEqClass[0] < b->icfEqClass[0]; |
415 | }); |
416 | forEachClass(func: [&](size_t begin, size_t end) { |
417 | segregate(begin, end, &ICF::equalsConstant); |
418 | }); |
419 | |
420 | // Split equivalence groups by comparing relocations until convergence |
421 | do { |
422 | icfRepeat = false; |
423 | forEachClass(func: [&](size_t begin, size_t end) { |
424 | segregate(begin, end, &ICF::equalsVariable); |
425 | }); |
426 | } while (icfRepeat); |
427 | log(msg: "ICF needed " + Twine(icfPass) + " iterations" ); |
428 | if (verboseDiagnostics) { |
429 | log(msg: "equalsConstant() called " + Twine(equalsConstantCount) + " times" ); |
430 | log(msg: "equalsVariable() called " + Twine(equalsVariableCount) + " times" ); |
431 | } |
432 | |
433 | // When using safe_thunks, we need to create thunks for all keepUnique |
434 | // functions that can be deduplicated. Since we're creating / adding new |
435 | // InputSections, we can't paralellize this. |
436 | if (config->icfLevel == ICFLevel::safe_thunks) |
437 | forEachClassRange(begin: 0, end: icfInputs.size(), func: [&](size_t begin, size_t end) { |
438 | applySafeThunksToRange(begin, end); |
439 | }); |
440 | |
441 | // Fold sections within equivalence classes |
442 | forEachClass(func: [&](size_t begin, size_t end) { |
443 | if (end - begin < 2) |
444 | return; |
445 | bool useSafeThunks = config->icfLevel == ICFLevel::safe_thunks; |
446 | |
447 | // For ICF level safe_thunks, replace keepUnique function bodies with |
448 | // thunks. For all other ICF levles, directly merge the functions. |
449 | |
450 | ConcatInputSection *beginIsec = icfInputs[begin]; |
451 | for (size_t i = begin + 1; i < end; ++i) { |
452 | // Skip keepUnique inputs when using safe_thunks (already handeled above) |
453 | if (useSafeThunks && icfInputs[i]->keepUnique) { |
454 | // Assert keepUnique sections are either small or replaced with thunks. |
455 | assert(!icfInputs[i]->live || |
456 | icfInputs[i]->data.size() <= target->getICFSafeThunkSize()); |
457 | assert(!icfInputs[i]->replacement || |
458 | icfInputs[i]->replacement->data.size() == |
459 | target->getICFSafeThunkSize()); |
460 | continue; |
461 | } |
462 | beginIsec->foldIdentical(redundant: icfInputs[i]); |
463 | } |
464 | }); |
465 | } |
466 | |
467 | // Split an equivalence class into smaller classes. |
468 | void ICF::segregate(size_t begin, size_t end, EqualsFn equals) { |
469 | while (begin < end) { |
470 | // Divide [begin, end) into two. Let mid be the start index of the |
471 | // second group. |
472 | auto bound = std::stable_partition( |
473 | first: icfInputs.begin() + begin + 1, last: icfInputs.begin() + end, |
474 | pred: [&](ConcatInputSection *isec) { |
475 | return (this->*equals)(icfInputs[begin], isec); |
476 | }); |
477 | size_t mid = bound - icfInputs.begin(); |
478 | |
479 | // Split [begin, end) into [begin, mid) and [mid, end). We use mid as an |
480 | // equivalence class ID because every group ends with a unique index. |
481 | for (size_t i = begin; i < mid; ++i) |
482 | icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid; |
483 | |
484 | // If we created a group, we need to iterate the main loop again. |
485 | if (mid != end) |
486 | icfRepeat = true; |
487 | |
488 | begin = mid; |
489 | } |
490 | } |
491 | |
492 | void macho::markSymAsAddrSig(Symbol *s) { |
493 | if (auto *d = dyn_cast_or_null<Defined>(Val: s)) |
494 | if (d->isec()) |
495 | d->isec()->keepUnique = true; |
496 | } |
497 | |
498 | void macho::markAddrSigSymbols() { |
499 | TimeTraceScope timeScope("Mark addrsig symbols" ); |
500 | for (InputFile *file : inputFiles) { |
501 | ObjFile *obj = dyn_cast<ObjFile>(Val: file); |
502 | if (!obj) |
503 | continue; |
504 | |
505 | Section *addrSigSection = obj->addrSigSection; |
506 | if (!addrSigSection) |
507 | continue; |
508 | assert(addrSigSection->subsections.size() == 1); |
509 | |
510 | const InputSection *isec = addrSigSection->subsections[0].isec; |
511 | |
512 | for (const Reloc &r : isec->relocs) { |
513 | if (auto *sym = r.referent.dyn_cast<Symbol *>()) |
514 | markSymAsAddrSig(s: sym); |
515 | else |
516 | error(msg: toString(isec) + ": unexpected section relocation" ); |
517 | } |
518 | } |
519 | } |
520 | |
521 | // Given a symbol that was folded into a thunk, return the symbol pointing to |
522 | // the actual body of the function. We use this approach rather than storing the |
523 | // needed info in the Defined itself in order to minimize memory usage. |
524 | Defined *macho::getBodyForThunkFoldedSym(Defined *foldedSym) { |
525 | assert(isa<ConcatInputSection>(foldedSym->originalIsec) && |
526 | "thunk-folded ICF symbol expected to be on a ConcatInputSection" ); |
527 | // foldedSec is the InputSection that was marked as deleted upon fold |
528 | ConcatInputSection *foldedSec = |
529 | cast<ConcatInputSection>(Val: foldedSym->originalIsec); |
530 | |
531 | // thunkBody is the actual live thunk, containing the code that branches to |
532 | // the actual body of the function. |
533 | InputSection *thunkBody = foldedSec->replacement; |
534 | |
535 | // The symbol of the merged body of the function that the thunk jumps to. This |
536 | // will end up in the final binary. |
537 | Symbol *targetSym = target->getThunkBranchTarget(thunk: thunkBody); |
538 | |
539 | return cast<Defined>(Val: targetSym); |
540 | } |
541 | void macho::foldIdenticalSections(bool onlyCfStrings) { |
542 | TimeTraceScope timeScope("Fold Identical Code Sections" ); |
543 | // The ICF equivalence-class segregation algorithm relies on pre-computed |
544 | // hashes of InputSection::data for the ConcatOutputSection::inputs and all |
545 | // sections referenced by their relocs. We could recursively traverse the |
546 | // relocs to find every referenced InputSection, but that precludes easy |
547 | // parallelization. Therefore, we hash every InputSection here where we have |
548 | // them all accessible as simple vectors. |
549 | |
550 | // If an InputSection is ineligible for ICF, we give it a unique ID to force |
551 | // it into an unfoldable singleton equivalence class. Begin the unique-ID |
552 | // space at inputSections.size(), so that it will never intersect with |
553 | // equivalence-class IDs which begin at 0. Since hashes & unique IDs never |
554 | // coexist with equivalence-class IDs, this is not necessary, but might help |
555 | // someone keep the numbers straight in case we ever need to debug the |
556 | // ICF::segregate() |
557 | std::vector<ConcatInputSection *> foldable; |
558 | uint64_t icfUniqueID = inputSections.size(); |
559 | // Reset the thunk counter for each run of ICF. |
560 | icfThunkCounter = 0; |
561 | for (ConcatInputSection *isec : inputSections) { |
562 | bool isFoldableWithAddendsRemoved = isCfStringSection(isec) || |
563 | isClassRefsSection(isec) || |
564 | isSelRefsSection(isec); |
565 | // NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we |
566 | // can still fold it. |
567 | bool hasFoldableFlags = (isSelRefsSection(isec) || |
568 | sectionType(flags: isec->getFlags()) == MachO::S_REGULAR); |
569 | |
570 | bool isCodeSec = isCodeSection(isec); |
571 | |
572 | // When keepUnique is true, the section is not foldable. Unless we are at |
573 | // icf level safe_thunks, in which case we still want to fold code sections. |
574 | // When using safe_thunks we'll apply the safe_thunks logic at merge time |
575 | // based on the 'keepUnique' flag. |
576 | bool noUniqueRequirement = |
577 | !isec->keepUnique || |
578 | ((config->icfLevel == ICFLevel::safe_thunks) && isCodeSec); |
579 | |
580 | // FIXME: consider non-code __text sections as foldable? |
581 | bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) && |
582 | (isCodeSec || isFoldableWithAddendsRemoved || |
583 | isGccExceptTabSection(isec)) && |
584 | noUniqueRequirement && !isec->hasAltEntry && |
585 | !isec->shouldOmitFromOutput() && hasFoldableFlags; |
586 | if (isFoldable) { |
587 | foldable.push_back(x: isec); |
588 | for (Defined *d : isec->symbols) |
589 | if (d->unwindEntry()) |
590 | foldable.push_back(x: d->unwindEntry()); |
591 | |
592 | // Some sections have embedded addends that foil ICF's hashing / equality |
593 | // checks. (We can ignore embedded addends when doing ICF because the same |
594 | // information gets recorded in our Reloc structs.) We therefore create a |
595 | // mutable copy of the section data and zero out the embedded addends |
596 | // before performing any hashing / equality checks. |
597 | if (isFoldableWithAddendsRemoved) { |
598 | // We have to do this copying serially as the BumpPtrAllocator is not |
599 | // thread-safe. FIXME: Make a thread-safe allocator. |
600 | MutableArrayRef<uint8_t> copy = isec->data.copy(A&: bAlloc()); |
601 | for (const Reloc &r : isec->relocs) |
602 | target->relocateOne(loc: copy.data() + r.offset, r, /*va=*/0, |
603 | /*relocVA=*/0); |
604 | isec->data = copy; |
605 | } |
606 | } else if (!isEhFrameSection(isec)) { |
607 | // EH frames are gathered as foldables from unwindEntry above; give a |
608 | // unique ID to everything else. |
609 | isec->icfEqClass[0] = ++icfUniqueID; |
610 | } |
611 | } |
612 | parallelForEach(R&: foldable, Fn: [](ConcatInputSection *isec) { |
613 | assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID! |
614 | // Turn-on the top bit to guarantee that valid hashes have no collisions |
615 | // with the small-integer unique IDs for ICF-ineligible sections |
616 | isec->icfEqClass[0] = xxh3_64bits(data: isec->data) | (1ull << 31); |
617 | }); |
618 | // Now that every input section is either hashed or marked as unique, run the |
619 | // segregation algorithm to detect foldable subsections. |
620 | ICF(foldable).run(); |
621 | } |
622 | |