1 | //===- ICF.cpp ------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // ICF is short for Identical Code Folding. That is a size optimization to |
10 | // identify and merge two or more read-only sections (typically functions) |
11 | // that happened to have the same contents. It usually reduces output size |
12 | // by a few percent. |
13 | // |
14 | // On Windows, ICF is enabled by default. |
15 | // |
16 | // See ELF/ICF.cpp for the details about the algorithm. |
17 | // |
18 | //===----------------------------------------------------------------------===// |
19 | |
20 | #include "ICF.h" |
21 | #include "COFFLinkerContext.h" |
22 | #include "Chunks.h" |
23 | #include "Symbols.h" |
24 | #include "lld/Common/Timer.h" |
25 | #include "llvm/Support/Parallel.h" |
26 | #include "llvm/Support/TimeProfiler.h" |
27 | #include "llvm/Support/xxhash.h" |
28 | #include <algorithm> |
29 | #include <atomic> |
30 | #include <vector> |
31 | |
32 | using namespace llvm; |
33 | |
34 | namespace lld::coff { |
35 | |
36 | class ICF { |
37 | public: |
38 | ICF(COFFLinkerContext &c) : ctx(c){}; |
39 | void run(); |
40 | |
41 | private: |
42 | void segregate(size_t begin, size_t end, bool constant); |
43 | |
44 | bool assocEquals(const SectionChunk *a, const SectionChunk *b); |
45 | |
46 | bool equalsConstant(const SectionChunk *a, const SectionChunk *b); |
47 | bool equalsVariable(const SectionChunk *a, const SectionChunk *b); |
48 | |
49 | bool isEligible(SectionChunk *c); |
50 | |
51 | size_t findBoundary(size_t begin, size_t end); |
52 | |
53 | void forEachClassRange(size_t begin, size_t end, |
54 | std::function<void(size_t, size_t)> fn); |
55 | |
56 | void forEachClass(std::function<void(size_t, size_t)> fn); |
57 | |
58 | std::vector<SectionChunk *> chunks; |
59 | int cnt = 0; |
60 | std::atomic<bool> repeat = {false}; |
61 | |
62 | COFFLinkerContext &ctx; |
63 | }; |
64 | |
65 | // Returns true if section S is subject of ICF. |
66 | // |
67 | // Microsoft's documentation |
68 | // (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April |
69 | // 2017) says that /opt:icf folds both functions and read-only data. |
70 | // Despite that, the MSVC linker folds only functions. We found |
71 | // a few instances of programs that are not safe for data merging. |
72 | // Therefore, we merge only functions just like the MSVC tool. However, we also |
73 | // merge read-only sections in a couple of cases where the address of the |
74 | // section is insignificant to the user program and the behaviour matches that |
75 | // of the Visual C++ linker. |
76 | bool ICF::isEligible(SectionChunk *c) { |
77 | // Non-comdat chunks, dead chunks, and writable chunks are not eligible. |
78 | bool writable = c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE; |
79 | if (!c->isCOMDAT() || !c->live || writable) |
80 | return false; |
81 | |
82 | // Under regular (not safe) ICF, all code sections are eligible. |
83 | if ((ctx.config.doICF == ICFLevel::All) && |
84 | c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE) |
85 | return true; |
86 | |
87 | // .pdata and .xdata unwind info sections are eligible. |
88 | StringRef outSecName = c->getSectionName().split(Separator: '$').first; |
89 | if (outSecName == ".pdata" || outSecName == ".xdata" ) |
90 | return true; |
91 | |
92 | // So are vtables. |
93 | const char *itaniumVtablePrefix = |
94 | ctx.config.machine == I386 ? "__ZTV" : "_ZTV" ; |
95 | if (c->sym && (c->sym->getName().starts_with(Prefix: "??_7" ) || |
96 | c->sym->getName().starts_with(Prefix: itaniumVtablePrefix))) |
97 | return true; |
98 | |
99 | // Anything else not in an address-significance table is eligible. |
100 | return !c->keepUnique; |
101 | } |
102 | |
103 | // Split an equivalence class into smaller classes. |
104 | void ICF::segregate(size_t begin, size_t end, bool constant) { |
105 | while (begin < end) { |
106 | // Divide [Begin, End) into two. Let Mid be the start index of the |
107 | // second group. |
108 | auto bound = std::stable_partition( |
109 | first: chunks.begin() + begin + 1, last: chunks.begin() + end, pred: [&](SectionChunk *s) { |
110 | if (constant) |
111 | return equalsConstant(a: chunks[begin], b: s); |
112 | return equalsVariable(a: chunks[begin], b: s); |
113 | }); |
114 | size_t mid = bound - chunks.begin(); |
115 | |
116 | // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an |
117 | // equivalence class ID because every group ends with a unique index. |
118 | for (size_t i = begin; i < mid; ++i) |
119 | chunks[i]->eqClass[(cnt + 1) % 2] = mid; |
120 | |
121 | // If we created a group, we need to iterate the main loop again. |
122 | if (mid != end) |
123 | repeat = true; |
124 | |
125 | begin = mid; |
126 | } |
127 | } |
128 | |
129 | // Returns true if two sections' associative children are equal. |
130 | bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) { |
131 | // Ignore associated metadata sections that don't participate in ICF, such as |
132 | // debug info and CFGuard metadata. |
133 | auto considerForICF = [](const SectionChunk &assoc) { |
134 | StringRef Name = assoc.getSectionName(); |
135 | return !(Name.starts_with(Prefix: ".debug" ) || Name == ".gfids$y" || |
136 | Name == ".giats$y" || Name == ".gljmp$y" ); |
137 | }; |
138 | auto ra = make_filter_range(Range: a->children(), Pred: considerForICF); |
139 | auto rb = make_filter_range(Range: b->children(), Pred: considerForICF); |
140 | return std::equal(first1: ra.begin(), last1: ra.end(), first2: rb.begin(), last2: rb.end(), |
141 | binary_pred: [&](const SectionChunk &ia, const SectionChunk &ib) { |
142 | return ia.eqClass[cnt % 2] == ib.eqClass[cnt % 2]; |
143 | }); |
144 | } |
145 | |
146 | // Compare "non-moving" part of two sections, namely everything |
147 | // except relocation targets. |
148 | bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) { |
149 | if (a->relocsSize != b->relocsSize) |
150 | return false; |
151 | |
152 | // Compare relocations. |
153 | auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) { |
154 | if (r1.Type != r2.Type || |
155 | r1.VirtualAddress != r2.VirtualAddress) { |
156 | return false; |
157 | } |
158 | Symbol *b1 = a->file->getSymbol(symbolIndex: r1.SymbolTableIndex); |
159 | Symbol *b2 = b->file->getSymbol(symbolIndex: r2.SymbolTableIndex); |
160 | if (b1 == b2) |
161 | return true; |
162 | if (auto *d1 = dyn_cast<DefinedRegular>(Val: b1)) |
163 | if (auto *d2 = dyn_cast<DefinedRegular>(Val: b2)) |
164 | return d1->getValue() == d2->getValue() && |
165 | d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2]; |
166 | return false; |
167 | }; |
168 | if (!std::equal(first1: a->getRelocs().begin(), last1: a->getRelocs().end(), |
169 | first2: b->getRelocs().begin(), binary_pred: eq)) |
170 | return false; |
171 | |
172 | // Compare section attributes and contents. |
173 | return a->getOutputCharacteristics() == b->getOutputCharacteristics() && |
174 | a->getSectionName() == b->getSectionName() && |
175 | a->header->SizeOfRawData == b->header->SizeOfRawData && |
176 | a->checksum == b->checksum && a->getContents() == b->getContents() && |
177 | a->getMachine() == b->getMachine() && assocEquals(a, b); |
178 | } |
179 | |
180 | // Compare "moving" part of two sections, namely relocation targets. |
181 | bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) { |
182 | // Compare relocations. |
183 | auto eqSym = [&](Symbol *b1, Symbol *b2) { |
184 | if (b1 == b2) |
185 | return true; |
186 | if (auto *d1 = dyn_cast<DefinedRegular>(Val: b1)) |
187 | if (auto *d2 = dyn_cast<DefinedRegular>(Val: b2)) |
188 | return d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2]; |
189 | return false; |
190 | }; |
191 | auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) { |
192 | Symbol *b1 = a->file->getSymbol(symbolIndex: r1.SymbolTableIndex); |
193 | Symbol *b2 = b->file->getSymbol(symbolIndex: r2.SymbolTableIndex); |
194 | return eqSym(b1, b2); |
195 | }; |
196 | |
197 | Symbol *e1 = a->getEntryThunk(); |
198 | Symbol *e2 = b->getEntryThunk(); |
199 | if ((e1 || e2) && (!e1 || !e2 || !eqSym(e1, e2))) |
200 | return false; |
201 | |
202 | return std::equal(first1: a->getRelocs().begin(), last1: a->getRelocs().end(), |
203 | first2: b->getRelocs().begin(), binary_pred: eq) && |
204 | assocEquals(a, b); |
205 | } |
206 | |
207 | // Find the first Chunk after Begin that has a different class from Begin. |
208 | size_t ICF::findBoundary(size_t begin, size_t end) { |
209 | for (size_t i = begin + 1; i < end; ++i) |
210 | if (chunks[begin]->eqClass[cnt % 2] != chunks[i]->eqClass[cnt % 2]) |
211 | return i; |
212 | return end; |
213 | } |
214 | |
215 | void ICF::forEachClassRange(size_t begin, size_t end, |
216 | std::function<void(size_t, size_t)> fn) { |
217 | while (begin < end) { |
218 | size_t mid = findBoundary(begin, end); |
219 | fn(begin, mid); |
220 | begin = mid; |
221 | } |
222 | } |
223 | |
224 | // Call Fn on each class group. |
225 | void ICF::forEachClass(std::function<void(size_t, size_t)> fn) { |
226 | // If the number of sections are too small to use threading, |
227 | // call Fn sequentially. |
228 | if (chunks.size() < 1024) { |
229 | forEachClassRange(begin: 0, end: chunks.size(), fn); |
230 | ++cnt; |
231 | return; |
232 | } |
233 | |
234 | // Shard into non-overlapping intervals, and call Fn in parallel. |
235 | // The sharding must be completed before any calls to Fn are made |
236 | // so that Fn can modify the Chunks in its shard without causing data |
237 | // races. |
238 | const size_t numShards = 256; |
239 | size_t step = chunks.size() / numShards; |
240 | size_t boundaries[numShards + 1]; |
241 | boundaries[0] = 0; |
242 | boundaries[numShards] = chunks.size(); |
243 | parallelFor(Begin: 1, End: numShards, Fn: [&](size_t i) { |
244 | boundaries[i] = findBoundary(begin: (i - 1) * step, end: chunks.size()); |
245 | }); |
246 | parallelFor(Begin: 1, End: numShards + 1, Fn: [&](size_t i) { |
247 | if (boundaries[i - 1] < boundaries[i]) { |
248 | forEachClassRange(begin: boundaries[i - 1], end: boundaries[i], fn); |
249 | } |
250 | }); |
251 | ++cnt; |
252 | } |
253 | |
254 | // Merge identical COMDAT sections. |
255 | // Two sections are considered the same if their section headers, |
256 | // contents and relocations are all the same. |
257 | void ICF::run() { |
258 | llvm::TimeTraceScope timeScope("ICF" ); |
259 | ScopedTimer t(ctx.icfTimer); |
260 | |
261 | // Collect only mergeable sections and group by hash value. |
262 | uint32_t nextId = 1; |
263 | for (Chunk *c : ctx.driver.getChunks()) { |
264 | if (auto *sc = dyn_cast<SectionChunk>(Val: c)) { |
265 | if (isEligible(c: sc)) |
266 | chunks.push_back(x: sc); |
267 | else |
268 | sc->eqClass[0] = nextId++; |
269 | } |
270 | } |
271 | |
272 | // Make sure that ICF doesn't merge sections that are being handled by string |
273 | // tail merging. |
274 | for (MergeChunk *mc : ctx.mergeChunkInstances) |
275 | if (mc) |
276 | for (SectionChunk *sc : mc->sections) |
277 | sc->eqClass[0] = nextId++; |
278 | |
279 | // Initially, we use hash values to partition sections. |
280 | parallelForEach(R&: chunks, Fn: [&](SectionChunk *sc) { |
281 | sc->eqClass[0] = xxh3_64bits(data: sc->getContents()); |
282 | }); |
283 | |
284 | // Combine the hashes of the sections referenced by each section into its |
285 | // hash. |
286 | for (unsigned cnt = 0; cnt != 2; ++cnt) { |
287 | parallelForEach(R&: chunks, Fn: [&](SectionChunk *sc) { |
288 | uint32_t hash = sc->eqClass[cnt % 2]; |
289 | for (Symbol *b : sc->symbols()) |
290 | if (auto *sym = dyn_cast_or_null<DefinedRegular>(Val: b)) |
291 | hash += sym->getChunk()->eqClass[cnt % 2]; |
292 | // Set MSB to 1 to avoid collisions with non-hash classes. |
293 | sc->eqClass[(cnt + 1) % 2] = hash | (1U << 31); |
294 | }); |
295 | } |
296 | |
297 | // From now on, sections in Chunks are ordered so that sections in |
298 | // the same group are consecutive in the vector. |
299 | llvm::stable_sort(Range&: chunks, C: [](const SectionChunk *a, const SectionChunk *b) { |
300 | return a->eqClass[0] < b->eqClass[0]; |
301 | }); |
302 | |
303 | // Compare static contents and assign unique IDs for each static content. |
304 | forEachClass(fn: [&](size_t begin, size_t end) { segregate(begin, end, constant: true); }); |
305 | |
306 | // Split groups by comparing relocations until convergence is obtained. |
307 | do { |
308 | repeat = false; |
309 | forEachClass( |
310 | fn: [&](size_t begin, size_t end) { segregate(begin, end, constant: false); }); |
311 | } while (repeat); |
312 | |
313 | Log(ctx) << "ICF needed " << Twine(cnt) << " iterations" ; |
314 | |
315 | // Merge sections in the same classes. |
316 | forEachClass(fn: [&](size_t begin, size_t end) { |
317 | if (end - begin == 1) |
318 | return; |
319 | |
320 | Log(ctx) << "Selected " << chunks[begin]->getDebugName(); |
321 | for (size_t i = begin + 1; i < end; ++i) { |
322 | Log(ctx) << " Removed " << chunks[i]->getDebugName(); |
323 | chunks[begin]->replace(other: chunks[i]); |
324 | } |
325 | }); |
326 | } |
327 | |
328 | // Entry point to ICF. |
329 | void doICF(COFFLinkerContext &ctx) { ICF(ctx).run(); } |
330 | |
331 | } // namespace lld::coff |
332 | |