1 | //===- ICF.cpp ------------------------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // ICF is short for Identical Code Folding. That is a size optimization to |
10 | // identify and merge two or more read-only sections (typically functions) |
11 | // that happened to have the same contents. It usually reduces output size |
12 | // by a few percent. |
13 | // |
14 | // On Windows, ICF is enabled by default. |
15 | // |
16 | // See ELF/ICF.cpp for the details about the algorithm. |
17 | // |
18 | //===----------------------------------------------------------------------===// |
19 | |
20 | #include "ICF.h" |
21 | #include "COFFLinkerContext.h" |
22 | #include "Chunks.h" |
23 | #include "Symbols.h" |
24 | #include "lld/Common/ErrorHandler.h" |
25 | #include "lld/Common/Timer.h" |
26 | #include "llvm/ADT/Hashing.h" |
27 | #include "llvm/Support/Debug.h" |
28 | #include "llvm/Support/Parallel.h" |
29 | #include "llvm/Support/TimeProfiler.h" |
30 | #include "llvm/Support/raw_ostream.h" |
31 | #include "llvm/Support/xxhash.h" |
32 | #include <algorithm> |
33 | #include <atomic> |
34 | #include <vector> |
35 | |
36 | using namespace llvm; |
37 | |
38 | namespace lld::coff { |
39 | |
40 | class ICF { |
41 | public: |
42 | ICF(COFFLinkerContext &c) : ctx(c){}; |
43 | void run(); |
44 | |
45 | private: |
46 | void segregate(size_t begin, size_t end, bool constant); |
47 | |
48 | bool assocEquals(const SectionChunk *a, const SectionChunk *b); |
49 | |
50 | bool equalsConstant(const SectionChunk *a, const SectionChunk *b); |
51 | bool equalsVariable(const SectionChunk *a, const SectionChunk *b); |
52 | |
53 | bool isEligible(SectionChunk *c); |
54 | |
55 | size_t findBoundary(size_t begin, size_t end); |
56 | |
57 | void forEachClassRange(size_t begin, size_t end, |
58 | std::function<void(size_t, size_t)> fn); |
59 | |
60 | void forEachClass(std::function<void(size_t, size_t)> fn); |
61 | |
62 | std::vector<SectionChunk *> chunks; |
63 | int cnt = 0; |
64 | std::atomic<bool> repeat = {false}; |
65 | |
66 | COFFLinkerContext &ctx; |
67 | }; |
68 | |
69 | // Returns true if section S is subject of ICF. |
70 | // |
71 | // Microsoft's documentation |
72 | // (https://msdn.microsoft.com/en-us/library/bxwfs976.aspx; visited April |
73 | // 2017) says that /opt:icf folds both functions and read-only data. |
74 | // Despite that, the MSVC linker folds only functions. We found |
75 | // a few instances of programs that are not safe for data merging. |
76 | // Therefore, we merge only functions just like the MSVC tool. However, we also |
77 | // merge read-only sections in a couple of cases where the address of the |
78 | // section is insignificant to the user program and the behaviour matches that |
79 | // of the Visual C++ linker. |
80 | bool ICF::isEligible(SectionChunk *c) { |
81 | // Non-comdat chunks, dead chunks, and writable chunks are not eligible. |
82 | bool writable = c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE; |
83 | if (!c->isCOMDAT() || !c->live || writable) |
84 | return false; |
85 | |
86 | // Under regular (not safe) ICF, all code sections are eligible. |
87 | if ((ctx.config.doICF == ICFLevel::All) && |
88 | c->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE) |
89 | return true; |
90 | |
91 | // .pdata and .xdata unwind info sections are eligible. |
92 | StringRef outSecName = c->getSectionName().split(Separator: '$').first; |
93 | if (outSecName == ".pdata" || outSecName == ".xdata" ) |
94 | return true; |
95 | |
96 | // So are vtables. |
97 | const char *itaniumVtablePrefix = |
98 | ctx.config.machine == I386 ? "__ZTV" : "_ZTV" ; |
99 | if (c->sym && (c->sym->getName().starts_with(Prefix: "??_7" ) || |
100 | c->sym->getName().starts_with(Prefix: itaniumVtablePrefix))) |
101 | return true; |
102 | |
103 | // Anything else not in an address-significance table is eligible. |
104 | return !c->keepUnique; |
105 | } |
106 | |
107 | // Split an equivalence class into smaller classes. |
108 | void ICF::segregate(size_t begin, size_t end, bool constant) { |
109 | while (begin < end) { |
110 | // Divide [Begin, End) into two. Let Mid be the start index of the |
111 | // second group. |
112 | auto bound = std::stable_partition( |
113 | first: chunks.begin() + begin + 1, last: chunks.begin() + end, pred: [&](SectionChunk *s) { |
114 | if (constant) |
115 | return equalsConstant(a: chunks[begin], b: s); |
116 | return equalsVariable(a: chunks[begin], b: s); |
117 | }); |
118 | size_t mid = bound - chunks.begin(); |
119 | |
120 | // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an |
121 | // equivalence class ID because every group ends with a unique index. |
122 | for (size_t i = begin; i < mid; ++i) |
123 | chunks[i]->eqClass[(cnt + 1) % 2] = mid; |
124 | |
125 | // If we created a group, we need to iterate the main loop again. |
126 | if (mid != end) |
127 | repeat = true; |
128 | |
129 | begin = mid; |
130 | } |
131 | } |
132 | |
133 | // Returns true if two sections' associative children are equal. |
134 | bool ICF::assocEquals(const SectionChunk *a, const SectionChunk *b) { |
135 | // Ignore associated metadata sections that don't participate in ICF, such as |
136 | // debug info and CFGuard metadata. |
137 | auto considerForICF = [](const SectionChunk &assoc) { |
138 | StringRef Name = assoc.getSectionName(); |
139 | return !(Name.starts_with(Prefix: ".debug" ) || Name == ".gfids$y" || |
140 | Name == ".giats$y" || Name == ".gljmp$y" ); |
141 | }; |
142 | auto ra = make_filter_range(Range: a->children(), Pred: considerForICF); |
143 | auto rb = make_filter_range(Range: b->children(), Pred: considerForICF); |
144 | return std::equal(first1: ra.begin(), last1: ra.end(), first2: rb.begin(), last2: rb.end(), |
145 | binary_pred: [&](const SectionChunk &ia, const SectionChunk &ib) { |
146 | return ia.eqClass[cnt % 2] == ib.eqClass[cnt % 2]; |
147 | }); |
148 | } |
149 | |
150 | // Compare "non-moving" part of two sections, namely everything |
151 | // except relocation targets. |
152 | bool ICF::equalsConstant(const SectionChunk *a, const SectionChunk *b) { |
153 | if (a->relocsSize != b->relocsSize) |
154 | return false; |
155 | |
156 | // Compare relocations. |
157 | auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) { |
158 | if (r1.Type != r2.Type || |
159 | r1.VirtualAddress != r2.VirtualAddress) { |
160 | return false; |
161 | } |
162 | Symbol *b1 = a->file->getSymbol(symbolIndex: r1.SymbolTableIndex); |
163 | Symbol *b2 = b->file->getSymbol(symbolIndex: r2.SymbolTableIndex); |
164 | if (b1 == b2) |
165 | return true; |
166 | if (auto *d1 = dyn_cast<DefinedRegular>(Val: b1)) |
167 | if (auto *d2 = dyn_cast<DefinedRegular>(Val: b2)) |
168 | return d1->getValue() == d2->getValue() && |
169 | d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2]; |
170 | return false; |
171 | }; |
172 | if (!std::equal(first1: a->getRelocs().begin(), last1: a->getRelocs().end(), |
173 | first2: b->getRelocs().begin(), binary_pred: eq)) |
174 | return false; |
175 | |
176 | // Compare section attributes and contents. |
177 | return a->getOutputCharacteristics() == b->getOutputCharacteristics() && |
178 | a->getSectionName() == b->getSectionName() && |
179 | a->header->SizeOfRawData == b->header->SizeOfRawData && |
180 | a->checksum == b->checksum && a->getContents() == b->getContents() && |
181 | a->getMachine() == b->getMachine() && assocEquals(a, b); |
182 | } |
183 | |
184 | // Compare "moving" part of two sections, namely relocation targets. |
185 | bool ICF::equalsVariable(const SectionChunk *a, const SectionChunk *b) { |
186 | // Compare relocations. |
187 | auto eq = [&](const coff_relocation &r1, const coff_relocation &r2) { |
188 | Symbol *b1 = a->file->getSymbol(symbolIndex: r1.SymbolTableIndex); |
189 | Symbol *b2 = b->file->getSymbol(symbolIndex: r2.SymbolTableIndex); |
190 | if (b1 == b2) |
191 | return true; |
192 | if (auto *d1 = dyn_cast<DefinedRegular>(Val: b1)) |
193 | if (auto *d2 = dyn_cast<DefinedRegular>(Val: b2)) |
194 | return d1->getChunk()->eqClass[cnt % 2] == d2->getChunk()->eqClass[cnt % 2]; |
195 | return false; |
196 | }; |
197 | return std::equal(first1: a->getRelocs().begin(), last1: a->getRelocs().end(), |
198 | first2: b->getRelocs().begin(), binary_pred: eq) && |
199 | assocEquals(a, b); |
200 | } |
201 | |
202 | // Find the first Chunk after Begin that has a different class from Begin. |
203 | size_t ICF::findBoundary(size_t begin, size_t end) { |
204 | for (size_t i = begin + 1; i < end; ++i) |
205 | if (chunks[begin]->eqClass[cnt % 2] != chunks[i]->eqClass[cnt % 2]) |
206 | return i; |
207 | return end; |
208 | } |
209 | |
210 | void ICF::forEachClassRange(size_t begin, size_t end, |
211 | std::function<void(size_t, size_t)> fn) { |
212 | while (begin < end) { |
213 | size_t mid = findBoundary(begin, end); |
214 | fn(begin, mid); |
215 | begin = mid; |
216 | } |
217 | } |
218 | |
219 | // Call Fn on each class group. |
220 | void ICF::forEachClass(std::function<void(size_t, size_t)> fn) { |
221 | // If the number of sections are too small to use threading, |
222 | // call Fn sequentially. |
223 | if (chunks.size() < 1024) { |
224 | forEachClassRange(begin: 0, end: chunks.size(), fn); |
225 | ++cnt; |
226 | return; |
227 | } |
228 | |
229 | // Shard into non-overlapping intervals, and call Fn in parallel. |
230 | // The sharding must be completed before any calls to Fn are made |
231 | // so that Fn can modify the Chunks in its shard without causing data |
232 | // races. |
233 | const size_t numShards = 256; |
234 | size_t step = chunks.size() / numShards; |
235 | size_t boundaries[numShards + 1]; |
236 | boundaries[0] = 0; |
237 | boundaries[numShards] = chunks.size(); |
238 | parallelFor(Begin: 1, End: numShards, Fn: [&](size_t i) { |
239 | boundaries[i] = findBoundary(begin: (i - 1) * step, end: chunks.size()); |
240 | }); |
241 | parallelFor(Begin: 1, End: numShards + 1, Fn: [&](size_t i) { |
242 | if (boundaries[i - 1] < boundaries[i]) { |
243 | forEachClassRange(begin: boundaries[i - 1], end: boundaries[i], fn); |
244 | } |
245 | }); |
246 | ++cnt; |
247 | } |
248 | |
249 | // Merge identical COMDAT sections. |
250 | // Two sections are considered the same if their section headers, |
251 | // contents and relocations are all the same. |
252 | void ICF::run() { |
253 | llvm::TimeTraceScope timeScope("ICF" ); |
254 | ScopedTimer t(ctx.icfTimer); |
255 | |
256 | // Collect only mergeable sections and group by hash value. |
257 | uint32_t nextId = 1; |
258 | for (Chunk *c : ctx.symtab.getChunks()) { |
259 | if (auto *sc = dyn_cast<SectionChunk>(Val: c)) { |
260 | if (isEligible(c: sc)) |
261 | chunks.push_back(x: sc); |
262 | else |
263 | sc->eqClass[0] = nextId++; |
264 | } |
265 | } |
266 | |
267 | // Make sure that ICF doesn't merge sections that are being handled by string |
268 | // tail merging. |
269 | for (MergeChunk *mc : ctx.mergeChunkInstances) |
270 | if (mc) |
271 | for (SectionChunk *sc : mc->sections) |
272 | sc->eqClass[0] = nextId++; |
273 | |
274 | // Initially, we use hash values to partition sections. |
275 | parallelForEach(R&: chunks, Fn: [&](SectionChunk *sc) { |
276 | sc->eqClass[0] = xxh3_64bits(data: sc->getContents()); |
277 | }); |
278 | |
279 | // Combine the hashes of the sections referenced by each section into its |
280 | // hash. |
281 | for (unsigned cnt = 0; cnt != 2; ++cnt) { |
282 | parallelForEach(R&: chunks, Fn: [&](SectionChunk *sc) { |
283 | uint32_t hash = sc->eqClass[cnt % 2]; |
284 | for (Symbol *b : sc->symbols()) |
285 | if (auto *sym = dyn_cast_or_null<DefinedRegular>(Val: b)) |
286 | hash += sym->getChunk()->eqClass[cnt % 2]; |
287 | // Set MSB to 1 to avoid collisions with non-hash classes. |
288 | sc->eqClass[(cnt + 1) % 2] = hash | (1U << 31); |
289 | }); |
290 | } |
291 | |
292 | // From now on, sections in Chunks are ordered so that sections in |
293 | // the same group are consecutive in the vector. |
294 | llvm::stable_sort(Range&: chunks, C: [](const SectionChunk *a, const SectionChunk *b) { |
295 | return a->eqClass[0] < b->eqClass[0]; |
296 | }); |
297 | |
298 | // Compare static contents and assign unique IDs for each static content. |
299 | forEachClass(fn: [&](size_t begin, size_t end) { segregate(begin, end, constant: true); }); |
300 | |
301 | // Split groups by comparing relocations until convergence is obtained. |
302 | do { |
303 | repeat = false; |
304 | forEachClass( |
305 | fn: [&](size_t begin, size_t end) { segregate(begin, end, constant: false); }); |
306 | } while (repeat); |
307 | |
308 | log(msg: "ICF needed " + Twine(cnt) + " iterations" ); |
309 | |
310 | // Merge sections in the same classes. |
311 | forEachClass(fn: [&](size_t begin, size_t end) { |
312 | if (end - begin == 1) |
313 | return; |
314 | |
315 | log(msg: "Selected " + chunks[begin]->getDebugName()); |
316 | for (size_t i = begin + 1; i < end; ++i) { |
317 | log(msg: " Removed " + chunks[i]->getDebugName()); |
318 | chunks[begin]->replace(other: chunks[i]); |
319 | } |
320 | }); |
321 | } |
322 | |
323 | // Entry point to ICF. |
324 | void doICF(COFFLinkerContext &ctx) { ICF(ctx).run(); } |
325 | |
326 | } // namespace lld::coff |
327 | |