1 | //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the IdenticalCodeFolding class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/IdenticalCodeFolding.h" |
14 | #include "bolt/Core/HashUtilities.h" |
15 | #include "bolt/Core/ParallelUtilities.h" |
16 | #include "llvm/ADT/SmallVector.h" |
17 | #include "llvm/Support/CommandLine.h" |
18 | #include "llvm/Support/FormatVariadic.h" |
19 | #include "llvm/Support/ThreadPool.h" |
20 | #include "llvm/Support/Timer.h" |
21 | #include <atomic> |
22 | #include <iterator> |
23 | #include <map> |
24 | #include <set> |
25 | #include <unordered_map> |
26 | |
27 | #define DEBUG_TYPE "bolt-icf" |
28 | |
29 | using namespace llvm; |
30 | using namespace bolt; |
31 | |
32 | namespace opts { |
33 | |
34 | extern cl::OptionCategory BoltOptCategory; |
35 | |
36 | static cl::opt<bool> |
37 | ICFUseDFS("icf-dfs" , cl::desc("use DFS ordering when using -icf option" ), |
38 | cl::ReallyHidden, cl::cat(BoltOptCategory)); |
39 | |
40 | static cl::opt<bool> |
41 | TimeICF("time-icf" , |
42 | cl::desc("time icf steps" ), |
43 | cl::ReallyHidden, |
44 | cl::ZeroOrMore, |
45 | cl::cat(BoltOptCategory)); |
46 | |
47 | cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false, |
48 | DeprecatedICFNumericOptionParser> |
49 | ICF("icf" , cl::desc("fold functions with identical code" ), |
50 | cl::init(Val: bolt::IdenticalCodeFolding::ICFLevel::None), |
51 | cl::values(clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "all" , |
52 | "Enable identical code folding" ), |
53 | clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "1" , |
54 | "Enable identical code folding" ), |
55 | clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "" , |
56 | "Enable identical code folding" ), |
57 | clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, |
58 | "none" , |
59 | "Disable identical code folding (default)" ), |
60 | clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, "0" , |
61 | "Disable identical code folding (default)" ), |
62 | clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::Safe, |
63 | "safe" , "Enable safe identical code folding" )), |
64 | cl::ZeroOrMore, cl::ValueOptional, cl::cat(BoltOptCategory)); |
65 | } // namespace opts |
66 | |
67 | bool IdenticalCodeFolding::shouldOptimize(const BinaryFunction &BF) const { |
68 | if (BF.hasUnknownControlFlow()) |
69 | return false; |
70 | if (BF.isFolded()) |
71 | return false; |
72 | if (BF.hasSDTMarker()) |
73 | return false; |
74 | if (BF.isPseudo()) |
75 | return false; |
76 | if (opts::ICF == ICFLevel::Safe && BF.hasAddressTaken()) |
77 | return false; |
78 | return BinaryFunctionPass::shouldOptimize(BF); |
79 | } |
80 | |
81 | /// Compare two jump tables in 2 functions. The function relies on consistent |
82 | /// ordering of basic blocks in both binary functions (e.g. DFS). |
83 | static bool equalJumpTables(const JumpTable &JumpTableA, |
84 | const JumpTable &JumpTableB, |
85 | const BinaryFunction &FunctionA, |
86 | const BinaryFunction &FunctionB) { |
87 | if (JumpTableA.EntrySize != JumpTableB.EntrySize) |
88 | return false; |
89 | |
90 | if (JumpTableA.Type != JumpTableB.Type) |
91 | return false; |
92 | |
93 | if (JumpTableA.getSize() != JumpTableB.getSize()) |
94 | return false; |
95 | |
96 | for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) { |
97 | const MCSymbol *LabelA = JumpTableA.Entries[Index]; |
98 | const MCSymbol *LabelB = JumpTableB.Entries[Index]; |
99 | |
100 | const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(Label: LabelA); |
101 | const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(Label: LabelB); |
102 | |
103 | if (!TargetA || !TargetB) { |
104 | assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) && |
105 | "no target basic block found" ); |
106 | assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) && |
107 | "no target basic block found" ); |
108 | |
109 | if (TargetA != TargetB) |
110 | return false; |
111 | |
112 | continue; |
113 | } |
114 | |
115 | assert(TargetA && TargetB && "cannot locate target block(s)" ); |
116 | |
117 | if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex()) |
118 | return false; |
119 | } |
120 | |
121 | return true; |
122 | } |
123 | |
124 | /// Helper function that compares an instruction of this function to the |
125 | /// given instruction of the given function. The functions should have |
126 | /// identical CFG. |
127 | template <class Compare> |
128 | static bool isInstrEquivalentWith(const MCInst &InstA, |
129 | const BinaryBasicBlock &BBA, |
130 | const MCInst &InstB, |
131 | const BinaryBasicBlock &BBB, Compare Comp) { |
132 | if (InstA.getOpcode() != InstB.getOpcode()) |
133 | return false; |
134 | |
135 | const BinaryContext &BC = BBA.getFunction()->getBinaryContext(); |
136 | |
137 | // In this function we check for special conditions: |
138 | // |
139 | // * instructions with landing pads |
140 | // |
141 | // Most of the common cases should be handled by MCPlus::equals() |
142 | // that compares regular instruction operands. |
143 | // |
144 | // NB: there's no need to compare jump table indirect jump instructions |
145 | // separately as jump tables are handled by comparing corresponding |
146 | // symbols. |
147 | const std::optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(Inst: InstA); |
148 | const std::optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(Inst: InstB); |
149 | |
150 | if (EHInfoA || EHInfoB) { |
151 | if (!EHInfoA && (EHInfoB->first || EHInfoB->second)) |
152 | return false; |
153 | |
154 | if (!EHInfoB && (EHInfoA->first || EHInfoA->second)) |
155 | return false; |
156 | |
157 | if (EHInfoA && EHInfoB) { |
158 | // Action indices should match. |
159 | if (EHInfoA->second != EHInfoB->second) |
160 | return false; |
161 | |
162 | if (!EHInfoA->first != !EHInfoB->first) |
163 | return false; |
164 | |
165 | if (EHInfoA->first && EHInfoB->first) { |
166 | const BinaryBasicBlock *LPA = BBA.getLandingPad(Label: EHInfoA->first); |
167 | const BinaryBasicBlock *LPB = BBB.getLandingPad(Label: EHInfoB->first); |
168 | assert(LPA && LPB && "cannot locate landing pad(s)" ); |
169 | |
170 | if (LPA->getLayoutIndex() != LPB->getLayoutIndex()) |
171 | return false; |
172 | } |
173 | } |
174 | } |
175 | |
176 | return BC.MIB->equals(InstA, InstB, Comp); |
177 | } |
178 | |
179 | /// Returns true if this function has identical code and CFG with |
180 | /// the given function \p BF. |
181 | /// |
182 | /// If \p CongruentSymbols is set to true, then symbolic operands that reference |
183 | /// potentially identical but different functions are ignored during the |
184 | /// comparison. |
185 | static bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, |
186 | bool CongruentSymbols) { |
187 | assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG" ); |
188 | |
189 | // Compare the two functions, one basic block at a time. |
190 | // Currently we require two identical basic blocks to have identical |
191 | // instruction sequences and the same index in their corresponding |
192 | // functions. The latter is important for CFG equality. |
193 | |
194 | if (A.getLayout().block_size() != B.getLayout().block_size()) |
195 | return false; |
196 | |
197 | // Comparing multi-entry functions could be non-trivial. |
198 | if (A.isMultiEntry() || B.isMultiEntry()) |
199 | return false; |
200 | |
201 | if (A.hasIslandsInfo() || B.hasIslandsInfo()) |
202 | return false; |
203 | |
204 | // Process both functions in either DFS or existing order. |
205 | SmallVector<const BinaryBasicBlock *, 0> OrderA; |
206 | SmallVector<const BinaryBasicBlock *, 0> OrderB; |
207 | if (opts::ICFUseDFS) { |
208 | copy(Range: A.dfs(), Out: std::back_inserter(x&: OrderA)); |
209 | copy(Range: B.dfs(), Out: std::back_inserter(x&: OrderB)); |
210 | } else { |
211 | copy(Range: A.getLayout().blocks(), Out: std::back_inserter(x&: OrderA)); |
212 | copy(Range: B.getLayout().blocks(), Out: std::back_inserter(x&: OrderB)); |
213 | } |
214 | |
215 | const BinaryContext &BC = A.getBinaryContext(); |
216 | |
217 | auto BBI = OrderB.begin(); |
218 | for (const BinaryBasicBlock *BB : OrderA) { |
219 | const BinaryBasicBlock *OtherBB = *BBI; |
220 | |
221 | if (BB->getLayoutIndex() != OtherBB->getLayoutIndex()) |
222 | return false; |
223 | |
224 | // Compare successor basic blocks. |
225 | // NOTE: the comparison for jump tables is only partially verified here. |
226 | if (BB->succ_size() != OtherBB->succ_size()) |
227 | return false; |
228 | |
229 | auto SuccBBI = OtherBB->succ_begin(); |
230 | for (const BinaryBasicBlock *SuccBB : BB->successors()) { |
231 | const BinaryBasicBlock *SuccOtherBB = *SuccBBI; |
232 | if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex()) |
233 | return false; |
234 | ++SuccBBI; |
235 | } |
236 | |
237 | // Compare all instructions including pseudos. |
238 | auto I = BB->begin(), E = BB->end(); |
239 | auto OtherI = OtherBB->begin(), OtherE = OtherBB->end(); |
240 | while (I != E && OtherI != OtherE) { |
241 | // Compare symbols. |
242 | auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA, |
243 | const MCSymbol *SymbolB) { |
244 | if (SymbolA == SymbolB) |
245 | return true; |
246 | |
247 | // All local symbols are considered identical since they affect a |
248 | // control flow and we check the control flow separately. |
249 | // If a local symbol is escaped, then the function (potentially) has |
250 | // multiple entry points and we exclude such functions from |
251 | // comparison. |
252 | if (SymbolA->isTemporary() && SymbolB->isTemporary()) |
253 | return true; |
254 | |
255 | // Compare symbols as functions. |
256 | uint64_t EntryIDA = 0; |
257 | uint64_t EntryIDB = 0; |
258 | const BinaryFunction *FunctionA = |
259 | BC.getFunctionForSymbol(Symbol: SymbolA, EntryDesc: &EntryIDA); |
260 | const BinaryFunction *FunctionB = |
261 | BC.getFunctionForSymbol(Symbol: SymbolB, EntryDesc: &EntryIDB); |
262 | if (FunctionA && EntryIDA) |
263 | FunctionA = nullptr; |
264 | if (FunctionB && EntryIDB) |
265 | FunctionB = nullptr; |
266 | if (FunctionA && FunctionB) { |
267 | // Self-referencing functions and recursive calls. |
268 | if (FunctionA == &A && FunctionB == &B) |
269 | return true; |
270 | |
271 | // Functions with different hash values can never become identical, |
272 | // hence A and B are different. |
273 | if (CongruentSymbols) |
274 | return FunctionA->getHash() == FunctionB->getHash(); |
275 | |
276 | return FunctionA == FunctionB; |
277 | } |
278 | |
279 | // One of the symbols represents a function, the other one does not. |
280 | if (FunctionA != FunctionB) |
281 | return false; |
282 | |
283 | // Check if symbols are jump tables. |
284 | const BinaryData *SIA = BC.getBinaryDataByName(Name: SymbolA->getName()); |
285 | if (!SIA) |
286 | return false; |
287 | const BinaryData *SIB = BC.getBinaryDataByName(Name: SymbolB->getName()); |
288 | if (!SIB) |
289 | return false; |
290 | |
291 | assert((SIA->getAddress() != SIB->getAddress()) && |
292 | "different symbols should not have the same value" ); |
293 | |
294 | const JumpTable *JumpTableA = |
295 | A.getJumpTableContainingAddress(Address: SIA->getAddress()); |
296 | if (!JumpTableA) |
297 | return false; |
298 | |
299 | const JumpTable *JumpTableB = |
300 | B.getJumpTableContainingAddress(Address: SIB->getAddress()); |
301 | if (!JumpTableB) |
302 | return false; |
303 | |
304 | if ((SIA->getAddress() - JumpTableA->getAddress()) != |
305 | (SIB->getAddress() - JumpTableB->getAddress())) |
306 | return false; |
307 | |
308 | return equalJumpTables(JumpTableA: *JumpTableA, JumpTableB: *JumpTableB, FunctionA: A, FunctionB: B); |
309 | }; |
310 | |
311 | if (!isInstrEquivalentWith(InstA: *I, BBA: *BB, InstB: *OtherI, BBB: *OtherBB, |
312 | Comp: AreSymbolsIdentical)) |
313 | return false; |
314 | |
315 | ++I; |
316 | ++OtherI; |
317 | } |
318 | |
319 | // One of the identical blocks may have a trailing unconditional jump that |
320 | // is ignored for CFG purposes. |
321 | const MCInst *TrailingInstr = |
322 | (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : nullptr)); |
323 | if (TrailingInstr && !BC.MIB->isUnconditionalBranch(Inst: *TrailingInstr)) |
324 | return false; |
325 | |
326 | ++BBI; |
327 | } |
328 | |
329 | // Compare exceptions action tables. |
330 | if (A.getLSDAActionTable() != B.getLSDAActionTable() || |
331 | A.getLSDATypeTable() != B.getLSDATypeTable() || |
332 | A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) |
333 | return false; |
334 | |
335 | return true; |
336 | } |
337 | |
338 | // This hash table is used to identify identical functions. It maps |
339 | // a function to a bucket of functions identical to it. |
340 | struct KeyHash { |
341 | size_t operator()(const BinaryFunction *F) const { return F->getHash(); } |
342 | }; |
343 | |
344 | /// Identify two congruent functions. Two functions are considered congruent, |
345 | /// if they are identical/equal except for some of their instruction operands |
346 | /// that reference potentially identical functions, i.e. functions that could |
347 | /// be folded later. Congruent functions are candidates for folding in our |
348 | /// iterative ICF algorithm. |
349 | /// |
350 | /// Congruent functions are required to have identical hash. |
351 | struct KeyCongruent { |
352 | bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { |
353 | if (A == B) |
354 | return true; |
355 | return isIdenticalWith(A: *A, B: *B, /*CongruentSymbols=*/true); |
356 | } |
357 | }; |
358 | |
359 | struct KeyEqual { |
360 | bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { |
361 | if (A == B) |
362 | return true; |
363 | return isIdenticalWith(A: *A, B: *B, /*CongruentSymbols=*/false); |
364 | } |
365 | }; |
366 | |
367 | typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>, |
368 | KeyHash, KeyCongruent> |
369 | CongruentBucketsMap; |
370 | |
371 | typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>, |
372 | KeyHash, KeyEqual> |
373 | IdenticalBucketsMap; |
374 | |
375 | namespace llvm { |
376 | namespace bolt { |
377 | void IdenticalCodeFolding::initVTableReferences(const BinaryContext &BC) { |
378 | for (const auto &[Address, Data] : BC.getBinaryData()) { |
379 | // Filter out all symbols that are not vtables. |
380 | if (!Data->getName().starts_with(Prefix: "_ZTV" )) |
381 | continue; |
382 | for (uint64_t I = Address, End = I + Data->getSize(); I < End; I += 8) |
383 | setAddressUsedInVTable(I); |
384 | } |
385 | } |
386 | |
387 | void IdenticalCodeFolding::analyzeDataRelocations(BinaryContext &BC) { |
388 | initVTableReferences(BC); |
389 | // For static relocations there should be a symbol for function references. |
390 | for (const BinarySection &Sec : BC.sections()) { |
391 | if (!Sec.hasSectionRef() || !Sec.isData()) |
392 | continue; |
393 | for (const auto &Rel : Sec.relocations()) { |
394 | const uint64_t RelAddr = Rel.Offset + Sec.getAddress(); |
395 | if (isAddressInVTable(Address: RelAddr)) |
396 | continue; |
397 | if (BinaryFunction *BF = BC.getFunctionForSymbol(Symbol: Rel.Symbol)) |
398 | BF->setHasAddressTaken(true); |
399 | } |
400 | // For dynamic relocations there are two cases: |
401 | // 1: No symbol and only addend. |
402 | // 2: There is a symbol, but it does not references a function in a binary. |
403 | for (const auto &Rel : Sec.dynamicRelocations()) { |
404 | const uint64_t RelAddr = Rel.Offset + Sec.getAddress(); |
405 | if (isAddressInVTable(Address: RelAddr)) |
406 | continue; |
407 | if (BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Address: Rel.Addend)) |
408 | BF->setHasAddressTaken(true); |
409 | } |
410 | } |
411 | } |
412 | |
413 | void IdenticalCodeFolding::analyzeFunctions(BinaryContext &BC) { |
414 | ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { |
415 | for (const BinaryBasicBlock &BB : BF) |
416 | for (const MCInst &Inst : BB) |
417 | if (!(BC.MIB->isCall(Inst) || BC.MIB->isBranch(Inst))) |
418 | BF.analyzeInstructionForFuncReference(Inst); |
419 | }; |
420 | ParallelUtilities::PredicateTy SkipFunc = |
421 | [&](const BinaryFunction &BF) -> bool { return !BF.hasCFG(); }; |
422 | ParallelUtilities::runOnEachFunction( |
423 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFunction: WorkFun, |
424 | SkipPredicate: SkipFunc, LogName: "markUnsafe" ); |
425 | |
426 | LLVM_DEBUG({ |
427 | for (const auto &BFIter : BC.getBinaryFunctions()) { |
428 | if (!BFIter.second.hasAddressTaken()) |
429 | continue; |
430 | dbgs() << "BOLT-DEBUG: skipping function with reference taken " |
431 | << BFIter.second.getOneName() << '\n'; |
432 | } |
433 | }); |
434 | } |
435 | |
436 | void IdenticalCodeFolding::markFunctionsUnsafeToFold(BinaryContext &BC) { |
437 | NamedRegionTimer MarkFunctionsUnsafeToFoldTimer( |
438 | "markFunctionsUnsafeToFold" , "markFunctionsUnsafeToFold" , "ICF breakdown" , |
439 | "ICF breakdown" , opts::TimeICF); |
440 | if (!BC.isX86()) |
441 | BC.outs() << "BOLT-WARNING: safe ICF is only supported for x86\n" ; |
442 | analyzeDataRelocations(BC); |
443 | analyzeFunctions(BC); |
444 | } |
445 | |
446 | Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { |
447 | const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); |
448 | uint64_t NumFunctionsFolded = 0; |
449 | std::atomic<uint64_t> NumJTFunctionsFolded{0}; |
450 | std::atomic<uint64_t> BytesSavedEstimate{0}; |
451 | std::atomic<uint64_t> NumCalled{0}; |
452 | std::atomic<uint64_t> NumFoldedLastIteration{0}; |
453 | CongruentBucketsMap CongruentBuckets; |
454 | |
455 | // Hash all the functions |
456 | auto hashFunctions = [&]() { |
457 | NamedRegionTimer HashFunctionsTimer("hashing" , "hashing" , "ICF breakdown" , |
458 | "ICF breakdown" , opts::TimeICF); |
459 | ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { |
460 | // Make sure indices are in-order. |
461 | if (opts::ICFUseDFS) |
462 | BF.getLayout().updateLayoutIndices(Order: BF.dfs()); |
463 | else |
464 | BF.getLayout().updateLayoutIndices(); |
465 | |
466 | // Pre-compute hash before pushing into hashtable. |
467 | // Hash instruction operands to minimize hash collisions. |
468 | BF.computeHash( |
469 | UseDFS: opts::ICFUseDFS, HashFunction: HashFunction::Default, |
470 | OperandHashFunc: [&BC](const MCOperand &Op) { return hashInstOperand(BC, Operand: Op); }); |
471 | }; |
472 | |
473 | ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { |
474 | return !shouldOptimize(BF); |
475 | }; |
476 | |
477 | ParallelUtilities::runOnEachFunction( |
478 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFunction: WorkFun, SkipPredicate: SkipFunc, |
479 | LogName: "hashFunctions" , /*ForceSequential*/ false, TasksPerThread: 2); |
480 | }; |
481 | |
482 | // Creates buckets with congruent functions - functions that potentially |
483 | // could be folded. |
484 | auto createCongruentBuckets = [&]() { |
485 | NamedRegionTimer CongruentBucketsTimer("congruent buckets" , |
486 | "congruent buckets" , "ICF breakdown" , |
487 | "ICF breakdown" , opts::TimeICF); |
488 | for (auto &BFI : BC.getBinaryFunctions()) { |
489 | BinaryFunction &BF = BFI.second; |
490 | if (!shouldOptimize(BF)) |
491 | continue; |
492 | CongruentBuckets[&BF].emplace(args: &BF); |
493 | } |
494 | }; |
495 | |
496 | // Partition each set of congruent functions into sets of identical functions |
497 | // and fold them |
498 | auto performFoldingPass = [&]() { |
499 | NamedRegionTimer FoldingPassesTimer("folding passes" , "folding passes" , |
500 | "ICF breakdown" , "ICF breakdown" , |
501 | opts::TimeICF); |
502 | Timer SinglePass("single fold pass" , "single fold pass" ); |
503 | LLVM_DEBUG(SinglePass.startTimer()); |
504 | |
505 | ThreadPoolInterface *ThPool; |
506 | if (!opts::NoThreads) |
507 | ThPool = &ParallelUtilities::getThreadPool(); |
508 | |
509 | // Fold identical functions within a single congruent bucket |
510 | auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) { |
511 | Timer T("folding single congruent list" , "folding single congruent list" ); |
512 | LLVM_DEBUG(T.startTimer()); |
513 | |
514 | // Identical functions go into the same bucket. |
515 | IdenticalBucketsMap IdenticalBuckets; |
516 | for (BinaryFunction *BF : Candidates) { |
517 | IdenticalBuckets[BF].emplace_back(args&: BF); |
518 | } |
519 | |
520 | for (auto &IBI : IdenticalBuckets) { |
521 | // Functions identified as identical. |
522 | std::vector<BinaryFunction *> &Twins = IBI.second; |
523 | if (Twins.size() < 2) |
524 | continue; |
525 | |
526 | // Fold functions. Keep the order consistent across invocations with |
527 | // different options. |
528 | llvm::stable_sort( |
529 | Range&: Twins, C: [](const BinaryFunction *A, const BinaryFunction *B) { |
530 | return A->getFunctionNumber() < B->getFunctionNumber(); |
531 | }); |
532 | |
533 | BinaryFunction *ParentBF = Twins[0]; |
534 | if (!ParentBF->hasFunctionsFoldedInto()) |
535 | NumCalled += ParentBF->getKnownExecutionCount(); |
536 | for (unsigned I = 1; I < Twins.size(); ++I) { |
537 | BinaryFunction *ChildBF = Twins[I]; |
538 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into " |
539 | << *ParentBF << '\n'); |
540 | |
541 | // Remove child function from the list of candidates. |
542 | auto FI = Candidates.find(x: ChildBF); |
543 | assert(FI != Candidates.end() && |
544 | "function expected to be in the set" ); |
545 | Candidates.erase(position: FI); |
546 | |
547 | // Fold the function and remove from the list of processed functions. |
548 | BytesSavedEstimate += ChildBF->getSize(); |
549 | if (!ChildBF->hasFunctionsFoldedInto()) |
550 | NumCalled += ChildBF->getKnownExecutionCount(); |
551 | BC.foldFunction(ChildBF&: *ChildBF, ParentBF&: *ParentBF); |
552 | |
553 | ++NumFoldedLastIteration; |
554 | |
555 | if (ParentBF->hasJumpTables()) |
556 | ++NumJTFunctionsFolded; |
557 | } |
558 | } |
559 | |
560 | LLVM_DEBUG(T.stopTimer()); |
561 | }; |
562 | |
563 | // Create a task for each congruent bucket |
564 | for (auto &Entry : CongruentBuckets) { |
565 | std::set<BinaryFunction *> &Bucket = Entry.second; |
566 | if (Bucket.size() < 2) |
567 | continue; |
568 | |
569 | if (opts::NoThreads) |
570 | processSingleBucket(Bucket); |
571 | else |
572 | ThPool->async(F&: processSingleBucket, ArgList: std::ref(t&: Bucket)); |
573 | } |
574 | |
575 | if (!opts::NoThreads) |
576 | ThPool->wait(); |
577 | |
578 | LLVM_DEBUG(SinglePass.stopTimer()); |
579 | }; |
580 | if (opts::ICF == ICFLevel::Safe) |
581 | markFunctionsUnsafeToFold(BC); |
582 | hashFunctions(); |
583 | createCongruentBuckets(); |
584 | |
585 | unsigned Iteration = 1; |
586 | // We repeat the pass until no new modifications happen. |
587 | do { |
588 | NumFoldedLastIteration = 0; |
589 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n" ); |
590 | |
591 | performFoldingPass(); |
592 | |
593 | NumFunctionsFolded += NumFoldedLastIteration; |
594 | ++Iteration; |
595 | |
596 | } while (NumFoldedLastIteration > 0); |
597 | |
598 | LLVM_DEBUG({ |
599 | // Print functions that are congruent but not identical. |
600 | for (auto &CBI : CongruentBuckets) { |
601 | std::set<BinaryFunction *> &Candidates = CBI.second; |
602 | if (Candidates.size() < 2) |
603 | continue; |
604 | dbgs() << "BOLT-DEBUG: the following " << Candidates.size() |
605 | << " functions (each of size " << (*Candidates.begin())->getSize() |
606 | << " bytes) are congruent but not identical:\n" ; |
607 | for (BinaryFunction *BF : Candidates) { |
608 | dbgs() << " " << *BF; |
609 | if (BF->getKnownExecutionCount()) |
610 | dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)" ; |
611 | dbgs() << '\n'; |
612 | } |
613 | } |
614 | }); |
615 | |
616 | if (NumFunctionsFolded) |
617 | BC.outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of " |
618 | << OriginalFunctionCount << " functions in " << Iteration |
619 | << " passes. " << NumJTFunctionsFolded |
620 | << " functions had jump tables.\n" |
621 | << "BOLT-INFO: Removing all identical functions will save " |
622 | << format(Fmt: "%.2lf" , Vals: (double)BytesSavedEstimate / 1024) |
623 | << " KB of code space. Folded functions were called " << NumCalled |
624 | << " times based on profile.\n" ; |
625 | |
626 | return Error::success(); |
627 | } |
628 | |
629 | } // namespace bolt |
630 | } // namespace llvm |
631 | |