1 | //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the IdenticalCodeFolding class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/IdenticalCodeFolding.h" |
14 | #include "bolt/Core/HashUtilities.h" |
15 | #include "bolt/Core/ParallelUtilities.h" |
16 | #include "llvm/ADT/SmallVector.h" |
17 | #include "llvm/Support/CommandLine.h" |
18 | #include "llvm/Support/ThreadPool.h" |
19 | #include "llvm/Support/Timer.h" |
20 | #include <atomic> |
21 | #include <iterator> |
22 | #include <map> |
23 | #include <set> |
24 | #include <unordered_map> |
25 | |
26 | #define DEBUG_TYPE "bolt-icf" |
27 | |
28 | using namespace llvm; |
29 | using namespace bolt; |
30 | |
31 | namespace opts { |
32 | |
33 | extern cl::OptionCategory BoltOptCategory; |
34 | |
35 | static cl::opt<bool> |
36 | ICFUseDFS("icf-dfs" , cl::desc("use DFS ordering when using -icf option" ), |
37 | cl::ReallyHidden, cl::cat(BoltOptCategory)); |
38 | |
39 | static cl::opt<bool> |
40 | TimeICF("time-icf" , |
41 | cl::desc("time icf steps" ), |
42 | cl::ReallyHidden, |
43 | cl::ZeroOrMore, |
44 | cl::cat(BoltOptCategory)); |
45 | } // namespace opts |
46 | |
47 | /// Compare two jump tables in 2 functions. The function relies on consistent |
48 | /// ordering of basic blocks in both binary functions (e.g. DFS). |
49 | static bool equalJumpTables(const JumpTable &JumpTableA, |
50 | const JumpTable &JumpTableB, |
51 | const BinaryFunction &FunctionA, |
52 | const BinaryFunction &FunctionB) { |
53 | if (JumpTableA.EntrySize != JumpTableB.EntrySize) |
54 | return false; |
55 | |
56 | if (JumpTableA.Type != JumpTableB.Type) |
57 | return false; |
58 | |
59 | if (JumpTableA.getSize() != JumpTableB.getSize()) |
60 | return false; |
61 | |
62 | for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) { |
63 | const MCSymbol *LabelA = JumpTableA.Entries[Index]; |
64 | const MCSymbol *LabelB = JumpTableB.Entries[Index]; |
65 | |
66 | const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(Label: LabelA); |
67 | const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(Label: LabelB); |
68 | |
69 | if (!TargetA || !TargetB) { |
70 | assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) && |
71 | "no target basic block found" ); |
72 | assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) && |
73 | "no target basic block found" ); |
74 | |
75 | if (TargetA != TargetB) |
76 | return false; |
77 | |
78 | continue; |
79 | } |
80 | |
81 | assert(TargetA && TargetB && "cannot locate target block(s)" ); |
82 | |
83 | if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex()) |
84 | return false; |
85 | } |
86 | |
87 | return true; |
88 | } |
89 | |
90 | /// Helper function that compares an instruction of this function to the |
91 | /// given instruction of the given function. The functions should have |
92 | /// identical CFG. |
93 | template <class Compare> |
94 | static bool isInstrEquivalentWith(const MCInst &InstA, |
95 | const BinaryBasicBlock &BBA, |
96 | const MCInst &InstB, |
97 | const BinaryBasicBlock &BBB, Compare Comp) { |
98 | if (InstA.getOpcode() != InstB.getOpcode()) |
99 | return false; |
100 | |
101 | const BinaryContext &BC = BBA.getFunction()->getBinaryContext(); |
102 | |
103 | // In this function we check for special conditions: |
104 | // |
105 | // * instructions with landing pads |
106 | // |
107 | // Most of the common cases should be handled by MCPlus::equals() |
108 | // that compares regular instruction operands. |
109 | // |
110 | // NB: there's no need to compare jump table indirect jump instructions |
111 | // separately as jump tables are handled by comparing corresponding |
112 | // symbols. |
113 | const std::optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(Inst: InstA); |
114 | const std::optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(Inst: InstB); |
115 | |
116 | if (EHInfoA || EHInfoB) { |
117 | if (!EHInfoA && (EHInfoB->first || EHInfoB->second)) |
118 | return false; |
119 | |
120 | if (!EHInfoB && (EHInfoA->first || EHInfoA->second)) |
121 | return false; |
122 | |
123 | if (EHInfoA && EHInfoB) { |
124 | // Action indices should match. |
125 | if (EHInfoA->second != EHInfoB->second) |
126 | return false; |
127 | |
128 | if (!EHInfoA->first != !EHInfoB->first) |
129 | return false; |
130 | |
131 | if (EHInfoA->first && EHInfoB->first) { |
132 | const BinaryBasicBlock *LPA = BBA.getLandingPad(Label: EHInfoA->first); |
133 | const BinaryBasicBlock *LPB = BBB.getLandingPad(Label: EHInfoB->first); |
134 | assert(LPA && LPB && "cannot locate landing pad(s)" ); |
135 | |
136 | if (LPA->getLayoutIndex() != LPB->getLayoutIndex()) |
137 | return false; |
138 | } |
139 | } |
140 | } |
141 | |
142 | return BC.MIB->equals(InstA, InstB, Comp); |
143 | } |
144 | |
145 | /// Returns true if this function has identical code and CFG with |
146 | /// the given function \p BF. |
147 | /// |
148 | /// If \p CongruentSymbols is set to true, then symbolic operands that reference |
149 | /// potentially identical but different functions are ignored during the |
150 | /// comparison. |
151 | static bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, |
152 | bool CongruentSymbols) { |
153 | assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG" ); |
154 | |
155 | // Compare the two functions, one basic block at a time. |
156 | // Currently we require two identical basic blocks to have identical |
157 | // instruction sequences and the same index in their corresponding |
158 | // functions. The latter is important for CFG equality. |
159 | |
160 | if (A.getLayout().block_size() != B.getLayout().block_size()) |
161 | return false; |
162 | |
163 | // Comparing multi-entry functions could be non-trivial. |
164 | if (A.isMultiEntry() || B.isMultiEntry()) |
165 | return false; |
166 | |
167 | if (A.hasIslandsInfo() || B.hasIslandsInfo()) |
168 | return false; |
169 | |
170 | // Process both functions in either DFS or existing order. |
171 | SmallVector<const BinaryBasicBlock *, 0> OrderA; |
172 | SmallVector<const BinaryBasicBlock *, 0> OrderB; |
173 | if (opts::ICFUseDFS) { |
174 | copy(Range: A.dfs(), Out: std::back_inserter(x&: OrderA)); |
175 | copy(Range: B.dfs(), Out: std::back_inserter(x&: OrderB)); |
176 | } else { |
177 | copy(Range: A.getLayout().blocks(), Out: std::back_inserter(x&: OrderA)); |
178 | copy(Range: B.getLayout().blocks(), Out: std::back_inserter(x&: OrderB)); |
179 | } |
180 | |
181 | const BinaryContext &BC = A.getBinaryContext(); |
182 | |
183 | auto BBI = OrderB.begin(); |
184 | for (const BinaryBasicBlock *BB : OrderA) { |
185 | const BinaryBasicBlock *OtherBB = *BBI; |
186 | |
187 | if (BB->getLayoutIndex() != OtherBB->getLayoutIndex()) |
188 | return false; |
189 | |
190 | // Compare successor basic blocks. |
191 | // NOTE: the comparison for jump tables is only partially verified here. |
192 | if (BB->succ_size() != OtherBB->succ_size()) |
193 | return false; |
194 | |
195 | auto SuccBBI = OtherBB->succ_begin(); |
196 | for (const BinaryBasicBlock *SuccBB : BB->successors()) { |
197 | const BinaryBasicBlock *SuccOtherBB = *SuccBBI; |
198 | if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex()) |
199 | return false; |
200 | ++SuccBBI; |
201 | } |
202 | |
203 | // Compare all instructions including pseudos. |
204 | auto I = BB->begin(), E = BB->end(); |
205 | auto OtherI = OtherBB->begin(), OtherE = OtherBB->end(); |
206 | while (I != E && OtherI != OtherE) { |
207 | // Compare symbols. |
208 | auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA, |
209 | const MCSymbol *SymbolB) { |
210 | if (SymbolA == SymbolB) |
211 | return true; |
212 | |
213 | // All local symbols are considered identical since they affect a |
214 | // control flow and we check the control flow separately. |
215 | // If a local symbol is escaped, then the function (potentially) has |
216 | // multiple entry points and we exclude such functions from |
217 | // comparison. |
218 | if (SymbolA->isTemporary() && SymbolB->isTemporary()) |
219 | return true; |
220 | |
221 | // Compare symbols as functions. |
222 | uint64_t EntryIDA = 0; |
223 | uint64_t EntryIDB = 0; |
224 | const BinaryFunction *FunctionA = |
225 | BC.getFunctionForSymbol(Symbol: SymbolA, EntryDesc: &EntryIDA); |
226 | const BinaryFunction *FunctionB = |
227 | BC.getFunctionForSymbol(Symbol: SymbolB, EntryDesc: &EntryIDB); |
228 | if (FunctionA && EntryIDA) |
229 | FunctionA = nullptr; |
230 | if (FunctionB && EntryIDB) |
231 | FunctionB = nullptr; |
232 | if (FunctionA && FunctionB) { |
233 | // Self-referencing functions and recursive calls. |
234 | if (FunctionA == &A && FunctionB == &B) |
235 | return true; |
236 | |
237 | // Functions with different hash values can never become identical, |
238 | // hence A and B are different. |
239 | if (CongruentSymbols) |
240 | return FunctionA->getHash() == FunctionB->getHash(); |
241 | |
242 | return FunctionA == FunctionB; |
243 | } |
244 | |
245 | // One of the symbols represents a function, the other one does not. |
246 | if (FunctionA != FunctionB) |
247 | return false; |
248 | |
249 | // Check if symbols are jump tables. |
250 | const BinaryData *SIA = BC.getBinaryDataByName(Name: SymbolA->getName()); |
251 | if (!SIA) |
252 | return false; |
253 | const BinaryData *SIB = BC.getBinaryDataByName(Name: SymbolB->getName()); |
254 | if (!SIB) |
255 | return false; |
256 | |
257 | assert((SIA->getAddress() != SIB->getAddress()) && |
258 | "different symbols should not have the same value" ); |
259 | |
260 | const JumpTable *JumpTableA = |
261 | A.getJumpTableContainingAddress(Address: SIA->getAddress()); |
262 | if (!JumpTableA) |
263 | return false; |
264 | |
265 | const JumpTable *JumpTableB = |
266 | B.getJumpTableContainingAddress(Address: SIB->getAddress()); |
267 | if (!JumpTableB) |
268 | return false; |
269 | |
270 | if ((SIA->getAddress() - JumpTableA->getAddress()) != |
271 | (SIB->getAddress() - JumpTableB->getAddress())) |
272 | return false; |
273 | |
274 | return equalJumpTables(JumpTableA: *JumpTableA, JumpTableB: *JumpTableB, FunctionA: A, FunctionB: B); |
275 | }; |
276 | |
277 | if (!isInstrEquivalentWith(InstA: *I, BBA: *BB, InstB: *OtherI, BBB: *OtherBB, |
278 | Comp: AreSymbolsIdentical)) |
279 | return false; |
280 | |
281 | ++I; |
282 | ++OtherI; |
283 | } |
284 | |
285 | // One of the identical blocks may have a trailing unconditional jump that |
286 | // is ignored for CFG purposes. |
287 | const MCInst *TrailingInstr = |
288 | (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : nullptr)); |
289 | if (TrailingInstr && !BC.MIB->isUnconditionalBranch(Inst: *TrailingInstr)) |
290 | return false; |
291 | |
292 | ++BBI; |
293 | } |
294 | |
295 | // Compare exceptions action tables. |
296 | if (A.getLSDAActionTable() != B.getLSDAActionTable() || |
297 | A.getLSDATypeTable() != B.getLSDATypeTable() || |
298 | A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) |
299 | return false; |
300 | |
301 | return true; |
302 | } |
303 | |
304 | // This hash table is used to identify identical functions. It maps |
305 | // a function to a bucket of functions identical to it. |
306 | struct KeyHash { |
307 | size_t operator()(const BinaryFunction *F) const { return F->getHash(); } |
308 | }; |
309 | |
310 | /// Identify two congruent functions. Two functions are considered congruent, |
311 | /// if they are identical/equal except for some of their instruction operands |
312 | /// that reference potentially identical functions, i.e. functions that could |
313 | /// be folded later. Congruent functions are candidates for folding in our |
314 | /// iterative ICF algorithm. |
315 | /// |
316 | /// Congruent functions are required to have identical hash. |
317 | struct KeyCongruent { |
318 | bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { |
319 | if (A == B) |
320 | return true; |
321 | return isIdenticalWith(A: *A, B: *B, /*CongruentSymbols=*/true); |
322 | } |
323 | }; |
324 | |
325 | struct KeyEqual { |
326 | bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { |
327 | if (A == B) |
328 | return true; |
329 | return isIdenticalWith(A: *A, B: *B, /*CongruentSymbols=*/false); |
330 | } |
331 | }; |
332 | |
333 | typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>, |
334 | KeyHash, KeyCongruent> |
335 | CongruentBucketsMap; |
336 | |
337 | typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>, |
338 | KeyHash, KeyEqual> |
339 | IdenticalBucketsMap; |
340 | |
341 | namespace llvm { |
342 | namespace bolt { |
343 | |
344 | Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { |
345 | const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); |
346 | uint64_t NumFunctionsFolded = 0; |
347 | std::atomic<uint64_t> NumJTFunctionsFolded{0}; |
348 | std::atomic<uint64_t> BytesSavedEstimate{0}; |
349 | std::atomic<uint64_t> NumCalled{0}; |
350 | std::atomic<uint64_t> NumFoldedLastIteration{0}; |
351 | CongruentBucketsMap CongruentBuckets; |
352 | |
353 | // Hash all the functions |
354 | auto hashFunctions = [&]() { |
355 | NamedRegionTimer HashFunctionsTimer("hashing" , "hashing" , "ICF breakdown" , |
356 | "ICF breakdown" , opts::TimeICF); |
357 | ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { |
358 | // Make sure indices are in-order. |
359 | BF.getLayout().updateLayoutIndices(); |
360 | |
361 | // Pre-compute hash before pushing into hashtable. |
362 | // Hash instruction operands to minimize hash collisions. |
363 | BF.computeHash( |
364 | UseDFS: opts::ICFUseDFS, HashFunction: HashFunction::Default, |
365 | OperandHashFunc: [&BC](const MCOperand &Op) { return hashInstOperand(BC, Operand: Op); }); |
366 | }; |
367 | |
368 | ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { |
369 | return !shouldOptimize(BF); |
370 | }; |
371 | |
372 | ParallelUtilities::runOnEachFunction( |
373 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFunction: WorkFun, SkipPredicate: SkipFunc, |
374 | LogName: "hashFunctions" , /*ForceSequential*/ false, TasksPerThread: 2); |
375 | }; |
376 | |
377 | // Creates buckets with congruent functions - functions that potentially |
378 | // could be folded. |
379 | auto createCongruentBuckets = [&]() { |
380 | NamedRegionTimer CongruentBucketsTimer("congruent buckets" , |
381 | "congruent buckets" , "ICF breakdown" , |
382 | "ICF breakdown" , opts::TimeICF); |
383 | for (auto &BFI : BC.getBinaryFunctions()) { |
384 | BinaryFunction &BF = BFI.second; |
385 | if (!this->shouldOptimize(BF)) |
386 | continue; |
387 | CongruentBuckets[&BF].emplace(args: &BF); |
388 | } |
389 | }; |
390 | |
391 | // Partition each set of congruent functions into sets of identical functions |
392 | // and fold them |
393 | auto performFoldingPass = [&]() { |
394 | NamedRegionTimer FoldingPassesTimer("folding passes" , "folding passes" , |
395 | "ICF breakdown" , "ICF breakdown" , |
396 | opts::TimeICF); |
397 | Timer SinglePass("single fold pass" , "single fold pass" ); |
398 | LLVM_DEBUG(SinglePass.startTimer()); |
399 | |
400 | ThreadPoolInterface *ThPool; |
401 | if (!opts::NoThreads) |
402 | ThPool = &ParallelUtilities::getThreadPool(); |
403 | |
404 | // Fold identical functions within a single congruent bucket |
405 | auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) { |
406 | Timer T("folding single congruent list" , "folding single congruent list" ); |
407 | LLVM_DEBUG(T.startTimer()); |
408 | |
409 | // Identical functions go into the same bucket. |
410 | IdenticalBucketsMap IdenticalBuckets; |
411 | for (BinaryFunction *BF : Candidates) { |
412 | IdenticalBuckets[BF].emplace_back(args&: BF); |
413 | } |
414 | |
415 | for (auto &IBI : IdenticalBuckets) { |
416 | // Functions identified as identical. |
417 | std::vector<BinaryFunction *> &Twins = IBI.second; |
418 | if (Twins.size() < 2) |
419 | continue; |
420 | |
421 | // Fold functions. Keep the order consistent across invocations with |
422 | // different options. |
423 | llvm::stable_sort( |
424 | Range&: Twins, C: [](const BinaryFunction *A, const BinaryFunction *B) { |
425 | return A->getFunctionNumber() < B->getFunctionNumber(); |
426 | }); |
427 | |
428 | BinaryFunction *ParentBF = Twins[0]; |
429 | if (!ParentBF->hasFunctionsFoldedInto()) |
430 | NumCalled += ParentBF->getKnownExecutionCount(); |
431 | for (unsigned I = 1; I < Twins.size(); ++I) { |
432 | BinaryFunction *ChildBF = Twins[I]; |
433 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into " |
434 | << *ParentBF << '\n'); |
435 | |
436 | // Remove child function from the list of candidates. |
437 | auto FI = Candidates.find(x: ChildBF); |
438 | assert(FI != Candidates.end() && |
439 | "function expected to be in the set" ); |
440 | Candidates.erase(position: FI); |
441 | |
442 | // Fold the function and remove from the list of processed functions. |
443 | BytesSavedEstimate += ChildBF->getSize(); |
444 | if (!ChildBF->hasFunctionsFoldedInto()) |
445 | NumCalled += ChildBF->getKnownExecutionCount(); |
446 | BC.foldFunction(ChildBF&: *ChildBF, ParentBF&: *ParentBF); |
447 | |
448 | ++NumFoldedLastIteration; |
449 | |
450 | if (ParentBF->hasJumpTables()) |
451 | ++NumJTFunctionsFolded; |
452 | } |
453 | } |
454 | |
455 | LLVM_DEBUG(T.stopTimer()); |
456 | }; |
457 | |
458 | // Create a task for each congruent bucket |
459 | for (auto &Entry : CongruentBuckets) { |
460 | std::set<BinaryFunction *> &Bucket = Entry.second; |
461 | if (Bucket.size() < 2) |
462 | continue; |
463 | |
464 | if (opts::NoThreads) |
465 | processSingleBucket(Bucket); |
466 | else |
467 | ThPool->async(F&: processSingleBucket, ArgList: std::ref(t&: Bucket)); |
468 | } |
469 | |
470 | if (!opts::NoThreads) |
471 | ThPool->wait(); |
472 | |
473 | LLVM_DEBUG(SinglePass.stopTimer()); |
474 | }; |
475 | |
476 | hashFunctions(); |
477 | createCongruentBuckets(); |
478 | |
479 | unsigned Iteration = 1; |
480 | // We repeat the pass until no new modifications happen. |
481 | do { |
482 | NumFoldedLastIteration = 0; |
483 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n" ); |
484 | |
485 | performFoldingPass(); |
486 | |
487 | NumFunctionsFolded += NumFoldedLastIteration; |
488 | ++Iteration; |
489 | |
490 | } while (NumFoldedLastIteration > 0); |
491 | |
492 | LLVM_DEBUG({ |
493 | // Print functions that are congruent but not identical. |
494 | for (auto &CBI : CongruentBuckets) { |
495 | std::set<BinaryFunction *> &Candidates = CBI.second; |
496 | if (Candidates.size() < 2) |
497 | continue; |
498 | dbgs() << "BOLT-DEBUG: the following " << Candidates.size() |
499 | << " functions (each of size " << (*Candidates.begin())->getSize() |
500 | << " bytes) are congruent but not identical:\n" ; |
501 | for (BinaryFunction *BF : Candidates) { |
502 | dbgs() << " " << *BF; |
503 | if (BF->getKnownExecutionCount()) |
504 | dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)" ; |
505 | dbgs() << '\n'; |
506 | } |
507 | } |
508 | }); |
509 | |
510 | if (NumFunctionsFolded) |
511 | BC.outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of " |
512 | << OriginalFunctionCount << " functions in " << Iteration |
513 | << " passes. " << NumJTFunctionsFolded |
514 | << " functions had jump tables.\n" |
515 | << "BOLT-INFO: Removing all identical functions will save " |
516 | << format(Fmt: "%.2lf" , Vals: (double)BytesSavedEstimate / 1024) |
517 | << " KB of code space. Folded functions were called " << NumCalled |
518 | << " times based on profile.\n" ; |
519 | |
520 | return Error::success(); |
521 | } |
522 | |
523 | } // namespace bolt |
524 | } // namespace llvm |
525 | |