1//===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the IdenticalCodeFolding class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/IdenticalCodeFolding.h"
14#include "bolt/Core/HashUtilities.h"
15#include "bolt/Core/ParallelUtilities.h"
16#include "llvm/ADT/SmallVector.h"
17#include "llvm/Support/CommandLine.h"
18#include "llvm/Support/ThreadPool.h"
19#include "llvm/Support/Timer.h"
20#include <atomic>
21#include <iterator>
22#include <map>
23#include <set>
24#include <unordered_map>
25
26#define DEBUG_TYPE "bolt-icf"
27
28using namespace llvm;
29using namespace bolt;
30
31namespace opts {
32
33extern cl::OptionCategory BoltOptCategory;
34
35static cl::opt<bool>
36 ICFUseDFS("icf-dfs", cl::desc("use DFS ordering when using -icf option"),
37 cl::ReallyHidden, cl::cat(BoltOptCategory));
38
39static cl::opt<bool>
40TimeICF("time-icf",
41 cl::desc("time icf steps"),
42 cl::ReallyHidden,
43 cl::ZeroOrMore,
44 cl::cat(BoltOptCategory));
45} // namespace opts
46
47/// Compare two jump tables in 2 functions. The function relies on consistent
48/// ordering of basic blocks in both binary functions (e.g. DFS).
49static bool equalJumpTables(const JumpTable &JumpTableA,
50 const JumpTable &JumpTableB,
51 const BinaryFunction &FunctionA,
52 const BinaryFunction &FunctionB) {
53 if (JumpTableA.EntrySize != JumpTableB.EntrySize)
54 return false;
55
56 if (JumpTableA.Type != JumpTableB.Type)
57 return false;
58
59 if (JumpTableA.getSize() != JumpTableB.getSize())
60 return false;
61
62 for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) {
63 const MCSymbol *LabelA = JumpTableA.Entries[Index];
64 const MCSymbol *LabelB = JumpTableB.Entries[Index];
65
66 const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(Label: LabelA);
67 const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(Label: LabelB);
68
69 if (!TargetA || !TargetB) {
70 assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) &&
71 "no target basic block found");
72 assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) &&
73 "no target basic block found");
74
75 if (TargetA != TargetB)
76 return false;
77
78 continue;
79 }
80
81 assert(TargetA && TargetB && "cannot locate target block(s)");
82
83 if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
84 return false;
85 }
86
87 return true;
88}
89
90/// Helper function that compares an instruction of this function to the
91/// given instruction of the given function. The functions should have
92/// identical CFG.
93template <class Compare>
94static bool isInstrEquivalentWith(const MCInst &InstA,
95 const BinaryBasicBlock &BBA,
96 const MCInst &InstB,
97 const BinaryBasicBlock &BBB, Compare Comp) {
98 if (InstA.getOpcode() != InstB.getOpcode())
99 return false;
100
101 const BinaryContext &BC = BBA.getFunction()->getBinaryContext();
102
103 // In this function we check for special conditions:
104 //
105 // * instructions with landing pads
106 //
107 // Most of the common cases should be handled by MCPlus::equals()
108 // that compares regular instruction operands.
109 //
110 // NB: there's no need to compare jump table indirect jump instructions
111 // separately as jump tables are handled by comparing corresponding
112 // symbols.
113 const std::optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(Inst: InstA);
114 const std::optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(Inst: InstB);
115
116 if (EHInfoA || EHInfoB) {
117 if (!EHInfoA && (EHInfoB->first || EHInfoB->second))
118 return false;
119
120 if (!EHInfoB && (EHInfoA->first || EHInfoA->second))
121 return false;
122
123 if (EHInfoA && EHInfoB) {
124 // Action indices should match.
125 if (EHInfoA->second != EHInfoB->second)
126 return false;
127
128 if (!EHInfoA->first != !EHInfoB->first)
129 return false;
130
131 if (EHInfoA->first && EHInfoB->first) {
132 const BinaryBasicBlock *LPA = BBA.getLandingPad(Label: EHInfoA->first);
133 const BinaryBasicBlock *LPB = BBB.getLandingPad(Label: EHInfoB->first);
134 assert(LPA && LPB && "cannot locate landing pad(s)");
135
136 if (LPA->getLayoutIndex() != LPB->getLayoutIndex())
137 return false;
138 }
139 }
140 }
141
142 return BC.MIB->equals(InstA, InstB, Comp);
143}
144
145/// Returns true if this function has identical code and CFG with
146/// the given function \p BF.
147///
148/// If \p CongruentSymbols is set to true, then symbolic operands that reference
149/// potentially identical but different functions are ignored during the
150/// comparison.
151static bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
152 bool CongruentSymbols) {
153 assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG");
154
155 // Compare the two functions, one basic block at a time.
156 // Currently we require two identical basic blocks to have identical
157 // instruction sequences and the same index in their corresponding
158 // functions. The latter is important for CFG equality.
159
160 if (A.getLayout().block_size() != B.getLayout().block_size())
161 return false;
162
163 // Comparing multi-entry functions could be non-trivial.
164 if (A.isMultiEntry() || B.isMultiEntry())
165 return false;
166
167 if (A.hasIslandsInfo() || B.hasIslandsInfo())
168 return false;
169
170 // Process both functions in either DFS or existing order.
171 SmallVector<const BinaryBasicBlock *, 0> OrderA;
172 SmallVector<const BinaryBasicBlock *, 0> OrderB;
173 if (opts::ICFUseDFS) {
174 copy(Range: A.dfs(), Out: std::back_inserter(x&: OrderA));
175 copy(Range: B.dfs(), Out: std::back_inserter(x&: OrderB));
176 } else {
177 copy(Range: A.getLayout().blocks(), Out: std::back_inserter(x&: OrderA));
178 copy(Range: B.getLayout().blocks(), Out: std::back_inserter(x&: OrderB));
179 }
180
181 const BinaryContext &BC = A.getBinaryContext();
182
183 auto BBI = OrderB.begin();
184 for (const BinaryBasicBlock *BB : OrderA) {
185 const BinaryBasicBlock *OtherBB = *BBI;
186
187 if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
188 return false;
189
190 // Compare successor basic blocks.
191 // NOTE: the comparison for jump tables is only partially verified here.
192 if (BB->succ_size() != OtherBB->succ_size())
193 return false;
194
195 auto SuccBBI = OtherBB->succ_begin();
196 for (const BinaryBasicBlock *SuccBB : BB->successors()) {
197 const BinaryBasicBlock *SuccOtherBB = *SuccBBI;
198 if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex())
199 return false;
200 ++SuccBBI;
201 }
202
203 // Compare all instructions including pseudos.
204 auto I = BB->begin(), E = BB->end();
205 auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
206 while (I != E && OtherI != OtherE) {
207 // Compare symbols.
208 auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA,
209 const MCSymbol *SymbolB) {
210 if (SymbolA == SymbolB)
211 return true;
212
213 // All local symbols are considered identical since they affect a
214 // control flow and we check the control flow separately.
215 // If a local symbol is escaped, then the function (potentially) has
216 // multiple entry points and we exclude such functions from
217 // comparison.
218 if (SymbolA->isTemporary() && SymbolB->isTemporary())
219 return true;
220
221 // Compare symbols as functions.
222 uint64_t EntryIDA = 0;
223 uint64_t EntryIDB = 0;
224 const BinaryFunction *FunctionA =
225 BC.getFunctionForSymbol(Symbol: SymbolA, EntryDesc: &EntryIDA);
226 const BinaryFunction *FunctionB =
227 BC.getFunctionForSymbol(Symbol: SymbolB, EntryDesc: &EntryIDB);
228 if (FunctionA && EntryIDA)
229 FunctionA = nullptr;
230 if (FunctionB && EntryIDB)
231 FunctionB = nullptr;
232 if (FunctionA && FunctionB) {
233 // Self-referencing functions and recursive calls.
234 if (FunctionA == &A && FunctionB == &B)
235 return true;
236
237 // Functions with different hash values can never become identical,
238 // hence A and B are different.
239 if (CongruentSymbols)
240 return FunctionA->getHash() == FunctionB->getHash();
241
242 return FunctionA == FunctionB;
243 }
244
245 // One of the symbols represents a function, the other one does not.
246 if (FunctionA != FunctionB)
247 return false;
248
249 // Check if symbols are jump tables.
250 const BinaryData *SIA = BC.getBinaryDataByName(Name: SymbolA->getName());
251 if (!SIA)
252 return false;
253 const BinaryData *SIB = BC.getBinaryDataByName(Name: SymbolB->getName());
254 if (!SIB)
255 return false;
256
257 assert((SIA->getAddress() != SIB->getAddress()) &&
258 "different symbols should not have the same value");
259
260 const JumpTable *JumpTableA =
261 A.getJumpTableContainingAddress(Address: SIA->getAddress());
262 if (!JumpTableA)
263 return false;
264
265 const JumpTable *JumpTableB =
266 B.getJumpTableContainingAddress(Address: SIB->getAddress());
267 if (!JumpTableB)
268 return false;
269
270 if ((SIA->getAddress() - JumpTableA->getAddress()) !=
271 (SIB->getAddress() - JumpTableB->getAddress()))
272 return false;
273
274 return equalJumpTables(JumpTableA: *JumpTableA, JumpTableB: *JumpTableB, FunctionA: A, FunctionB: B);
275 };
276
277 if (!isInstrEquivalentWith(InstA: *I, BBA: *BB, InstB: *OtherI, BBB: *OtherBB,
278 Comp: AreSymbolsIdentical))
279 return false;
280
281 ++I;
282 ++OtherI;
283 }
284
285 // One of the identical blocks may have a trailing unconditional jump that
286 // is ignored for CFG purposes.
287 const MCInst *TrailingInstr =
288 (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : nullptr));
289 if (TrailingInstr && !BC.MIB->isUnconditionalBranch(Inst: *TrailingInstr))
290 return false;
291
292 ++BBI;
293 }
294
295 // Compare exceptions action tables.
296 if (A.getLSDAActionTable() != B.getLSDAActionTable() ||
297 A.getLSDATypeTable() != B.getLSDATypeTable() ||
298 A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable())
299 return false;
300
301 return true;
302}
303
304// This hash table is used to identify identical functions. It maps
305// a function to a bucket of functions identical to it.
306struct KeyHash {
307 size_t operator()(const BinaryFunction *F) const { return F->getHash(); }
308};
309
310/// Identify two congruent functions. Two functions are considered congruent,
311/// if they are identical/equal except for some of their instruction operands
312/// that reference potentially identical functions, i.e. functions that could
313/// be folded later. Congruent functions are candidates for folding in our
314/// iterative ICF algorithm.
315///
316/// Congruent functions are required to have identical hash.
317struct KeyCongruent {
318 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
319 if (A == B)
320 return true;
321 return isIdenticalWith(A: *A, B: *B, /*CongruentSymbols=*/true);
322 }
323};
324
325struct KeyEqual {
326 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
327 if (A == B)
328 return true;
329 return isIdenticalWith(A: *A, B: *B, /*CongruentSymbols=*/false);
330 }
331};
332
333typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>,
334 KeyHash, KeyCongruent>
335 CongruentBucketsMap;
336
337typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
338 KeyHash, KeyEqual>
339 IdenticalBucketsMap;
340
341namespace llvm {
342namespace bolt {
343
344Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
345 const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
346 uint64_t NumFunctionsFolded = 0;
347 std::atomic<uint64_t> NumJTFunctionsFolded{0};
348 std::atomic<uint64_t> BytesSavedEstimate{0};
349 std::atomic<uint64_t> NumCalled{0};
350 std::atomic<uint64_t> NumFoldedLastIteration{0};
351 CongruentBucketsMap CongruentBuckets;
352
353 // Hash all the functions
354 auto hashFunctions = [&]() {
355 NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown",
356 "ICF breakdown", opts::TimeICF);
357 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
358 // Make sure indices are in-order.
359 BF.getLayout().updateLayoutIndices();
360
361 // Pre-compute hash before pushing into hashtable.
362 // Hash instruction operands to minimize hash collisions.
363 BF.computeHash(
364 UseDFS: opts::ICFUseDFS, HashFunction: HashFunction::Default,
365 OperandHashFunc: [&BC](const MCOperand &Op) { return hashInstOperand(BC, Operand: Op); });
366 };
367
368 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
369 return !shouldOptimize(BF);
370 };
371
372 ParallelUtilities::runOnEachFunction(
373 BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFunction: WorkFun, SkipPredicate: SkipFunc,
374 LogName: "hashFunctions", /*ForceSequential*/ false, TasksPerThread: 2);
375 };
376
377 // Creates buckets with congruent functions - functions that potentially
378 // could be folded.
379 auto createCongruentBuckets = [&]() {
380 NamedRegionTimer CongruentBucketsTimer("congruent buckets",
381 "congruent buckets", "ICF breakdown",
382 "ICF breakdown", opts::TimeICF);
383 for (auto &BFI : BC.getBinaryFunctions()) {
384 BinaryFunction &BF = BFI.second;
385 if (!this->shouldOptimize(BF))
386 continue;
387 CongruentBuckets[&BF].emplace(args: &BF);
388 }
389 };
390
391 // Partition each set of congruent functions into sets of identical functions
392 // and fold them
393 auto performFoldingPass = [&]() {
394 NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes",
395 "ICF breakdown", "ICF breakdown",
396 opts::TimeICF);
397 Timer SinglePass("single fold pass", "single fold pass");
398 LLVM_DEBUG(SinglePass.startTimer());
399
400 ThreadPoolInterface *ThPool;
401 if (!opts::NoThreads)
402 ThPool = &ParallelUtilities::getThreadPool();
403
404 // Fold identical functions within a single congruent bucket
405 auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) {
406 Timer T("folding single congruent list", "folding single congruent list");
407 LLVM_DEBUG(T.startTimer());
408
409 // Identical functions go into the same bucket.
410 IdenticalBucketsMap IdenticalBuckets;
411 for (BinaryFunction *BF : Candidates) {
412 IdenticalBuckets[BF].emplace_back(args&: BF);
413 }
414
415 for (auto &IBI : IdenticalBuckets) {
416 // Functions identified as identical.
417 std::vector<BinaryFunction *> &Twins = IBI.second;
418 if (Twins.size() < 2)
419 continue;
420
421 // Fold functions. Keep the order consistent across invocations with
422 // different options.
423 llvm::stable_sort(
424 Range&: Twins, C: [](const BinaryFunction *A, const BinaryFunction *B) {
425 return A->getFunctionNumber() < B->getFunctionNumber();
426 });
427
428 BinaryFunction *ParentBF = Twins[0];
429 if (!ParentBF->hasFunctionsFoldedInto())
430 NumCalled += ParentBF->getKnownExecutionCount();
431 for (unsigned I = 1; I < Twins.size(); ++I) {
432 BinaryFunction *ChildBF = Twins[I];
433 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into "
434 << *ParentBF << '\n');
435
436 // Remove child function from the list of candidates.
437 auto FI = Candidates.find(x: ChildBF);
438 assert(FI != Candidates.end() &&
439 "function expected to be in the set");
440 Candidates.erase(position: FI);
441
442 // Fold the function and remove from the list of processed functions.
443 BytesSavedEstimate += ChildBF->getSize();
444 if (!ChildBF->hasFunctionsFoldedInto())
445 NumCalled += ChildBF->getKnownExecutionCount();
446 BC.foldFunction(ChildBF&: *ChildBF, ParentBF&: *ParentBF);
447
448 ++NumFoldedLastIteration;
449
450 if (ParentBF->hasJumpTables())
451 ++NumJTFunctionsFolded;
452 }
453 }
454
455 LLVM_DEBUG(T.stopTimer());
456 };
457
458 // Create a task for each congruent bucket
459 for (auto &Entry : CongruentBuckets) {
460 std::set<BinaryFunction *> &Bucket = Entry.second;
461 if (Bucket.size() < 2)
462 continue;
463
464 if (opts::NoThreads)
465 processSingleBucket(Bucket);
466 else
467 ThPool->async(F&: processSingleBucket, ArgList: std::ref(t&: Bucket));
468 }
469
470 if (!opts::NoThreads)
471 ThPool->wait();
472
473 LLVM_DEBUG(SinglePass.stopTimer());
474 };
475
476 hashFunctions();
477 createCongruentBuckets();
478
479 unsigned Iteration = 1;
480 // We repeat the pass until no new modifications happen.
481 do {
482 NumFoldedLastIteration = 0;
483 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n");
484
485 performFoldingPass();
486
487 NumFunctionsFolded += NumFoldedLastIteration;
488 ++Iteration;
489
490 } while (NumFoldedLastIteration > 0);
491
492 LLVM_DEBUG({
493 // Print functions that are congruent but not identical.
494 for (auto &CBI : CongruentBuckets) {
495 std::set<BinaryFunction *> &Candidates = CBI.second;
496 if (Candidates.size() < 2)
497 continue;
498 dbgs() << "BOLT-DEBUG: the following " << Candidates.size()
499 << " functions (each of size " << (*Candidates.begin())->getSize()
500 << " bytes) are congruent but not identical:\n";
501 for (BinaryFunction *BF : Candidates) {
502 dbgs() << " " << *BF;
503 if (BF->getKnownExecutionCount())
504 dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)";
505 dbgs() << '\n';
506 }
507 }
508 });
509
510 if (NumFunctionsFolded)
511 BC.outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of "
512 << OriginalFunctionCount << " functions in " << Iteration
513 << " passes. " << NumJTFunctionsFolded
514 << " functions had jump tables.\n"
515 << "BOLT-INFO: Removing all identical functions will save "
516 << format(Fmt: "%.2lf", Vals: (double)BytesSavedEstimate / 1024)
517 << " KB of code space. Folded functions were called " << NumCalled
518 << " times based on profile.\n";
519
520 return Error::success();
521}
522
523} // namespace bolt
524} // namespace llvm
525

source code of bolt/lib/Passes/IdenticalCodeFolding.cpp