| 1 | //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the IdenticalCodeFolding class. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "bolt/Passes/IdenticalCodeFolding.h" |
| 14 | #include "bolt/Core/HashUtilities.h" |
| 15 | #include "bolt/Core/ParallelUtilities.h" |
| 16 | #include "llvm/ADT/SmallVector.h" |
| 17 | #include "llvm/Support/CommandLine.h" |
| 18 | #include "llvm/Support/FormatVariadic.h" |
| 19 | #include "llvm/Support/ThreadPool.h" |
| 20 | #include "llvm/Support/Timer.h" |
| 21 | #include <atomic> |
| 22 | #include <iterator> |
| 23 | #include <map> |
| 24 | #include <set> |
| 25 | #include <unordered_map> |
| 26 | |
| 27 | #define DEBUG_TYPE "bolt-icf" |
| 28 | |
| 29 | using namespace llvm; |
| 30 | using namespace bolt; |
| 31 | |
| 32 | namespace opts { |
| 33 | |
| 34 | extern cl::OptionCategory BoltOptCategory; |
| 35 | |
| 36 | static cl::opt<bool> |
| 37 | ICFUseDFS("icf-dfs" , cl::desc("use DFS ordering when using -icf option" ), |
| 38 | cl::ReallyHidden, cl::cat(BoltOptCategory)); |
| 39 | |
| 40 | static cl::opt<bool> |
| 41 | TimeICF("time-icf" , |
| 42 | cl::desc("time icf steps" ), |
| 43 | cl::ReallyHidden, |
| 44 | cl::ZeroOrMore, |
| 45 | cl::cat(BoltOptCategory)); |
| 46 | |
| 47 | cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false, |
| 48 | DeprecatedICFNumericOptionParser> |
| 49 | ICF("icf" , cl::desc("fold functions with identical code" ), |
| 50 | cl::init(Val: bolt::IdenticalCodeFolding::ICFLevel::None), |
| 51 | cl::values(clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "all" , |
| 52 | "Enable identical code folding" ), |
| 53 | clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "1" , |
| 54 | "Enable identical code folding" ), |
| 55 | clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "" , |
| 56 | "Enable identical code folding" ), |
| 57 | clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, |
| 58 | "none" , |
| 59 | "Disable identical code folding (default)" ), |
| 60 | clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, "0" , |
| 61 | "Disable identical code folding (default)" ), |
| 62 | clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::Safe, |
| 63 | "safe" , "Enable safe identical code folding" )), |
| 64 | cl::ZeroOrMore, cl::ValueOptional, cl::cat(BoltOptCategory)); |
| 65 | } // namespace opts |
| 66 | |
| 67 | bool IdenticalCodeFolding::shouldOptimize(const BinaryFunction &BF) const { |
| 68 | if (BF.hasUnknownControlFlow()) |
| 69 | return false; |
| 70 | if (BF.isFolded()) |
| 71 | return false; |
| 72 | if (BF.hasSDTMarker()) |
| 73 | return false; |
| 74 | if (BF.isPseudo()) |
| 75 | return false; |
| 76 | if (opts::ICF == ICFLevel::Safe && BF.hasAddressTaken()) |
| 77 | return false; |
| 78 | return BinaryFunctionPass::shouldOptimize(BF); |
| 79 | } |
| 80 | |
| 81 | /// Compare two jump tables in 2 functions. The function relies on consistent |
| 82 | /// ordering of basic blocks in both binary functions (e.g. DFS). |
| 83 | static bool equalJumpTables(const JumpTable &JumpTableA, |
| 84 | const JumpTable &JumpTableB, |
| 85 | const BinaryFunction &FunctionA, |
| 86 | const BinaryFunction &FunctionB) { |
| 87 | if (JumpTableA.EntrySize != JumpTableB.EntrySize) |
| 88 | return false; |
| 89 | |
| 90 | if (JumpTableA.Type != JumpTableB.Type) |
| 91 | return false; |
| 92 | |
| 93 | if (JumpTableA.getSize() != JumpTableB.getSize()) |
| 94 | return false; |
| 95 | |
| 96 | for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) { |
| 97 | const MCSymbol *LabelA = JumpTableA.Entries[Index]; |
| 98 | const MCSymbol *LabelB = JumpTableB.Entries[Index]; |
| 99 | |
| 100 | const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(Label: LabelA); |
| 101 | const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(Label: LabelB); |
| 102 | |
| 103 | if (!TargetA || !TargetB) { |
| 104 | assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) && |
| 105 | "no target basic block found" ); |
| 106 | assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) && |
| 107 | "no target basic block found" ); |
| 108 | |
| 109 | if (TargetA != TargetB) |
| 110 | return false; |
| 111 | |
| 112 | continue; |
| 113 | } |
| 114 | |
| 115 | assert(TargetA && TargetB && "cannot locate target block(s)" ); |
| 116 | |
| 117 | if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex()) |
| 118 | return false; |
| 119 | } |
| 120 | |
| 121 | return true; |
| 122 | } |
| 123 | |
| 124 | /// Helper function that compares an instruction of this function to the |
| 125 | /// given instruction of the given function. The functions should have |
| 126 | /// identical CFG. |
| 127 | template <class Compare> |
| 128 | static bool isInstrEquivalentWith(const MCInst &InstA, |
| 129 | const BinaryBasicBlock &BBA, |
| 130 | const MCInst &InstB, |
| 131 | const BinaryBasicBlock &BBB, Compare Comp) { |
| 132 | if (InstA.getOpcode() != InstB.getOpcode()) |
| 133 | return false; |
| 134 | |
| 135 | const BinaryContext &BC = BBA.getFunction()->getBinaryContext(); |
| 136 | |
| 137 | // In this function we check for special conditions: |
| 138 | // |
| 139 | // * instructions with landing pads |
| 140 | // |
| 141 | // Most of the common cases should be handled by MCPlus::equals() |
| 142 | // that compares regular instruction operands. |
| 143 | // |
| 144 | // NB: there's no need to compare jump table indirect jump instructions |
| 145 | // separately as jump tables are handled by comparing corresponding |
| 146 | // symbols. |
| 147 | const std::optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(Inst: InstA); |
| 148 | const std::optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(Inst: InstB); |
| 149 | |
| 150 | if (EHInfoA || EHInfoB) { |
| 151 | if (!EHInfoA && (EHInfoB->first || EHInfoB->second)) |
| 152 | return false; |
| 153 | |
| 154 | if (!EHInfoB && (EHInfoA->first || EHInfoA->second)) |
| 155 | return false; |
| 156 | |
| 157 | if (EHInfoA && EHInfoB) { |
| 158 | // Action indices should match. |
| 159 | if (EHInfoA->second != EHInfoB->second) |
| 160 | return false; |
| 161 | |
| 162 | if (!EHInfoA->first != !EHInfoB->first) |
| 163 | return false; |
| 164 | |
| 165 | if (EHInfoA->first && EHInfoB->first) { |
| 166 | const BinaryBasicBlock *LPA = BBA.getLandingPad(Label: EHInfoA->first); |
| 167 | const BinaryBasicBlock *LPB = BBB.getLandingPad(Label: EHInfoB->first); |
| 168 | assert(LPA && LPB && "cannot locate landing pad(s)" ); |
| 169 | |
| 170 | if (LPA->getLayoutIndex() != LPB->getLayoutIndex()) |
| 171 | return false; |
| 172 | } |
| 173 | } |
| 174 | } |
| 175 | |
| 176 | return BC.MIB->equals(InstA, InstB, Comp); |
| 177 | } |
| 178 | |
| 179 | /// Returns true if this function has identical code and CFG with |
| 180 | /// the given function \p BF. |
| 181 | /// |
| 182 | /// If \p CongruentSymbols is set to true, then symbolic operands that reference |
| 183 | /// potentially identical but different functions are ignored during the |
| 184 | /// comparison. |
| 185 | static bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, |
| 186 | bool CongruentSymbols) { |
| 187 | assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG" ); |
| 188 | |
| 189 | // Compare the two functions, one basic block at a time. |
| 190 | // Currently we require two identical basic blocks to have identical |
| 191 | // instruction sequences and the same index in their corresponding |
| 192 | // functions. The latter is important for CFG equality. |
| 193 | |
| 194 | if (A.getLayout().block_size() != B.getLayout().block_size()) |
| 195 | return false; |
| 196 | |
| 197 | // Comparing multi-entry functions could be non-trivial. |
| 198 | if (A.isMultiEntry() || B.isMultiEntry()) |
| 199 | return false; |
| 200 | |
| 201 | if (A.hasIslandsInfo() || B.hasIslandsInfo()) |
| 202 | return false; |
| 203 | |
| 204 | // Process both functions in either DFS or existing order. |
| 205 | SmallVector<const BinaryBasicBlock *, 0> OrderA; |
| 206 | SmallVector<const BinaryBasicBlock *, 0> OrderB; |
| 207 | if (opts::ICFUseDFS) { |
| 208 | copy(Range: A.dfs(), Out: std::back_inserter(x&: OrderA)); |
| 209 | copy(Range: B.dfs(), Out: std::back_inserter(x&: OrderB)); |
| 210 | } else { |
| 211 | copy(Range: A.getLayout().blocks(), Out: std::back_inserter(x&: OrderA)); |
| 212 | copy(Range: B.getLayout().blocks(), Out: std::back_inserter(x&: OrderB)); |
| 213 | } |
| 214 | |
| 215 | const BinaryContext &BC = A.getBinaryContext(); |
| 216 | |
| 217 | auto BBI = OrderB.begin(); |
| 218 | for (const BinaryBasicBlock *BB : OrderA) { |
| 219 | const BinaryBasicBlock *OtherBB = *BBI; |
| 220 | |
| 221 | if (BB->getLayoutIndex() != OtherBB->getLayoutIndex()) |
| 222 | return false; |
| 223 | |
| 224 | // Compare successor basic blocks. |
| 225 | // NOTE: the comparison for jump tables is only partially verified here. |
| 226 | if (BB->succ_size() != OtherBB->succ_size()) |
| 227 | return false; |
| 228 | |
| 229 | auto SuccBBI = OtherBB->succ_begin(); |
| 230 | for (const BinaryBasicBlock *SuccBB : BB->successors()) { |
| 231 | const BinaryBasicBlock *SuccOtherBB = *SuccBBI; |
| 232 | if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex()) |
| 233 | return false; |
| 234 | ++SuccBBI; |
| 235 | } |
| 236 | |
| 237 | // Compare all instructions including pseudos. |
| 238 | auto I = BB->begin(), E = BB->end(); |
| 239 | auto OtherI = OtherBB->begin(), OtherE = OtherBB->end(); |
| 240 | while (I != E && OtherI != OtherE) { |
| 241 | // Compare symbols. |
| 242 | auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA, |
| 243 | const MCSymbol *SymbolB) { |
| 244 | if (SymbolA == SymbolB) |
| 245 | return true; |
| 246 | |
| 247 | // All local symbols are considered identical since they affect a |
| 248 | // control flow and we check the control flow separately. |
| 249 | // If a local symbol is escaped, then the function (potentially) has |
| 250 | // multiple entry points and we exclude such functions from |
| 251 | // comparison. |
| 252 | if (SymbolA->isTemporary() && SymbolB->isTemporary()) |
| 253 | return true; |
| 254 | |
| 255 | // Compare symbols as functions. |
| 256 | uint64_t EntryIDA = 0; |
| 257 | uint64_t EntryIDB = 0; |
| 258 | const BinaryFunction *FunctionA = |
| 259 | BC.getFunctionForSymbol(Symbol: SymbolA, EntryDesc: &EntryIDA); |
| 260 | const BinaryFunction *FunctionB = |
| 261 | BC.getFunctionForSymbol(Symbol: SymbolB, EntryDesc: &EntryIDB); |
| 262 | if (FunctionA && EntryIDA) |
| 263 | FunctionA = nullptr; |
| 264 | if (FunctionB && EntryIDB) |
| 265 | FunctionB = nullptr; |
| 266 | if (FunctionA && FunctionB) { |
| 267 | // Self-referencing functions and recursive calls. |
| 268 | if (FunctionA == &A && FunctionB == &B) |
| 269 | return true; |
| 270 | |
| 271 | // Functions with different hash values can never become identical, |
| 272 | // hence A and B are different. |
| 273 | if (CongruentSymbols) |
| 274 | return FunctionA->getHash() == FunctionB->getHash(); |
| 275 | |
| 276 | return FunctionA == FunctionB; |
| 277 | } |
| 278 | |
| 279 | // One of the symbols represents a function, the other one does not. |
| 280 | if (FunctionA != FunctionB) |
| 281 | return false; |
| 282 | |
| 283 | // Check if symbols are jump tables. |
| 284 | const BinaryData *SIA = BC.getBinaryDataByName(Name: SymbolA->getName()); |
| 285 | if (!SIA) |
| 286 | return false; |
| 287 | const BinaryData *SIB = BC.getBinaryDataByName(Name: SymbolB->getName()); |
| 288 | if (!SIB) |
| 289 | return false; |
| 290 | |
| 291 | assert((SIA->getAddress() != SIB->getAddress()) && |
| 292 | "different symbols should not have the same value" ); |
| 293 | |
| 294 | const JumpTable *JumpTableA = |
| 295 | A.getJumpTableContainingAddress(Address: SIA->getAddress()); |
| 296 | if (!JumpTableA) |
| 297 | return false; |
| 298 | |
| 299 | const JumpTable *JumpTableB = |
| 300 | B.getJumpTableContainingAddress(Address: SIB->getAddress()); |
| 301 | if (!JumpTableB) |
| 302 | return false; |
| 303 | |
| 304 | if ((SIA->getAddress() - JumpTableA->getAddress()) != |
| 305 | (SIB->getAddress() - JumpTableB->getAddress())) |
| 306 | return false; |
| 307 | |
| 308 | return equalJumpTables(JumpTableA: *JumpTableA, JumpTableB: *JumpTableB, FunctionA: A, FunctionB: B); |
| 309 | }; |
| 310 | |
| 311 | if (!isInstrEquivalentWith(InstA: *I, BBA: *BB, InstB: *OtherI, BBB: *OtherBB, |
| 312 | Comp: AreSymbolsIdentical)) |
| 313 | return false; |
| 314 | |
| 315 | ++I; |
| 316 | ++OtherI; |
| 317 | } |
| 318 | |
| 319 | // One of the identical blocks may have a trailing unconditional jump that |
| 320 | // is ignored for CFG purposes. |
| 321 | const MCInst *TrailingInstr = |
| 322 | (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : nullptr)); |
| 323 | if (TrailingInstr && !BC.MIB->isUnconditionalBranch(Inst: *TrailingInstr)) |
| 324 | return false; |
| 325 | |
| 326 | ++BBI; |
| 327 | } |
| 328 | |
| 329 | // Compare exceptions action tables. |
| 330 | if (A.getLSDAActionTable() != B.getLSDAActionTable() || |
| 331 | A.getLSDATypeTable() != B.getLSDATypeTable() || |
| 332 | A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) |
| 333 | return false; |
| 334 | |
| 335 | return true; |
| 336 | } |
| 337 | |
| 338 | // This hash table is used to identify identical functions. It maps |
| 339 | // a function to a bucket of functions identical to it. |
| 340 | struct KeyHash { |
| 341 | size_t operator()(const BinaryFunction *F) const { return F->getHash(); } |
| 342 | }; |
| 343 | |
| 344 | /// Identify two congruent functions. Two functions are considered congruent, |
| 345 | /// if they are identical/equal except for some of their instruction operands |
| 346 | /// that reference potentially identical functions, i.e. functions that could |
| 347 | /// be folded later. Congruent functions are candidates for folding in our |
| 348 | /// iterative ICF algorithm. |
| 349 | /// |
| 350 | /// Congruent functions are required to have identical hash. |
| 351 | struct KeyCongruent { |
| 352 | bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { |
| 353 | if (A == B) |
| 354 | return true; |
| 355 | return isIdenticalWith(A: *A, B: *B, /*CongruentSymbols=*/true); |
| 356 | } |
| 357 | }; |
| 358 | |
| 359 | struct KeyEqual { |
| 360 | bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { |
| 361 | if (A == B) |
| 362 | return true; |
| 363 | return isIdenticalWith(A: *A, B: *B, /*CongruentSymbols=*/false); |
| 364 | } |
| 365 | }; |
| 366 | |
| 367 | typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>, |
| 368 | KeyHash, KeyCongruent> |
| 369 | CongruentBucketsMap; |
| 370 | |
| 371 | typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>, |
| 372 | KeyHash, KeyEqual> |
| 373 | IdenticalBucketsMap; |
| 374 | |
| 375 | namespace llvm { |
| 376 | namespace bolt { |
| 377 | void IdenticalCodeFolding::initVTableReferences(const BinaryContext &BC) { |
| 378 | for (const auto &[Address, Data] : BC.getBinaryData()) { |
| 379 | // Filter out all symbols that are not vtables. |
| 380 | if (!Data->getName().starts_with(Prefix: "_ZTV" )) |
| 381 | continue; |
| 382 | for (uint64_t I = Address, End = I + Data->getSize(); I < End; I += 8) |
| 383 | setAddressUsedInVTable(I); |
| 384 | } |
| 385 | } |
| 386 | |
| 387 | void IdenticalCodeFolding::analyzeDataRelocations(BinaryContext &BC) { |
| 388 | initVTableReferences(BC); |
| 389 | // For static relocations there should be a symbol for function references. |
| 390 | for (const BinarySection &Sec : BC.sections()) { |
| 391 | if (!Sec.hasSectionRef() || !Sec.isData()) |
| 392 | continue; |
| 393 | for (const auto &Rel : Sec.relocations()) { |
| 394 | const uint64_t RelAddr = Rel.Offset + Sec.getAddress(); |
| 395 | if (isAddressInVTable(Address: RelAddr)) |
| 396 | continue; |
| 397 | if (BinaryFunction *BF = BC.getFunctionForSymbol(Symbol: Rel.Symbol)) |
| 398 | BF->setHasAddressTaken(true); |
| 399 | } |
| 400 | // For dynamic relocations there are two cases: |
| 401 | // 1: No symbol and only addend. |
| 402 | // 2: There is a symbol, but it does not references a function in a binary. |
| 403 | for (const auto &Rel : Sec.dynamicRelocations()) { |
| 404 | const uint64_t RelAddr = Rel.Offset + Sec.getAddress(); |
| 405 | if (isAddressInVTable(Address: RelAddr)) |
| 406 | continue; |
| 407 | if (BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Address: Rel.Addend)) |
| 408 | BF->setHasAddressTaken(true); |
| 409 | } |
| 410 | } |
| 411 | } |
| 412 | |
| 413 | void IdenticalCodeFolding::analyzeFunctions(BinaryContext &BC) { |
| 414 | ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { |
| 415 | for (const BinaryBasicBlock &BB : BF) |
| 416 | for (const MCInst &Inst : BB) |
| 417 | if (!(BC.MIB->isCall(Inst) || BC.MIB->isBranch(Inst))) |
| 418 | BF.analyzeInstructionForFuncReference(Inst); |
| 419 | }; |
| 420 | ParallelUtilities::PredicateTy SkipFunc = |
| 421 | [&](const BinaryFunction &BF) -> bool { return !BF.hasCFG(); }; |
| 422 | ParallelUtilities::runOnEachFunction( |
| 423 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFunction: WorkFun, |
| 424 | SkipPredicate: SkipFunc, LogName: "markUnsafe" ); |
| 425 | |
| 426 | LLVM_DEBUG({ |
| 427 | for (const auto &BFIter : BC.getBinaryFunctions()) { |
| 428 | if (!BFIter.second.hasAddressTaken()) |
| 429 | continue; |
| 430 | dbgs() << "BOLT-DEBUG: skipping function with reference taken " |
| 431 | << BFIter.second.getOneName() << '\n'; |
| 432 | } |
| 433 | }); |
| 434 | } |
| 435 | |
| 436 | void IdenticalCodeFolding::markFunctionsUnsafeToFold(BinaryContext &BC) { |
| 437 | NamedRegionTimer MarkFunctionsUnsafeToFoldTimer( |
| 438 | "markFunctionsUnsafeToFold" , "markFunctionsUnsafeToFold" , "ICF breakdown" , |
| 439 | "ICF breakdown" , opts::TimeICF); |
| 440 | if (!BC.isX86()) |
| 441 | BC.outs() << "BOLT-WARNING: safe ICF is only supported for x86\n" ; |
| 442 | analyzeDataRelocations(BC); |
| 443 | analyzeFunctions(BC); |
| 444 | } |
| 445 | |
| 446 | Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { |
| 447 | const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); |
| 448 | uint64_t NumFunctionsFolded = 0; |
| 449 | std::atomic<uint64_t> NumJTFunctionsFolded{0}; |
| 450 | std::atomic<uint64_t> BytesSavedEstimate{0}; |
| 451 | std::atomic<uint64_t> NumCalled{0}; |
| 452 | std::atomic<uint64_t> NumFoldedLastIteration{0}; |
| 453 | CongruentBucketsMap CongruentBuckets; |
| 454 | |
| 455 | // Hash all the functions |
| 456 | auto hashFunctions = [&]() { |
| 457 | NamedRegionTimer HashFunctionsTimer("hashing" , "hashing" , "ICF breakdown" , |
| 458 | "ICF breakdown" , opts::TimeICF); |
| 459 | ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { |
| 460 | // Make sure indices are in-order. |
| 461 | if (opts::ICFUseDFS) |
| 462 | BF.getLayout().updateLayoutIndices(Order: BF.dfs()); |
| 463 | else |
| 464 | BF.getLayout().updateLayoutIndices(); |
| 465 | |
| 466 | // Pre-compute hash before pushing into hashtable. |
| 467 | // Hash instruction operands to minimize hash collisions. |
| 468 | BF.computeHash( |
| 469 | UseDFS: opts::ICFUseDFS, HashFunction: HashFunction::Default, |
| 470 | OperandHashFunc: [&BC](const MCOperand &Op) { return hashInstOperand(BC, Operand: Op); }); |
| 471 | }; |
| 472 | |
| 473 | ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { |
| 474 | return !shouldOptimize(BF); |
| 475 | }; |
| 476 | |
| 477 | ParallelUtilities::runOnEachFunction( |
| 478 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFunction: WorkFun, SkipPredicate: SkipFunc, |
| 479 | LogName: "hashFunctions" , /*ForceSequential*/ false, TasksPerThread: 2); |
| 480 | }; |
| 481 | |
| 482 | // Creates buckets with congruent functions - functions that potentially |
| 483 | // could be folded. |
| 484 | auto createCongruentBuckets = [&]() { |
| 485 | NamedRegionTimer CongruentBucketsTimer("congruent buckets" , |
| 486 | "congruent buckets" , "ICF breakdown" , |
| 487 | "ICF breakdown" , opts::TimeICF); |
| 488 | for (auto &BFI : BC.getBinaryFunctions()) { |
| 489 | BinaryFunction &BF = BFI.second; |
| 490 | if (!shouldOptimize(BF)) |
| 491 | continue; |
| 492 | CongruentBuckets[&BF].emplace(args: &BF); |
| 493 | } |
| 494 | }; |
| 495 | |
| 496 | // Partition each set of congruent functions into sets of identical functions |
| 497 | // and fold them |
| 498 | auto performFoldingPass = [&]() { |
| 499 | NamedRegionTimer FoldingPassesTimer("folding passes" , "folding passes" , |
| 500 | "ICF breakdown" , "ICF breakdown" , |
| 501 | opts::TimeICF); |
| 502 | Timer SinglePass("single fold pass" , "single fold pass" ); |
| 503 | LLVM_DEBUG(SinglePass.startTimer()); |
| 504 | |
| 505 | ThreadPoolInterface *ThPool; |
| 506 | if (!opts::NoThreads) |
| 507 | ThPool = &ParallelUtilities::getThreadPool(); |
| 508 | |
| 509 | // Fold identical functions within a single congruent bucket |
| 510 | auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) { |
| 511 | Timer T("folding single congruent list" , "folding single congruent list" ); |
| 512 | LLVM_DEBUG(T.startTimer()); |
| 513 | |
| 514 | // Identical functions go into the same bucket. |
| 515 | IdenticalBucketsMap IdenticalBuckets; |
| 516 | for (BinaryFunction *BF : Candidates) { |
| 517 | IdenticalBuckets[BF].emplace_back(args&: BF); |
| 518 | } |
| 519 | |
| 520 | for (auto &IBI : IdenticalBuckets) { |
| 521 | // Functions identified as identical. |
| 522 | std::vector<BinaryFunction *> &Twins = IBI.second; |
| 523 | if (Twins.size() < 2) |
| 524 | continue; |
| 525 | |
| 526 | // Fold functions. Keep the order consistent across invocations with |
| 527 | // different options. |
| 528 | llvm::stable_sort( |
| 529 | Range&: Twins, C: [](const BinaryFunction *A, const BinaryFunction *B) { |
| 530 | return A->getFunctionNumber() < B->getFunctionNumber(); |
| 531 | }); |
| 532 | |
| 533 | BinaryFunction *ParentBF = Twins[0]; |
| 534 | if (!ParentBF->hasFunctionsFoldedInto()) |
| 535 | NumCalled += ParentBF->getKnownExecutionCount(); |
| 536 | for (unsigned I = 1; I < Twins.size(); ++I) { |
| 537 | BinaryFunction *ChildBF = Twins[I]; |
| 538 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into " |
| 539 | << *ParentBF << '\n'); |
| 540 | |
| 541 | // Remove child function from the list of candidates. |
| 542 | auto FI = Candidates.find(x: ChildBF); |
| 543 | assert(FI != Candidates.end() && |
| 544 | "function expected to be in the set" ); |
| 545 | Candidates.erase(position: FI); |
| 546 | |
| 547 | // Fold the function and remove from the list of processed functions. |
| 548 | BytesSavedEstimate += ChildBF->getSize(); |
| 549 | if (!ChildBF->hasFunctionsFoldedInto()) |
| 550 | NumCalled += ChildBF->getKnownExecutionCount(); |
| 551 | BC.foldFunction(ChildBF&: *ChildBF, ParentBF&: *ParentBF); |
| 552 | |
| 553 | ++NumFoldedLastIteration; |
| 554 | |
| 555 | if (ParentBF->hasJumpTables()) |
| 556 | ++NumJTFunctionsFolded; |
| 557 | } |
| 558 | } |
| 559 | |
| 560 | LLVM_DEBUG(T.stopTimer()); |
| 561 | }; |
| 562 | |
| 563 | // Create a task for each congruent bucket |
| 564 | for (auto &Entry : CongruentBuckets) { |
| 565 | std::set<BinaryFunction *> &Bucket = Entry.second; |
| 566 | if (Bucket.size() < 2) |
| 567 | continue; |
| 568 | |
| 569 | if (opts::NoThreads) |
| 570 | processSingleBucket(Bucket); |
| 571 | else |
| 572 | ThPool->async(F&: processSingleBucket, ArgList: std::ref(t&: Bucket)); |
| 573 | } |
| 574 | |
| 575 | if (!opts::NoThreads) |
| 576 | ThPool->wait(); |
| 577 | |
| 578 | LLVM_DEBUG(SinglePass.stopTimer()); |
| 579 | }; |
| 580 | if (opts::ICF == ICFLevel::Safe) |
| 581 | markFunctionsUnsafeToFold(BC); |
| 582 | hashFunctions(); |
| 583 | createCongruentBuckets(); |
| 584 | |
| 585 | unsigned Iteration = 1; |
| 586 | // We repeat the pass until no new modifications happen. |
| 587 | do { |
| 588 | NumFoldedLastIteration = 0; |
| 589 | LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n" ); |
| 590 | |
| 591 | performFoldingPass(); |
| 592 | |
| 593 | NumFunctionsFolded += NumFoldedLastIteration; |
| 594 | ++Iteration; |
| 595 | |
| 596 | } while (NumFoldedLastIteration > 0); |
| 597 | |
| 598 | LLVM_DEBUG({ |
| 599 | // Print functions that are congruent but not identical. |
| 600 | for (auto &CBI : CongruentBuckets) { |
| 601 | std::set<BinaryFunction *> &Candidates = CBI.second; |
| 602 | if (Candidates.size() < 2) |
| 603 | continue; |
| 604 | dbgs() << "BOLT-DEBUG: the following " << Candidates.size() |
| 605 | << " functions (each of size " << (*Candidates.begin())->getSize() |
| 606 | << " bytes) are congruent but not identical:\n" ; |
| 607 | for (BinaryFunction *BF : Candidates) { |
| 608 | dbgs() << " " << *BF; |
| 609 | if (BF->getKnownExecutionCount()) |
| 610 | dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)" ; |
| 611 | dbgs() << '\n'; |
| 612 | } |
| 613 | } |
| 614 | }); |
| 615 | |
| 616 | if (NumFunctionsFolded) |
| 617 | BC.outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of " |
| 618 | << OriginalFunctionCount << " functions in " << Iteration |
| 619 | << " passes. " << NumJTFunctionsFolded |
| 620 | << " functions had jump tables.\n" |
| 621 | << "BOLT-INFO: Removing all identical functions will save " |
| 622 | << format(Fmt: "%.2lf" , Vals: (double)BytesSavedEstimate / 1024) |
| 623 | << " KB of code space. Folded functions were called " << NumCalled |
| 624 | << " times based on profile.\n" ; |
| 625 | |
| 626 | return Error::success(); |
| 627 | } |
| 628 | |
| 629 | } // namespace bolt |
| 630 | } // namespace llvm |
| 631 | |