//===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Inliner class used for inlining binary functions.
//
// The current inliner has limited callee support
// (see getInliningInfo() below for the most up-to-date details):
//
//  * No exception handling
//  * No jump tables
//  * Single entry point
//  * CFI update not supported - breaks unwinding
//  * Regular Call Sites:
//    - only leaf functions (or callees with only tail calls)
//      * no invokes (they can't be tail calls)
//    - no direct use of %rsp
//  * Tail Call Sites:
//    - since the stack is unmodified, the regular call limitations are lifted
//
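// Example (illustrative): the pass is enabled through the llvm-bolt options
// defined below, for instance:
//
//   llvm-bolt a.out -o a.bolt --inline-small-functions
//   llvm-bolt a.out -o a.bolt --force-inline=foo,bar
//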
//===----------------------------------------------------------------------===//

#include "bolt/Passes/Inliner.h"
#include "bolt/Core/MCPlus.h"
#include "llvm/Support/CommandLine.h"

#define DEBUG_TYPE "bolt-inliner"

using namespace llvm;

namespace opts {

extern cl::OptionCategory BoltOptCategory;

static cl::opt<bool>
    AdjustProfile("inline-ap",
                  cl::desc("adjust function profile after inlining"),
                  cl::cat(BoltOptCategory));

static cl::list<std::string>
    ForceInlineFunctions("force-inline", cl::CommaSeparated,
                         cl::desc("list of functions to always consider "
                                  "for inlining"),
                         cl::value_desc("func1,func2,func3,..."), cl::Hidden,
                         cl::cat(BoltOptCategory));

static cl::list<std::string> SkipInlineFunctions(
    "skip-inline", cl::CommaSeparated,
    cl::desc("list of functions to never consider for inlining"),
    cl::value_desc("func1,func2,func3,..."), cl::Hidden,
    cl::cat(BoltOptCategory));

static cl::opt<bool> InlineAll("inline-all", cl::desc("inline all functions"),
                               cl::cat(BoltOptCategory));

static cl::opt<bool> InlineIgnoreLeafCFI(
    "inline-ignore-leaf-cfi",
    cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
    cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<bool> InlineIgnoreCFI(
    "inline-ignore-cfi",
    cl::desc(
        "inline functions with CFI programs (can break exception handling)"),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned>
    InlineLimit("inline-limit",
                cl::desc("maximum number of call sites to inline"), cl::init(0),
                cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned>
    InlineMaxIters("inline-max-iters",
                   cl::desc("maximum number of inline iterations"), cl::init(3),
                   cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> InlineSmallFunctions(
    "inline-small-functions",
    cl::desc("inline functions if increase in size is less than defined by "
             "-inline-small-functions-bytes"),
    cl::cat(BoltOptCategory));

static cl::opt<unsigned> InlineSmallFunctionsBytes(
    "inline-small-functions-bytes",
    cl::desc("max number of bytes for the function to be considered small for "
             "inlining purposes"),
    cl::init(4), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> NoInline(
    "no-inline",
    cl::desc("disable all inlining (overrides other inlining options)"),
    cl::cat(BoltOptCategory));

/// Return true if any of the inlining options is specified and the inlining
/// pass should be executed. Whenever a new inlining option is added, this
/// function must be updated to reflect the change.
bool inliningEnabled() {
  return !NoInline &&
         (InlineAll || InlineSmallFunctions || !ForceInlineFunctions.empty());
}

bool mustConsider(const llvm::bolt::BinaryFunction &Function) {
  for (std::string &Name : opts::ForceInlineFunctions)
    if (Function.hasName(Name))
      return true;
  return false;
}

bool mustSkip(const llvm::bolt::BinaryFunction &Function) {
  return llvm::any_of(opts::SkipInlineFunctions, [&](const std::string &Name) {
    return Function.hasName(Name);
  });
}

void syncOptions() {
  if (opts::InlineIgnoreCFI)
    opts::InlineIgnoreLeafCFI = true;

  if (opts::InlineAll)
    opts::InlineSmallFunctions = true;
}

} // namespace opts

namespace llvm {
namespace bolt {

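// Lazily-computed sizes (in bytes) of synthesized call and tail-call
// instructions, cached across calls to getSizeOfCallInst() and
// getSizeOfTailCallInst().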
uint64_t Inliner::SizeOfCallInst;
uint64_t Inliner::SizeOfTailCallInst;

uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) {
  if (SizeOfCallInst)
    return SizeOfCallInst;

  MCInst Inst;
  BC.MIB->createCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
  SizeOfCallInst = BC.computeInstructionSize(Inst);

  return SizeOfCallInst;
}

uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) {
  if (SizeOfTailCallInst)
    return SizeOfTailCallInst;

  MCInst Inst;
  BC.MIB->createTailCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
  SizeOfTailCallInst = BC.computeInstructionSize(Inst);

  return SizeOfTailCallInst;
}

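/// Determine whether and how function \p BF can be inlined: INL_NONE if
/// inlining is not possible, INL_TAILCALL if the function may only be inlined
/// at tail-call sites (e.g. because it accesses the stack pointer directly),
/// or INL_ANY otherwise. The returned info also carries the estimated code
/// size after inlining.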
InliningInfo getInliningInfo(const BinaryFunction &BF) {
  const BinaryContext &BC = BF.getBinaryContext();
  bool DirectSP = false;
  bool HasCFI = false;
  bool IsLeaf = true;

  // Perform the eligibility checks unless the function is explicitly
  // force-inlined via the command line.
  if (!opts::mustConsider(BF)) {
    if (BF.hasSDTMarker())
      return INL_NONE;

    if (BF.hasEHRanges())
      return INL_NONE;

    if (BF.isMultiEntry())
      return INL_NONE;

    if (BF.hasJumpTables())
      return INL_NONE;

    const MCPhysReg SPReg = BC.MIB->getStackPointer();
    for (const BinaryBasicBlock &BB : BF) {
      for (const MCInst &Inst : BB) {
        // Tail calls are marked as implicitly using the stack pointer, but
        // that alone does not prevent inlining. A tail call also terminates
        // the basic block, so stop scanning it here.
        if (BC.MIB->isTailCall(Inst))
          break;

        if (BC.MIB->isCFI(Inst)) {
          HasCFI = true;
          continue;
        }

        if (BC.MIB->isCall(Inst))
          IsLeaf = false;

        // Push/pop instructions are straightforward to handle.
        if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
          continue;

        DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
                    BC.MIB->hasUseOfPhysReg(Inst, SPReg);
      }
    }
  }

  if (HasCFI) {
    if (!opts::InlineIgnoreLeafCFI)
      return INL_NONE;

    if (!IsLeaf && !opts::InlineIgnoreCFI)
      return INL_NONE;
  }

  InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY);

  size_t Size = BF.estimateSize();

  Info.SizeAfterInlining = Size;
  Info.SizeAfterTailCallInlining = Size;

  // Handle the special case of a known size reduction.
  if (BF.size() == 1) {
    // For a regular call, the last return instruction can be removed
    // (or converted to a branch).
    const MCInst *LastInst = BF.back().getLastNonPseudoInstr();
    if (LastInst && BC.MIB->isReturn(*LastInst) &&
        !BC.MIB->isTailCall(*LastInst)) {
      const uint64_t RetInstSize = BC.computeInstructionSize(*LastInst);
      assert(Size >= RetInstSize);
      Info.SizeAfterInlining -= RetInstSize;
    }
  }

  return Info;
}

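/// Scan all functions in \p BC and record those eligible for inlining,
/// together with their inlining info, in InliningCandidates.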
void Inliner::findInliningCandidates(BinaryContext &BC) {
  for (const auto &BFI : BC.getBinaryFunctions()) {
    const BinaryFunction &Function = BFI.second;
    if (!shouldOptimize(Function) || opts::mustSkip(Function))
      continue;
    const InliningInfo InlInfo = getInliningInfo(Function);
    if (InlInfo.Type != INL_NONE)
      InliningCandidates[&Function] = InlInfo;
  }
}

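/// Inline the body of \p Callee at the call site \p CallInst inside
/// \p CallerBB, splitting basic blocks and reconnecting the CFG as needed.
/// Return the basic block and instruction iterator immediately following the
/// inlined code, so the caller can resume scanning for more call sites.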
std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator>
Inliner::inlineCall(BinaryBasicBlock &CallerBB,
                    BinaryBasicBlock::iterator CallInst,
                    const BinaryFunction &Callee) {
  BinaryFunction &CallerFunction = *CallerBB.getFunction();
  BinaryContext &BC = CallerFunction.getBinaryContext();
  auto &MIB = *BC.MIB;

  assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call");
  assert(!Callee.isMultiEntry() &&
         "cannot inline function with multiple entries");
  assert(!Callee.hasJumpTables() &&
         "cannot inline function with jump table(s)");

  // Get information about the call site.
  const bool CSIsInvoke = BC.MIB->isInvoke(*CallInst);
  const bool CSIsTailCall = BC.MIB->isTailCall(*CallInst);
  const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(*CallInst);
  const std::optional<MCPlus::MCLandingPad> CSEHInfo =
      BC.MIB->getEHInfo(*CallInst);

  // Split the basic block at the call site if the callee's entry block will
  // receive additional incoming edges, so that the inlined entry starts its
  // own block.
  BinaryBasicBlock *FirstInlinedBB = &CallerBB;
  if (Callee.front().pred_size() && CallInst != CallerBB.begin()) {
    FirstInlinedBB = CallerBB.splitAt(CallInst);
    CallInst = FirstInlinedBB->begin();
  }

  // Split the basic block after the call instruction unless the callee is
  // trivial (i.e. consists of a single basic block). If necessary, obtain a
  // basic block for return instructions in the callee to redirect to.
  BinaryBasicBlock *NextBB = nullptr;
  if (Callee.size() > 1) {
    if (std::next(CallInst) != FirstInlinedBB->end())
      NextBB = FirstInlinedBB->splitAt(std::next(CallInst));
    else
      NextBB = FirstInlinedBB->getSuccessor();
  }
  if (NextBB)
    FirstInlinedBB->removeSuccessor(NextBB);

  // Remove the call instruction.
  auto InsertII = FirstInlinedBB->eraseInstruction(CallInst);

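  // The ratio between the execution count at the call site and the callee's
  // total count is used below to scale the profile of the inlined blocks.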
  double ProfileRatio = 0;
  if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount())
    ProfileRatio =
        (double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount;

  // Save execution count of the first block as we don't want it to change
  // later due to profile adjustment rounding errors.
  const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount();

  // Copy basic blocks and maintain a map from their origin.
  std::unordered_map<const BinaryBasicBlock *, BinaryBasicBlock *> InlinedBBMap;
  InlinedBBMap[&Callee.front()] = FirstInlinedBB;
  for (const BinaryBasicBlock &BB : llvm::drop_begin(Callee)) {
    BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock();
    InlinedBBMap[&BB] = InlinedBB;
    InlinedBB->setCFIState(FirstInlinedBB->getCFIState());
    if (Callee.hasValidProfile())
      InlinedBB->setExecutionCount(BB.getKnownExecutionCount());
    else
      InlinedBB->setExecutionCount(FirstInlinedBBCount);
  }

  // Copy over instructions and edges.
  for (const BinaryBasicBlock &BB : Callee) {
    BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB];

    if (InlinedBB != FirstInlinedBB)
      InsertII = InlinedBB->begin();

    // Copy over instructions making any necessary mods.
    for (MCInst Inst : BB) {
      if (MIB.isPseudo(Inst))
        continue;

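      // Strip intermediate-level annotations copied from the callee. KeepTC
      // requests that the tail-call marker be preserved on these targets.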
      MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86() || BC.isAArch64());

      // Fix branch target. Strictly speaking, we don't have to do this as
      // targets of direct branches will be fixed later and don't matter
      // in the CFG state. However, disassembly may look misleading, and
      // hence we do the fixing.
      if (MIB.isBranch(Inst) && !MIB.isTailCall(Inst)) {
        assert(!MIB.isIndirectBranch(Inst) &&
               "unexpected indirect branch in callee");
        const BinaryBasicBlock *TargetBB =
            Callee.getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
        assert(TargetBB && "cannot find target block in callee");
        MIB.replaceBranchTarget(Inst, InlinedBBMap[TargetBB]->getLabel(),
                                BC.Ctx.get());
      }

      if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
        InsertII =
            std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
        continue;
      }

      // Handle special instructions for a non-tail call site.
      if (!MIB.isCall(Inst)) {
        // Returns are removed.
        break;
      }

      MIB.convertTailCallToCall(Inst);

      // Propagate EH-related info to call instructions.
      if (CSIsInvoke) {
        MIB.addEHInfo(Inst, *CSEHInfo);
        if (CSGNUArgsSize >= 0)
          MIB.addGnuArgsSize(Inst, CSGNUArgsSize);
      }

      InsertII =
          std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
    }

    // Add CFG edges to the basic blocks of the inlined instance.
    std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
    llvm::transform(BB.successors(), Successors.begin(),
                    [&InlinedBBMap](const BinaryBasicBlock *BB) {
                      auto It = InlinedBBMap.find(BB);
                      assert(It != InlinedBBMap.end());
                      return It->second;
                    });

    if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
      InlinedBB->addSuccessors(Successors.begin(), Successors.end(),
                               BB.branch_info_begin(), BB.branch_info_end());
    else
      InlinedBB->addSuccessors(Successors.begin(), Successors.end());

    if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) {
      // Either it's a return block or the last instruction never returns.
      InlinedBB->addSuccessor(NextBB, InlinedBB->getExecutionCount());
    }

    // Scale profiling info for blocks and edges after inlining.
    if (CallerFunction.hasValidProfile() && Callee.size() > 1) {
      if (opts::AdjustProfile)
        InlinedBB->adjustExecutionCount(ProfileRatio);
      else
        InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() *
                                     ProfileRatio);
    }
  }

  // Restore the original execution count of the first inlined basic block.
  FirstInlinedBB->setExecutionCount(FirstInlinedBBCount);

  CallerFunction.recomputeLandingPads();

  if (NextBB)
    return std::make_pair(NextBB, NextBB->begin());

  if (Callee.size() == 1)
    return std::make_pair(FirstInlinedBB, InsertII);

  return std::make_pair(FirstInlinedBB, FirstInlinedBB->end());
}

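/// Scan \p Function for inlinable call sites, hottest blocks first, and
/// inline them. Return true if at least one call site was inlined.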
bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
  BinaryContext &BC = Function.getBinaryContext();
  std::vector<BinaryBasicBlock *> Blocks(Function.getLayout().block_begin(),
                                         Function.getLayout().block_end());
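  // Visit the hottest basic blocks first so that the most profitable call
  // sites are inlined before any -inline-limit budget is exhausted.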
  llvm::sort(
      Blocks, [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
        return BB1->getKnownExecutionCount() > BB2->getKnownExecutionCount();
      });

  bool DidInlining = false;
  for (BinaryBasicBlock *BB : Blocks) {
    for (auto InstIt = BB->begin(); InstIt != BB->end();) {
      MCInst &Inst = *InstIt;
      if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 ||
          !Inst.getOperand(0).isExpr()) {
        ++InstIt;
        continue;
      }

      const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
      assert(TargetSymbol && "target symbol expected for direct call");

      // Don't inline calls to a secondary entry point in a target function.
      uint64_t EntryID = 0;
      BinaryFunction *TargetFunction =
          BC.getFunctionForSymbol(TargetSymbol, &EntryID);
      if (!TargetFunction || EntryID != 0) {
        ++InstIt;
        continue;
      }

      // Don't do recursive inlining.
      if (TargetFunction == &Function) {
        ++InstIt;
        continue;
      }

      auto IInfo = InliningCandidates.find(TargetFunction);
      if (IInfo == InliningCandidates.end()) {
        ++InstIt;
        continue;
      }

      const bool IsTailCall = BC.MIB->isTailCall(Inst);
      if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) {
        ++InstIt;
        continue;
      }

      int64_t SizeAfterInlining;
      if (IsTailCall)
        SizeAfterInlining =
            IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC);
      else
        SizeAfterInlining =
            IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC);

      if (!opts::InlineAll && !opts::mustConsider(*TargetFunction)) {
        if (!opts::InlineSmallFunctions ||
            SizeAfterInlining > opts::InlineSmallFunctionsBytes) {
          ++InstIt;
          continue;
        }
      }

      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
                        << " in " << Function << " : " << BB->getName()
                        << ". Count: " << BB->getKnownExecutionCount()
                        << ". Size change: " << SizeAfterInlining
                        << " bytes.\n");

      std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);

      DidInlining = true;
      TotalInlinedBytes += SizeAfterInlining;

      ++NumInlinedCallSites;
      NumInlinedDynamicCalls += BB->getExecutionCount();

      // Subtract basic block execution count from the callee execution count.
      if (opts::AdjustProfile)
        TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount());

      // Check if the caller inlining status has to be adjusted.
      if (IInfo->second.Type == INL_TAILCALL) {
        auto CallerIInfo = InliningCandidates.find(&Function);
        if (CallerIInfo != InliningCandidates.end() &&
            CallerIInfo->second.Type == INL_ANY) {
          LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
                            << Function << '\n');
          CallerIInfo->second.Type = INL_TAILCALL;
        }
      }

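      // Note: with the default -inline-limit of 0 this equality never holds,
      // since the counter was just incremented above, i.e. inlining is
      // unbounded.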
      if (NumInlinedCallSites == opts::InlineLimit)
        return true;
    }
  }

  return DidInlining;
}

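/// Pass entry point: iterate inlining over all functions, hottest first,
/// recomputing the candidate set each round, until no new call site is
/// inlined or -inline-max-iters is reached.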
Error Inliner::runOnFunctions(BinaryContext &BC) {
  opts::syncOptions();

  if (!opts::inliningEnabled())
    return Error::success();

  bool InlinedOnce;
  unsigned NumIters = 0;
  do {
    if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
      break;

    InlinedOnce = false;

    InliningCandidates.clear();
    findInliningCandidates(BC);

    std::vector<BinaryFunction *> ConsideredFunctions;
    for (auto &BFI : BC.getBinaryFunctions()) {
      BinaryFunction &Function = BFI.second;
      if (!shouldOptimize(Function))
        continue;
      ConsideredFunctions.push_back(&Function);
    }
    llvm::sort(ConsideredFunctions, [](const BinaryFunction *A,
                                       const BinaryFunction *B) {
      return B->getKnownExecutionCount() < A->getKnownExecutionCount();
    });
    for (BinaryFunction *Function : ConsideredFunctions) {
      if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
        break;

      const bool DidInline = inlineCallsInFunction(*Function);

      if (DidInline)
        Modified.insert(Function);

      InlinedOnce |= DidInline;
    }

    ++NumIters;
  } while (InlinedOnce && NumIters < opts::InlineMaxIters);

  if (NumInlinedCallSites)
    BC.outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at "
              << NumInlinedCallSites << " call sites in " << NumIters
              << " iteration(s). Change in binary size: " << TotalInlinedBytes
              << " bytes.\n";
  return Error::success();
}

} // namespace bolt
} // namespace llvm