//===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Inliner class used for inlining binary functions.
//
// The current inliner has limited callee support
// (see Inliner::getInliningInfo() for the most up-to-date details):
//
//  * No exception handling
//  * No jump tables
//  * Single entry point
//  * CFI update not supported - breaks unwinding
//  * Regular Call Sites:
//    - only leaf functions (or callees with only tail calls)
//      * no invokes (they can't be tail calls)
//    - no direct use of %rsp
//  * Tail Call Sites:
//    - since the stack is unmodified, the regular call limitations are lifted
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/Inliner.h"
#include "bolt/Core/MCPlus.h"
#include "llvm/Support/CommandLine.h"

#define DEBUG_TYPE "bolt-inliner"

using namespace llvm;

namespace opts {

extern cl::OptionCategory BoltOptCategory;

static cl::opt<bool>
    AdjustProfile("inline-ap",
                  cl::desc("adjust function profile after inlining"),
                  cl::cat(BoltOptCategory));

static cl::list<std::string>
    ForceInlineFunctions("force-inline",
                         cl::CommaSeparated,
                         cl::desc("list of functions to always consider for inlining"),
                         cl::value_desc("func1,func2,func3,..."),
                         cl::Hidden,
                         cl::cat(BoltOptCategory));

static cl::list<std::string> SkipInlineFunctions(
    "skip-inline", cl::CommaSeparated,
    cl::desc("list of functions to never consider for inlining"),
    cl::value_desc("func1,func2,func3,..."), cl::Hidden,
    cl::cat(BoltOptCategory));

static cl::opt<bool> InlineAll("inline-all", cl::desc("inline all functions"),
                               cl::cat(BoltOptCategory));

static cl::opt<bool> InlineIgnoreLeafCFI(
    "inline-ignore-leaf-cfi",
    cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
    cl::init(true), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<bool> InlineIgnoreCFI(
    "inline-ignore-cfi",
    cl::desc(
        "inline functions with CFI programs (can break exception handling)"),
    cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned>
    InlineLimit("inline-limit",
                cl::desc("maximum number of call sites to inline"), cl::init(0),
                cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<unsigned>
    InlineMaxIters("inline-max-iters",
                   cl::desc("maximum number of inline iterations"), cl::init(3),
                   cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> InlineSmallFunctions(
    "inline-small-functions",
    cl::desc("inline functions if increase in size is less than defined by "
             "-inline-small-functions-bytes"),
    cl::cat(BoltOptCategory));

static cl::opt<unsigned> InlineSmallFunctionsBytes(
    "inline-small-functions-bytes",
    cl::desc("max number of bytes for the function to be considered small for "
             "inlining purposes"),
    cl::init(4), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> NoInline(
    "no-inline",
    cl::desc("disable all inlining (overrides other inlining options)"),
    cl::cat(BoltOptCategory));

/// This function returns true if any of the inlining options are specified and
/// the inlining pass should be executed. Whenever a new inlining option is
/// added, this function should reflect the change.
bool inliningEnabled() {
  return !NoInline &&
         (InlineAll || InlineSmallFunctions || !ForceInlineFunctions.empty());
}
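
// Illustrative invocations (the binary names are placeholders): the pass only
// runs when one of the enabling options above is given, e.g.
//   llvm-bolt app -o app.bolt -inline-small-functions
//   llvm-bolt app -o app.bolt -inline-all -inline-limit=100
// while -no-inline disables inlining regardless of the other options.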

bool mustConsider(const llvm::bolt::BinaryFunction &Function) {
  for (std::string &Name : opts::ForceInlineFunctions)
    if (Function.hasName(Name))
      return true;
  return false;
}

bool mustSkip(const llvm::bolt::BinaryFunction &Function) {
  return llvm::any_of(opts::SkipInlineFunctions, [&](const std::string &Name) {
    return Function.hasName(Name);
  });
}

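/// Propagate implications between inlining options: ignoring CFI everywhere
/// (-inline-ignore-cfi) implies ignoring it in leaf functions, and -inline-all
/// implies -inline-small-functions.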
void syncOptions() {
  if (opts::InlineIgnoreCFI)
    opts::InlineIgnoreLeafCFI = true;

  if (opts::InlineAll)
    opts::InlineSmallFunctions = true;
}

} // namespace opts

namespace llvm {
namespace bolt {

uint64_t Inliner::SizeOfCallInst;
uint64_t Inliner::SizeOfTailCallInst;

uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) {
  if (SizeOfCallInst)
    return SizeOfCallInst;

  MCInst Inst;
  BC.MIB->createCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
  SizeOfCallInst = BC.computeInstructionSize(Inst);

  return SizeOfCallInst;
}

uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) {
  if (SizeOfTailCallInst)
    return SizeOfTailCallInst;

  MCInst Inst;
  BC.MIB->createTailCall(Inst, BC.Ctx->createNamedTempSymbol(), BC.Ctx.get());
  SizeOfTailCallInst = BC.computeInstructionSize(Inst);

  return SizeOfTailCallInst;
}
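
// The two instruction sizes above are computed once and cached; they are later
// subtracted from a callee's estimated size to obtain the net code-size change
// of inlining a call site (the inlined body replaces the call or tail call).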

InliningInfo getInliningInfo(const BinaryFunction &BF) {
  const BinaryContext &BC = BF.getBinaryContext();
  bool DirectSP = false;
  bool HasCFI = false;
  bool IsLeaf = true;

  // Perform necessary checks unless the option overrides it.
  if (!opts::mustConsider(BF)) {
    if (BF.hasSDTMarker())
      return INL_NONE;

    if (BF.hasEHRanges())
      return INL_NONE;

    if (BF.isMultiEntry())
      return INL_NONE;

    if (BF.hasJumpTables())
      return INL_NONE;

    const MCPhysReg SPReg = BC.MIB->getStackPointer();
    for (const BinaryBasicBlock &BB : BF) {
      for (const MCInst &Inst : BB) {
        // Tail calls are marked as implicitly using the stack pointer and they
        // could be inlined.
        if (BC.MIB->isTailCall(Inst))
          break;

        if (BC.MIB->isCFI(Inst)) {
          HasCFI = true;
          continue;
        }

        if (BC.MIB->isCall(Inst))
          IsLeaf = false;

        // Push/pop instructions are straightforward to handle.
        if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
          continue;

        DirectSP |= BC.MIB->hasDefOfPhysReg(Inst, SPReg) ||
                    BC.MIB->hasUseOfPhysReg(Inst, SPReg);
      }
    }
  }

  if (HasCFI) {
    if (!opts::InlineIgnoreLeafCFI)
      return INL_NONE;

    if (!IsLeaf && !opts::InlineIgnoreCFI)
      return INL_NONE;
  }

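  // A callee that touches the stack pointer directly can still be inlined, but
  // only at tail call sites: as the file header notes, the stack there is in
  // its original state, so the regular call-site restrictions do not apply.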
  InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY);

  size_t Size = BF.estimateSize();

  Info.SizeAfterInlining = Size;
  Info.SizeAfterTailCallInlining = Size;

  // Handle special case of the known size reduction.
  if (BF.size() == 1) {
    // For a regular call the last return instruction could be removed
    // (or converted to a branch).
    const MCInst *LastInst = BF.back().getLastNonPseudoInstr();
    if (LastInst && BC.MIB->isReturn(*LastInst) &&
        !BC.MIB->isTailCall(*LastInst)) {
      const uint64_t RetInstSize = BC.computeInstructionSize(*LastInst);
      assert(Size >= RetInstSize);
      Info.SizeAfterInlining -= RetInstSize;
    }
  }

  return Info;
}

void Inliner::findInliningCandidates(BinaryContext &BC) {
  for (const auto &BFI : BC.getBinaryFunctions()) {
    const BinaryFunction &Function = BFI.second;
    if (!shouldOptimize(Function) || opts::mustSkip(Function))
      continue;
    const InliningInfo InlInfo = getInliningInfo(Function);
    if (InlInfo.Type != INL_NONE)
      InliningCandidates[&Function] = InlInfo;
  }
}

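/// Inline the callee's body at the given call site, splitting and stitching
/// the caller's basic blocks as needed. Returns the basic block and iterator
/// just past the inlined body, from which the caller resumes scanning.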
std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator>
Inliner::inlineCall(BinaryBasicBlock &CallerBB,
                    BinaryBasicBlock::iterator CallInst,
                    const BinaryFunction &Callee) {
  BinaryFunction &CallerFunction = *CallerBB.getFunction();
  BinaryContext &BC = CallerFunction.getBinaryContext();
  auto &MIB = *BC.MIB;

  assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call");
  assert(!Callee.isMultiEntry() &&
         "cannot inline function with multiple entries");
  assert(!Callee.hasJumpTables() &&
         "cannot inline function with jump table(s)");

  // Get information about the call site.
  const bool CSIsInvoke = BC.MIB->isInvoke(*CallInst);
  const bool CSIsTailCall = BC.MIB->isTailCall(*CallInst);
  const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(*CallInst);
  const std::optional<MCPlus::MCLandingPad> CSEHInfo =
      BC.MIB->getEHInfo(*CallInst);

  // Split basic block at the call site if there will be more incoming edges
  // coming from the callee.
  BinaryBasicBlock *FirstInlinedBB = &CallerBB;
  if (Callee.front().pred_size() && CallInst != CallerBB.begin()) {
    FirstInlinedBB = CallerBB.splitAt(CallInst);
    CallInst = FirstInlinedBB->begin();
  }

  // Split basic block after the call instruction unless the callee is trivial
  // (i.e. consists of a single basic block). If necessary, obtain a basic block
  // for return instructions in the callee to redirect to.
  BinaryBasicBlock *NextBB = nullptr;
  if (Callee.size() > 1) {
    if (std::next(CallInst) != FirstInlinedBB->end())
      NextBB = FirstInlinedBB->splitAt(std::next(CallInst));
    else
      NextBB = FirstInlinedBB->getSuccessor();
  }
  if (NextBB)
    FirstInlinedBB->removeSuccessor(NextBB);

  // Remove the call instruction.
  auto InsertII = FirstInlinedBB->eraseInstruction(CallInst);

  double ProfileRatio = 0;
  if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount())
    ProfileRatio =
        (double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount;

  // Save execution count of the first block as we don't want it to change
  // later due to profile adjustment rounding errors.
  const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount();

  // Copy basic blocks and maintain a map from their origin.
  std::unordered_map<const BinaryBasicBlock *, BinaryBasicBlock *> InlinedBBMap;
  InlinedBBMap[&Callee.front()] = FirstInlinedBB;
  for (const BinaryBasicBlock &BB : llvm::drop_begin(Callee)) {
    BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock();
    InlinedBBMap[&BB] = InlinedBB;
    InlinedBB->setCFIState(FirstInlinedBB->getCFIState());
    if (Callee.hasValidProfile())
      InlinedBB->setExecutionCount(BB.getKnownExecutionCount());
    else
      InlinedBB->setExecutionCount(FirstInlinedBBCount);
  }

  // Copy over instructions and edges.
  for (const BinaryBasicBlock &BB : Callee) {
    BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB];

    if (InlinedBB != FirstInlinedBB)
      InsertII = InlinedBB->begin();

    // Copy over instructions making any necessary mods.
    for (MCInst Inst : BB) {
      if (MIB.isPseudo(Inst))
        continue;

      MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86() || BC.isAArch64());

      // Fix branch target. Strictly speaking, we don't have to do this as
      // targets of direct branches will be fixed later and don't matter
      // in the CFG state. However, disassembly may look misleading, and
      // hence we do the fixing.
      if (MIB.isBranch(Inst) && !MIB.isTailCall(Inst)) {
        assert(!MIB.isIndirectBranch(Inst) &&
               "unexpected indirect branch in callee");
        const BinaryBasicBlock *TargetBB =
            Callee.getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
        assert(TargetBB && "cannot find target block in callee");
        MIB.replaceBranchTarget(Inst, InlinedBBMap[TargetBB]->getLabel(),
                                BC.Ctx.get());
      }

      if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
        InsertII =
            std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
        continue;
      }

      // Handle special instructions for a non-tail call site.
      if (!MIB.isCall(Inst)) {
        // Returns are removed.
        break;
      }

      MIB.convertTailCallToCall(Inst);

      // Propagate EH-related info to call instructions.
      if (CSIsInvoke) {
        MIB.addEHInfo(Inst, *CSEHInfo);
        if (CSGNUArgsSize >= 0)
          MIB.addGnuArgsSize(Inst, CSGNUArgsSize);
      }

      InsertII =
          std::next(InlinedBB->insertInstruction(InsertII, std::move(Inst)));
    }

    // Add CFG edges to the basic blocks of the inlined instance.
    std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
    llvm::transform(BB.successors(), Successors.begin(),
                    [&InlinedBBMap](const BinaryBasicBlock *BB) {
                      auto It = InlinedBBMap.find(BB);
                      assert(It != InlinedBBMap.end());
                      return It->second;
                    });

    if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
      InlinedBB->addSuccessors(Successors.begin(), Successors.end(),
                               BB.branch_info_begin(), BB.branch_info_end());
    else
      InlinedBB->addSuccessors(Successors.begin(), Successors.end());

    if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) {
      // Either it's a return block or the last instruction never returns.
      InlinedBB->addSuccessor(NextBB, InlinedBB->getExecutionCount());
    }

    // Scale profiling info for blocks and edges after inlining.
    if (CallerFunction.hasValidProfile() && Callee.size() > 1) {
      if (opts::AdjustProfile)
        InlinedBB->adjustExecutionCount(ProfileRatio);
      else
        InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() *
                                     ProfileRatio);
    }
  }

  // Restore the original execution count of the first inlined basic block.
  FirstInlinedBB->setExecutionCount(FirstInlinedBBCount);

  CallerFunction.recomputeLandingPads();

  if (NextBB)
    return std::make_pair(NextBB, NextBB->begin());

  if (Callee.size() == 1)
    return std::make_pair(FirstInlinedBB, InsertII);

  return std::make_pair(FirstInlinedBB, FirstInlinedBB->end());
}

bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
  BinaryContext &BC = Function.getBinaryContext();
  std::vector<BinaryBasicBlock *> Blocks(Function.getLayout().block_begin(),
                                         Function.getLayout().block_end());
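  // Visit blocks in decreasing execution-count order so the hottest call sites
  // are considered first; this matters when -inline-limit caps the number of
  // call sites that can be inlined.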
  llvm::sort(
      Blocks, [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
        return BB1->getKnownExecutionCount() > BB2->getKnownExecutionCount();
      });

  bool DidInlining = false;
  for (BinaryBasicBlock *BB : Blocks) {
    for (auto InstIt = BB->begin(); InstIt != BB->end();) {
      MCInst &Inst = *InstIt;
      if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 ||
          !Inst.getOperand(0).isExpr()) {
        ++InstIt;
        continue;
      }

      const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
      assert(TargetSymbol && "target symbol expected for direct call");

      // Don't inline calls to a secondary entry point in a target function.
      uint64_t EntryID = 0;
      BinaryFunction *TargetFunction =
          BC.getFunctionForSymbol(TargetSymbol, &EntryID);
      if (!TargetFunction || EntryID != 0) {
        ++InstIt;
        continue;
      }

      // Don't do recursive inlining.
      if (TargetFunction == &Function) {
        ++InstIt;
        continue;
      }

      auto IInfo = InliningCandidates.find(TargetFunction);
      if (IInfo == InliningCandidates.end()) {
        ++InstIt;
        continue;
      }

      const bool IsTailCall = BC.MIB->isTailCall(Inst);
      if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) {
        ++InstIt;
        continue;
      }

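      // Estimate the net change in the caller's size: the callee body that is
      // copied in, minus the call or tail-call instruction that is removed.
      // The result may be negative for very small callees.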
      int64_t SizeAfterInlining;
      if (IsTailCall)
        SizeAfterInlining =
            IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC);
      else
        SizeAfterInlining =
            IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC);

      if (!opts::InlineAll && !opts::mustConsider(*TargetFunction)) {
        if (!opts::InlineSmallFunctions ||
            SizeAfterInlining > opts::InlineSmallFunctionsBytes) {
          ++InstIt;
          continue;
        }
      }

      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
                        << " in " << Function << " : " << BB->getName()
                        << ". Count: " << BB->getKnownExecutionCount()
                        << ". Size change: " << SizeAfterInlining
                        << " bytes.\n");

      std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);

      DidInlining = true;
      TotalInlinedBytes += SizeAfterInlining;

      ++NumInlinedCallSites;
      NumInlinedDynamicCalls += BB->getExecutionCount();

      // Subtract basic block execution count from the callee execution count.
      if (opts::AdjustProfile)
        TargetFunction->adjustExecutionCount(BB->getKnownExecutionCount());

      // Check if the caller inlining status has to be adjusted.
      if (IInfo->second.Type == INL_TAILCALL) {
        auto CallerIInfo = InliningCandidates.find(&Function);
        if (CallerIInfo != InliningCandidates.end() &&
            CallerIInfo->second.Type == INL_ANY) {
          LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
                            << Function << '\n');
          CallerIInfo->second.Type = INL_TAILCALL;
        }
      }

      if (NumInlinedCallSites == opts::InlineLimit)
        return true;
    }
  }

  return DidInlining;
}

Error Inliner::runOnFunctions(BinaryContext &BC) {
  opts::syncOptions();

  if (!opts::inliningEnabled())
    return Error::success();

  bool InlinedOnce;
  unsigned NumIters = 0;
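
  // Inlining runs in rounds: candidates are recollected at the start of each
  // iteration because a round of inlining can change which functions qualify
  // (for example, a callee may become a leaf once its own calls are inlined).
  // The loop stops when a round inlines nothing or -inline-max-iters is hit.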
  do {
    if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
      break;

    InlinedOnce = false;

    InliningCandidates.clear();
    findInliningCandidates(BC);

    std::vector<BinaryFunction *> ConsideredFunctions;
    for (auto &BFI : BC.getBinaryFunctions()) {
      BinaryFunction &Function = BFI.second;
      if (!shouldOptimize(Function))
        continue;
      ConsideredFunctions.push_back(&Function);
    }
    llvm::sort(ConsideredFunctions, [](const BinaryFunction *A,
                                       const BinaryFunction *B) {
      return B->getKnownExecutionCount() < A->getKnownExecutionCount();
    });
    for (BinaryFunction *Function : ConsideredFunctions) {
      if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
        break;

      const bool DidInline = inlineCallsInFunction(*Function);

      if (DidInline)
        Modified.insert(Function);

      InlinedOnce |= DidInline;
    }

    ++NumIters;
  } while (InlinedOnce && NumIters < opts::InlineMaxIters);

  if (NumInlinedCallSites)
    BC.outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at "
              << NumInlinedCallSites << " call sites in " << NumIters
              << " iteration(s). Change in binary size: " << TotalInlinedBytes
              << " bytes.\n";
  return Error::success();
}

} // namespace bolt
} // namespace llvm