//===- bolt/Passes/Inliner.cpp - Inlining pass for low-level binary IR ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Inliner class used for inlining binary functions.
//
// The current inliner has a limited callee support
// (see Inliner::getInliningInfo() for the most up-to-date details):
//
//  * No exception handling
//  * No jump tables
//  * Single entry point
//  * CFI update not supported - breaks unwinding
//  * Regular Call Sites:
//    - only leaf functions (or callees with only tail calls)
//      * no invokes (they can't be tail calls)
//    - no direct use of %rsp
//  * Tail Call Sites:
//    - since the stack is unmodified, the regular call limitations are lifted
//
//===----------------------------------------------------------------------===//

27#include "bolt/Passes/Inliner.h"
28#include "bolt/Core/MCPlus.h"
29#include "llvm/Support/CommandLine.h"
30
31#define DEBUG_TYPE "bolt-inliner"
32
33using namespace llvm;
34
35namespace opts {
36
37extern cl::OptionCategory BoltOptCategory;
38
39static cl::opt<bool>
40 AdjustProfile("inline-ap",
41 cl::desc("adjust function profile after inlining"),
42 cl::cat(BoltOptCategory));
43
44static cl::list<std::string>
45ForceInlineFunctions("force-inline",
46 cl::CommaSeparated,
47 cl::desc("list of functions to always consider for inlining"),
48 cl::value_desc("func1,func2,func3,..."),
49 cl::Hidden,
50 cl::cat(BoltOptCategory));
51
52static cl::opt<bool> InlineAll("inline-all", cl::desc("inline all functions"),
53 cl::cat(BoltOptCategory));
54
55static cl::opt<bool> InlineIgnoreLeafCFI(
56 "inline-ignore-leaf-cfi",
57 cl::desc("inline leaf functions with CFI programs (can break unwinding)"),
58 cl::init(Val: true), cl::ReallyHidden, cl::cat(BoltOptCategory));
59
60static cl::opt<bool> InlineIgnoreCFI(
61 "inline-ignore-cfi",
62 cl::desc(
63 "inline functions with CFI programs (can break exception handling)"),
64 cl::ReallyHidden, cl::cat(BoltOptCategory));
65
66static cl::opt<unsigned>
67 InlineLimit("inline-limit",
68 cl::desc("maximum number of call sites to inline"), cl::init(Val: 0),
69 cl::Hidden, cl::cat(BoltOptCategory));
70
71static cl::opt<unsigned>
72 InlineMaxIters("inline-max-iters",
73 cl::desc("maximum number of inline iterations"), cl::init(Val: 3),
74 cl::Hidden, cl::cat(BoltOptCategory));
75
76static cl::opt<bool> InlineSmallFunctions(
77 "inline-small-functions",
78 cl::desc("inline functions if increase in size is less than defined by "
79 "-inline-small-functions-bytes"),
80 cl::cat(BoltOptCategory));
81
82static cl::opt<unsigned> InlineSmallFunctionsBytes(
83 "inline-small-functions-bytes",
84 cl::desc("max number of bytes for the function to be considered small for "
85 "inlining purposes"),
86 cl::init(Val: 4), cl::Hidden, cl::cat(BoltOptCategory));
87
88static cl::opt<bool> NoInline(
89 "no-inline",
90 cl::desc("disable all inlining (overrides other inlining options)"),
91 cl::cat(BoltOptCategory));
92
93/// This function returns true if any of inlining options are specified and the
94/// inlining pass should be executed. Whenever a new inlining option is added,
95/// this function should reflect the change.
96bool inliningEnabled() {
97 return !NoInline &&
98 (InlineAll || InlineSmallFunctions || !ForceInlineFunctions.empty());
99}
100
101bool mustConsider(const llvm::bolt::BinaryFunction &Function) {
102 for (std::string &Name : opts::ForceInlineFunctions)
103 if (Function.hasName(FunctionName: Name))
104 return true;
105 return false;
106}
107
108void syncOptions() {
109 if (opts::InlineIgnoreCFI)
110 opts::InlineIgnoreLeafCFI = true;
111
112 if (opts::InlineAll)
113 opts::InlineSmallFunctions = true;
114}
115
116} // namespace opts
117
118namespace llvm {
119namespace bolt {
120
121uint64_t Inliner::SizeOfCallInst;
122uint64_t Inliner::SizeOfTailCallInst;
123
124uint64_t Inliner::getSizeOfCallInst(const BinaryContext &BC) {
125 if (SizeOfCallInst)
126 return SizeOfCallInst;
127
128 MCInst Inst;
129 BC.MIB->createCall(Inst, Target: BC.Ctx->createNamedTempSymbol(), Ctx: BC.Ctx.get());
130 SizeOfCallInst = BC.computeInstructionSize(Inst);
131
132 return SizeOfCallInst;
133}
134
135uint64_t Inliner::getSizeOfTailCallInst(const BinaryContext &BC) {
136 if (SizeOfTailCallInst)
137 return SizeOfTailCallInst;
138
139 MCInst Inst;
140 BC.MIB->createTailCall(Inst, Target: BC.Ctx->createNamedTempSymbol(), Ctx: BC.Ctx.get());
141 SizeOfTailCallInst = BC.computeInstructionSize(Inst);
142
143 return SizeOfTailCallInst;
144}
145
146InliningInfo getInliningInfo(const BinaryFunction &BF) {
147 const BinaryContext &BC = BF.getBinaryContext();
148 bool DirectSP = false;
149 bool HasCFI = false;
150 bool IsLeaf = true;
151
152 // Perform necessary checks unless the option overrides it.
153 if (!opts::mustConsider(Function: BF)) {
154 if (BF.hasSDTMarker())
155 return INL_NONE;
156
157 if (BF.hasEHRanges())
158 return INL_NONE;
159
160 if (BF.isMultiEntry())
161 return INL_NONE;
162
163 if (BF.hasJumpTables())
164 return INL_NONE;
165
166 const MCPhysReg SPReg = BC.MIB->getStackPointer();
167 for (const BinaryBasicBlock &BB : BF) {
168 for (const MCInst &Inst : BB) {
169 // Tail calls are marked as implicitly using the stack pointer and they
170 // could be inlined.
171 if (BC.MIB->isTailCall(Inst))
172 break;
173
174 if (BC.MIB->isCFI(Inst)) {
175 HasCFI = true;
176 continue;
177 }
178
179 if (BC.MIB->isCall(Inst))
180 IsLeaf = false;
181
182 // Push/pop instructions are straightforward to handle.
183 if (BC.MIB->isPush(Inst) || BC.MIB->isPop(Inst))
184 continue;
185
186 DirectSP |= BC.MIB->hasDefOfPhysReg(MI: Inst, Reg: SPReg) ||
187 BC.MIB->hasUseOfPhysReg(MI: Inst, Reg: SPReg);
188 }
189 }
190 }
191
192 if (HasCFI) {
193 if (!opts::InlineIgnoreLeafCFI)
194 return INL_NONE;
195
196 if (!IsLeaf && !opts::InlineIgnoreCFI)
197 return INL_NONE;
198 }
199
200 InliningInfo Info(DirectSP ? INL_TAILCALL : INL_ANY);
201
202 size_t Size = BF.estimateSize();
203
204 Info.SizeAfterInlining = Size;
205 Info.SizeAfterTailCallInlining = Size;
206
207 // Handle special case of the known size reduction.
208 if (BF.size() == 1) {
209 // For a regular call the last return instruction could be removed
210 // (or converted to a branch).
211 const MCInst *LastInst = BF.back().getLastNonPseudoInstr();
212 if (LastInst && BC.MIB->isReturn(Inst: *LastInst) &&
213 !BC.MIB->isTailCall(Inst: *LastInst)) {
214 const uint64_t RetInstSize = BC.computeInstructionSize(Inst: *LastInst);
215 assert(Size >= RetInstSize);
216 Info.SizeAfterInlining -= RetInstSize;
217 }
218 }
219
220 return Info;
221}
222
223void Inliner::findInliningCandidates(BinaryContext &BC) {
224 for (const auto &BFI : BC.getBinaryFunctions()) {
225 const BinaryFunction &Function = BFI.second;
226 if (!shouldOptimize(BF: Function))
227 continue;
228 const InliningInfo InlInfo = getInliningInfo(BF: Function);
229 if (InlInfo.Type != INL_NONE)
230 InliningCandidates[&Function] = InlInfo;
231 }
232}
233
234std::pair<BinaryBasicBlock *, BinaryBasicBlock::iterator>
235Inliner::inlineCall(BinaryBasicBlock &CallerBB,
236 BinaryBasicBlock::iterator CallInst,
237 const BinaryFunction &Callee) {
238 BinaryFunction &CallerFunction = *CallerBB.getFunction();
239 BinaryContext &BC = CallerFunction.getBinaryContext();
240 auto &MIB = *BC.MIB;
241
242 assert(MIB.isCall(*CallInst) && "can only inline a call or a tail call");
243 assert(!Callee.isMultiEntry() &&
244 "cannot inline function with multiple entries");
245 assert(!Callee.hasJumpTables() &&
246 "cannot inline function with jump table(s)");
247
248 // Get information about the call site.
249 const bool CSIsInvoke = BC.MIB->isInvoke(Inst: *CallInst);
250 const bool CSIsTailCall = BC.MIB->isTailCall(Inst: *CallInst);
251 const int64_t CSGNUArgsSize = BC.MIB->getGnuArgsSize(Inst: *CallInst);
252 const std::optional<MCPlus::MCLandingPad> CSEHInfo =
253 BC.MIB->getEHInfo(Inst: *CallInst);
254
255 // Split basic block at the call site if there will be more incoming edges
256 // coming from the callee.
257 BinaryBasicBlock *FirstInlinedBB = &CallerBB;
258 if (Callee.front().pred_size() && CallInst != CallerBB.begin()) {
259 FirstInlinedBB = CallerBB.splitAt(II: CallInst);
260 CallInst = FirstInlinedBB->begin();
261 }
262
263 // Split basic block after the call instruction unless the callee is trivial
264 // (i.e. consists of a single basic block). If necessary, obtain a basic block
265 // for return instructions in the callee to redirect to.
266 BinaryBasicBlock *NextBB = nullptr;
267 if (Callee.size() > 1) {
268 if (std::next(x: CallInst) != FirstInlinedBB->end())
269 NextBB = FirstInlinedBB->splitAt(II: std::next(x: CallInst));
270 else
271 NextBB = FirstInlinedBB->getSuccessor();
272 }
273 if (NextBB)
274 FirstInlinedBB->removeSuccessor(Succ: NextBB);
275
276 // Remove the call instruction.
277 auto InsertII = FirstInlinedBB->eraseInstruction(II: CallInst);
278
279 double ProfileRatio = 0;
280 if (uint64_t CalleeExecCount = Callee.getKnownExecutionCount())
281 ProfileRatio =
282 (double)FirstInlinedBB->getKnownExecutionCount() / CalleeExecCount;
283
284 // Save execution count of the first block as we don't want it to change
285 // later due to profile adjustment rounding errors.
286 const uint64_t FirstInlinedBBCount = FirstInlinedBB->getKnownExecutionCount();
287
288 // Copy basic blocks and maintain a map from their origin.
289 std::unordered_map<const BinaryBasicBlock *, BinaryBasicBlock *> InlinedBBMap;
290 InlinedBBMap[&Callee.front()] = FirstInlinedBB;
291 for (const BinaryBasicBlock &BB : llvm::drop_begin(RangeOrContainer: Callee)) {
292 BinaryBasicBlock *InlinedBB = CallerFunction.addBasicBlock();
293 InlinedBBMap[&BB] = InlinedBB;
294 InlinedBB->setCFIState(FirstInlinedBB->getCFIState());
295 if (Callee.hasValidProfile())
296 InlinedBB->setExecutionCount(BB.getKnownExecutionCount());
297 else
298 InlinedBB->setExecutionCount(FirstInlinedBBCount);
299 }
300
301 // Copy over instructions and edges.
302 for (const BinaryBasicBlock &BB : Callee) {
303 BinaryBasicBlock *InlinedBB = InlinedBBMap[&BB];
304
305 if (InlinedBB != FirstInlinedBB)
306 InsertII = InlinedBB->begin();
307
308 // Copy over instructions making any necessary mods.
309 for (MCInst Inst : BB) {
310 if (MIB.isPseudo(Inst))
311 continue;
312
313 MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());
314
315 // Fix branch target. Strictly speaking, we don't have to do this as
316 // targets of direct branches will be fixed later and don't matter
317 // in the CFG state. However, disassembly may look misleading, and
318 // hence we do the fixing.
319 if (MIB.isBranch(Inst)) {
320 assert(!MIB.isIndirectBranch(Inst) &&
321 "unexpected indirect branch in callee");
322 const BinaryBasicBlock *TargetBB =
323 Callee.getBasicBlockForLabel(Label: MIB.getTargetSymbol(Inst));
324 assert(TargetBB && "cannot find target block in callee");
325 MIB.replaceBranchTarget(Inst, TBB: InlinedBBMap[TargetBB]->getLabel(),
326 Ctx: BC.Ctx.get());
327 }
328
329 if (CSIsTailCall || (!MIB.isCall(Inst) && !MIB.isReturn(Inst))) {
330 InsertII =
331 std::next(x: InlinedBB->insertInstruction(At: InsertII, NewInst: std::move(Inst)));
332 continue;
333 }
334
335 // Handle special instructions for a non-tail call site.
336 if (!MIB.isCall(Inst)) {
337 // Returns are removed.
338 break;
339 }
340
341 MIB.convertTailCallToCall(Inst);
342
343 // Propagate EH-related info to call instructions.
344 if (CSIsInvoke) {
345 MIB.addEHInfo(Inst, LP: *CSEHInfo);
346 if (CSGNUArgsSize >= 0)
347 MIB.addGnuArgsSize(Inst, GnuArgsSize: CSGNUArgsSize);
348 }
349
350 InsertII =
351 std::next(x: InlinedBB->insertInstruction(At: InsertII, NewInst: std::move(Inst)));
352 }
353
354 // Add CFG edges to the basic blocks of the inlined instance.
355 std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
356 llvm::transform(Range: BB.successors(), d_first: Successors.begin(),
357 F: [&InlinedBBMap](const BinaryBasicBlock *BB) {
358 return InlinedBBMap.at(k: BB);
359 });
360
361 if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
362 InlinedBB->addSuccessors(Begin: Successors.begin(), End: Successors.end(),
363 BrBegin: BB.branch_info_begin(), BrEnd: BB.branch_info_end());
364 else
365 InlinedBB->addSuccessors(Begin: Successors.begin(), End: Successors.end());
366
367 if (!CSIsTailCall && BB.succ_size() == 0 && NextBB) {
368 // Either it's a return block or the last instruction never returns.
369 InlinedBB->addSuccessor(Succ: NextBB, Count: InlinedBB->getExecutionCount());
370 }
371
372 // Scale profiling info for blocks and edges after inlining.
373 if (CallerFunction.hasValidProfile() && Callee.size() > 1) {
374 if (opts::AdjustProfile)
375 InlinedBB->adjustExecutionCount(Ratio: ProfileRatio);
376 else
377 InlinedBB->setExecutionCount(InlinedBB->getKnownExecutionCount() *
378 ProfileRatio);
379 }
380 }
381
382 // Restore the original execution count of the first inlined basic block.
383 FirstInlinedBB->setExecutionCount(FirstInlinedBBCount);
384
385 CallerFunction.recomputeLandingPads();
386
387 if (NextBB)
388 return std::make_pair(x&: NextBB, y: NextBB->begin());
389
390 if (Callee.size() == 1)
391 return std::make_pair(x&: FirstInlinedBB, y&: InsertII);
392
393 return std::make_pair(x&: FirstInlinedBB, y: FirstInlinedBB->end());
394}
395
396bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
397 BinaryContext &BC = Function.getBinaryContext();
398 std::vector<BinaryBasicBlock *> Blocks(Function.getLayout().block_begin(),
399 Function.getLayout().block_end());
400 llvm::sort(
401 C&: Blocks, Comp: [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) {
402 return BB1->getKnownExecutionCount() > BB2->getKnownExecutionCount();
403 });
404
405 bool DidInlining = false;
406 for (BinaryBasicBlock *BB : Blocks) {
407 for (auto InstIt = BB->begin(); InstIt != BB->end();) {
408 MCInst &Inst = *InstIt;
409 if (!BC.MIB->isCall(Inst) || MCPlus::getNumPrimeOperands(Inst) != 1 ||
410 !Inst.getOperand(i: 0).isExpr()) {
411 ++InstIt;
412 continue;
413 }
414
415 const MCSymbol *TargetSymbol = BC.MIB->getTargetSymbol(Inst);
416 assert(TargetSymbol && "target symbol expected for direct call");
417
418 // Don't inline calls to a secondary entry point in a target function.
419 uint64_t EntryID = 0;
420 BinaryFunction *TargetFunction =
421 BC.getFunctionForSymbol(Symbol: TargetSymbol, EntryDesc: &EntryID);
422 if (!TargetFunction || EntryID != 0) {
423 ++InstIt;
424 continue;
425 }
426
427 // Don't do recursive inlining.
428 if (TargetFunction == &Function) {
429 ++InstIt;
430 continue;
431 }
432
433 auto IInfo = InliningCandidates.find(x: TargetFunction);
434 if (IInfo == InliningCandidates.end()) {
435 ++InstIt;
436 continue;
437 }
438
439 const bool IsTailCall = BC.MIB->isTailCall(Inst);
440 if (!IsTailCall && IInfo->second.Type == INL_TAILCALL) {
441 ++InstIt;
442 continue;
443 }
444
445 int64_t SizeAfterInlining;
446 if (IsTailCall)
447 SizeAfterInlining =
448 IInfo->second.SizeAfterTailCallInlining - getSizeOfTailCallInst(BC);
449 else
450 SizeAfterInlining =
451 IInfo->second.SizeAfterInlining - getSizeOfCallInst(BC);
452
453 if (!opts::InlineAll && !opts::mustConsider(Function: *TargetFunction)) {
454 if (!opts::InlineSmallFunctions ||
455 SizeAfterInlining > opts::InlineSmallFunctionsBytes) {
456 ++InstIt;
457 continue;
458 }
459 }
460
461 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: inlining call to " << *TargetFunction
462 << " in " << Function << " : " << BB->getName()
463 << ". Count: " << BB->getKnownExecutionCount()
464 << ". Size change: " << SizeAfterInlining
465 << " bytes.\n");
466
467 std::tie(args&: BB, args&: InstIt) = inlineCall(CallerBB&: *BB, CallInst: InstIt, Callee: *TargetFunction);
468
469 DidInlining = true;
470 TotalInlinedBytes += SizeAfterInlining;
471
472 ++NumInlinedCallSites;
473 NumInlinedDynamicCalls += BB->getExecutionCount();
474
475 // Subtract basic block execution count from the callee execution count.
476 if (opts::AdjustProfile)
477 TargetFunction->adjustExecutionCount(Count: BB->getKnownExecutionCount());
478
479 // Check if the caller inlining status has to be adjusted.
480 if (IInfo->second.Type == INL_TAILCALL) {
481 auto CallerIInfo = InliningCandidates.find(x: &Function);
482 if (CallerIInfo != InliningCandidates.end() &&
483 CallerIInfo->second.Type == INL_ANY) {
484 LLVM_DEBUG(dbgs() << "adjusting inlining status for function "
485 << Function << '\n');
486 CallerIInfo->second.Type = INL_TAILCALL;
487 }
488 }
489
490 if (NumInlinedCallSites == opts::InlineLimit)
491 return true;
492 }
493 }
494
495 return DidInlining;
496}
497
498Error Inliner::runOnFunctions(BinaryContext &BC) {
499 opts::syncOptions();
500
501 if (!opts::inliningEnabled())
502 return Error::success();
503
504 bool InlinedOnce;
505 unsigned NumIters = 0;
506 do {
507 if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
508 break;
509
510 InlinedOnce = false;
511
512 InliningCandidates.clear();
513 findInliningCandidates(BC);
514
515 std::vector<BinaryFunction *> ConsideredFunctions;
516 for (auto &BFI : BC.getBinaryFunctions()) {
517 BinaryFunction &Function = BFI.second;
518 if (!shouldOptimize(BF: Function))
519 continue;
520 ConsideredFunctions.push_back(x: &Function);
521 }
522 llvm::sort(C&: ConsideredFunctions, Comp: [](const BinaryFunction *A,
523 const BinaryFunction *B) {
524 return B->getKnownExecutionCount() < A->getKnownExecutionCount();
525 });
526 for (BinaryFunction *Function : ConsideredFunctions) {
527 if (opts::InlineLimit && NumInlinedCallSites >= opts::InlineLimit)
528 break;
529
530 const bool DidInline = inlineCallsInFunction(Function&: *Function);
531
532 if (DidInline)
533 Modified.insert(x: Function);
534
535 InlinedOnce |= DidInline;
536 }
537
538 ++NumIters;
539 } while (InlinedOnce && NumIters < opts::InlineMaxIters);
540
541 if (NumInlinedCallSites)
542 BC.outs() << "BOLT-INFO: inlined " << NumInlinedDynamicCalls << " calls at "
543 << NumInlinedCallSites << " call sites in " << NumIters
544 << " iteration(s). Change in binary size: " << TotalInlinedBytes
545 << " bytes.\n";
546 return Error::success();
547}
548
549} // namespace bolt
550} // namespace llvm
551

// Source: bolt/lib/Passes/Inliner.cpp