//===- InlineFunction.cpp - Code to perform function inlining ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "inline-function"

using namespace llvm;
using namespace llvm::memprof;
using ProfileCount = Function::ProfileCount;

static cl::opt<bool>
EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
  cl::Hidden,
  cl::desc("Convert noalias attributes to metadata during inlining."));

static cl::opt<bool>
    UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
                        cl::init(true),
                        cl::desc("Use the llvm.experimental.noalias.scope.decl "
                                 "intrinsic during inlining."));

// Disabled by default, because the added alignment assumptions may increase
// compile-time and block optimizations. This option is not suitable for use
// with frontends that emit comprehensive parameter alignment annotations.
static cl::opt<bool>
PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
  cl::init(false), cl::Hidden,
  cl::desc("Convert align attributes to assumptions during inlining."));

static cl::opt<unsigned> InlinerAttributeWindow(
    "max-inst-checked-for-throw-during-inlining", cl::Hidden,
    cl::desc("the maximum number of instructions analyzed for may throw during "
             "attribute inference in inlined body"),
    cl::init(4));

namespace {

  /// A class for recording information about inlining a landing pad.
  class LandingPadInliningInfo {
    /// Destination of the invoke's unwind.
    BasicBlock *OuterResumeDest;

    /// Destination for the callee's resume.
    BasicBlock *InnerResumeDest = nullptr;

    /// LandingPadInst associated with the invoke.
    LandingPadInst *CallerLPad = nullptr;

    /// PHI for EH values from landingpad insts.
    PHINode *InnerEHValuesPHI = nullptr;

    SmallVector<Value*, 8> UnwindDestPHIValues;

  public:
    LandingPadInliningInfo(InvokeInst *II)
        : OuterResumeDest(II->getUnwindDest()) {
      // If there are PHI nodes in the unwind destination block, we need to keep
      // track of which values came into them from the invoke before removing
      // the edge from this block.
      BasicBlock *InvokeBB = II->getParent();
      BasicBlock::iterator I = OuterResumeDest->begin();
      for (; isa<PHINode>(I); ++I) {
        // Save the value to use for this edge.
        PHINode *PHI = cast<PHINode>(I);
        UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
      }

      CallerLPad = cast<LandingPadInst>(I);
    }

    /// The outer unwind destination is the target of
    /// unwind edges introduced for calls within the inlined function.
    BasicBlock *getOuterResumeDest() const {
      return OuterResumeDest;
    }

    BasicBlock *getInnerResumeDest();

    LandingPadInst *getLandingPadInst() const { return CallerLPad; }

    /// Forward the 'resume' instruction to the caller's landing pad block.
    /// When the landing pad block has only one predecessor, this is
    /// a simple branch. When there is more than one predecessor, we need to
    /// split the landing pad block after the landingpad instruction and jump
    /// to there.
    void forwardResume(ResumeInst *RI,
                       SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);

    /// Add incoming-PHI values to the unwind destination block for the given
    /// basic block, using the values for the original invoke's source block.
    void addIncomingPHIValuesFor(BasicBlock *BB) const {
      addIncomingPHIValuesForInto(BB, OuterResumeDest);
    }

    void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
      BasicBlock::iterator I = dest->begin();
      for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
        PHINode *phi = cast<PHINode>(I);
        phi->addIncoming(UnwindDestPHIValues[i], src);
      }
    }
  };

} // end anonymous namespace

/// Get or create a target for the branch from ResumeInsts.
BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
  if (InnerResumeDest) return InnerResumeDest;

  // Split the landing pad.
  BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
  InnerResumeDest =
    OuterResumeDest->splitBasicBlock(SplitPoint,
                                     OuterResumeDest->getName() + ".body");

  // The number of incoming edges we expect to the inner landing pad.
  const unsigned PHICapacity = 2;

  // Create corresponding new PHIs for all the PHIs in the outer landing pad.
  BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
  BasicBlock::iterator I = OuterResumeDest->begin();
  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
    PHINode *OuterPHI = cast<PHINode>(I);
    PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
                                        OuterPHI->getName() + ".lpad-body");
    InnerPHI->insertBefore(InsertPoint);
    OuterPHI->replaceAllUsesWith(InnerPHI);
    InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
  }

  // Create a PHI for the exception values.
  InnerEHValuesPHI =
      PHINode::Create(CallerLPad->getType(), PHICapacity, "eh.lpad-body");
  InnerEHValuesPHI->insertBefore(InsertPoint);
  CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
  InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);

  // All done.
  return InnerResumeDest;
}
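
// For illustration only (a sketch with hypothetical names, not taken from a
// specific test case): given a caller landing pad block such as
//
//   lpad:
//     %lp = landingpad { ptr, i32 } cleanup
//     call void @handle(ptr %p)
//
// getInnerResumeDest() splits it after the landingpad into "lpad" and
// "lpad.body"; resume instructions forwarded from the inlinee branch to
// "lpad.body", with their exception values merged through the "eh.lpad-body"
// PHI created above.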

/// Forward the 'resume' instruction to the caller's landing pad block.
/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void LandingPadInliningInfo::forwardResume(
    ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
  BasicBlock *Dest = getInnerResumeDest();
  BasicBlock *Src = RI->getParent();

  BranchInst::Create(Dest, Src);

  // Update the PHIs in the destination. They were inserted in an order which
  // makes this work.
  addIncomingPHIValuesForInto(Src, Dest);

  InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
  RI->eraseFromParent();
}

/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
static Value *getParentPad(Value *EHPad) {
  if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
    return FPI->getParentPad();
  return cast<CatchSwitchInst>(EHPad)->getParentPad();
}

using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;

/// Helper for getUnwindDestToken that does the descendant-ward part of
/// the search.
static Value *getUnwindDestTokenHelper(Instruction *EHPad,
                                       UnwindDestMemoTy &MemoMap) {
  SmallVector<Instruction *, 8> Worklist(1, EHPad);

  while (!Worklist.empty()) {
    Instruction *CurrentPad = Worklist.pop_back_val();
    // We only put pads on the worklist that aren't in the MemoMap. When
    // we find an unwind dest for a pad we may update its ancestors, but
    // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
    // so they should never get updated while queued on the worklist.
    assert(!MemoMap.count(CurrentPad));
    Value *UnwindDestToken = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
      if (CatchSwitch->hasUnwindDest()) {
        UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
      } else {
        // Catchswitch doesn't have a 'nounwind' variant, and one might be
        // annotated as "unwinds to caller" when really it's nounwind (see
        // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
        // parent's unwind dest from this. We can check its catchpads'
        // descendants, since they might include a cleanuppad with an
        // "unwinds to caller" cleanupret, which can be trusted.
        for (auto HI = CatchSwitch->handler_begin(),
                  HE = CatchSwitch->handler_end();
             HI != HE && !UnwindDestToken; ++HI) {
          BasicBlock *HandlerBlock = *HI;
          auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
          for (User *Child : CatchPad->users()) {
            // Intentionally ignore invokes here -- since the catchswitch is
            // marked "unwind to caller", it would be a verifier error if it
            // contained an invoke which unwinds out of it, so any invoke we'd
            // encounter must unwind to some child of the catch.
            if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
              continue;

            Instruction *ChildPad = cast<Instruction>(Child);
            auto Memo = MemoMap.find(ChildPad);
            if (Memo == MemoMap.end()) {
              // Haven't figured out this child pad yet; queue it.
              Worklist.push_back(ChildPad);
              continue;
            }
            // We've already checked this child, but might have found that
            // it offers no proof either way.
            Value *ChildUnwindDestToken = Memo->second;
            if (!ChildUnwindDestToken)
              continue;
            // We already know the child's unwind dest, which can either
            // be ConstantTokenNone to indicate unwind to caller, or can
            // be another child of the catchpad. Only the former indicates
            // the unwind dest of the catchswitch.
            if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
              UnwindDestToken = ChildUnwindDestToken;
              break;
            }
            assert(getParentPad(ChildUnwindDestToken) == CatchPad);
          }
        }
      }
    } else {
      auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
      for (User *U : CleanupPad->users()) {
        if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
          if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
            UnwindDestToken = RetUnwindDest->getFirstNonPHI();
          else
            UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
          break;
        }
        Value *ChildUnwindDestToken;
        if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
          ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
        } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
          Instruction *ChildPad = cast<Instruction>(U);
          auto Memo = MemoMap.find(ChildPad);
          if (Memo == MemoMap.end()) {
            // Haven't resolved this child yet; queue it and keep searching.
            Worklist.push_back(ChildPad);
            continue;
          }
          // We've checked this child, but still need to ignore it if it
          // had no proof either way.
          ChildUnwindDestToken = Memo->second;
          if (!ChildUnwindDestToken)
            continue;
        } else {
          // Not a relevant user of the cleanuppad.
          continue;
        }
        // In a well-formed program, the child/invoke must either unwind to
        // an(other) child of the cleanup, or exit the cleanup. In the
        // first case, continue searching.
        if (isa<Instruction>(ChildUnwindDestToken) &&
            getParentPad(ChildUnwindDestToken) == CleanupPad)
          continue;
        UnwindDestToken = ChildUnwindDestToken;
        break;
      }
    }
    // If we haven't found an unwind dest for CurrentPad, we may have queued its
    // children, so move on to the next in the worklist.
    if (!UnwindDestToken)
      continue;

    // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
    // any ancestors of CurrentPad up to but not including UnwindDestToken's
    // parent pad. Record this in the memo map, and check to see if the
    // original EHPad being queried is one of the ones exited.
    Value *UnwindParent;
    if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
      UnwindParent = getParentPad(UnwindPad);
    else
      UnwindParent = nullptr;
    bool ExitedOriginalPad = false;
    for (Instruction *ExitedPad = CurrentPad;
         ExitedPad && ExitedPad != UnwindParent;
         ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
      // Skip over catchpads since they just follow their catchswitches.
      if (isa<CatchPadInst>(ExitedPad))
        continue;
      MemoMap[ExitedPad] = UnwindDestToken;
      ExitedOriginalPad |= (ExitedPad == EHPad);
    }

    if (ExitedOriginalPad)
      return UnwindDestToken;

    // Continue the search.
  }

  // No definitive information is contained within this funclet.
  return nullptr;
}

/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
/// return that pad instruction. If it unwinds to caller, return
/// ConstantTokenNone. If it does not have a definitive unwind destination,
/// return nullptr.
///
/// This routine gets invoked for calls in funclets in inlinees when inlining
/// an invoke. Since many funclets don't have calls inside them, it's queried
/// on-demand rather than building a map of pads to unwind dests up front.
/// Determining a funclet's unwind dest may require recursively searching its
/// descendants, and also ancestors and cousins if the descendants don't provide
/// an answer. Since most funclets will have their unwind dest immediately
/// available as the unwind dest of a catchswitch or cleanupret, this routine
/// searches top-down from the given pad and then up. To avoid worst-case
/// quadratic run-time given that approach, it uses a memo map to avoid
/// re-processing funclet trees. The callers that rewrite the IR as they go
/// take advantage of this, for correctness, by checking/forcing rewritten
/// pads' entries to match the original callee view.
static Value *getUnwindDestToken(Instruction *EHPad,
                                 UnwindDestMemoTy &MemoMap) {
  // Catchpads unwind to the same place as their catchswitch;
  // redirect any queries on catchpads so the code below can
  // deal with just catchswitches and cleanuppads.
  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
    EHPad = CPI->getCatchSwitch();

  // Check if we've already determined the unwind dest for this pad.
  auto Memo = MemoMap.find(EHPad);
  if (Memo != MemoMap.end())
    return Memo->second;

  // Search EHPad and, if necessary, its descendants.
  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
  if (UnwindDestToken)
    return UnwindDestToken;

  // No information is available for this EHPad from itself or any of its
  // descendants. An unwind all the way out to a pad in the caller would
  // need also to agree with the unwind dest of the parent funclet, so
  // search up the chain to try to find a funclet with information. Put
  // null entries in the memo map to avoid re-processing as we go up.
  MemoMap[EHPad] = nullptr;
#ifndef NDEBUG
  SmallPtrSet<Instruction *, 4> TempMemos;
  TempMemos.insert(EHPad);
#endif
  Instruction *LastUselessPad = EHPad;
  Value *AncestorToken;
  for (AncestorToken = getParentPad(EHPad);
       auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
       AncestorToken = getParentPad(AncestorToken)) {
    // Skip over catchpads since they just follow their catchswitches.
    if (isa<CatchPadInst>(AncestorPad))
      continue;
    // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
    // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
    // call to getUnwindDestToken, that would mean that AncestorPad had no
    // information in itself, its descendants, or its ancestors. If that
    // were the case, then we should also have recorded the lack of information
    // for the descendant that we're coming from. So assert that we don't
    // find a null entry in the MemoMap for AncestorPad.
    assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
    auto AncestorMemo = MemoMap.find(AncestorPad);
    if (AncestorMemo == MemoMap.end()) {
      UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
    } else {
      UnwindDestToken = AncestorMemo->second;
    }
    if (UnwindDestToken)
      break;
    LastUselessPad = AncestorPad;
    MemoMap[LastUselessPad] = nullptr;
#ifndef NDEBUG
    TempMemos.insert(LastUselessPad);
#endif
  }

  // We know that getUnwindDestTokenHelper was called on LastUselessPad and
  // returned nullptr (and likewise for EHPad and any of its ancestors up to
  // LastUselessPad), so LastUselessPad has no information from below. Since
  // getUnwindDestTokenHelper must investigate all downward paths through
  // no-information nodes to prove that a node has no information like this,
  // and since any time it finds information it records it in the MemoMap for
  // not just the immediately-containing funclet but also any ancestors also
  // exited, it must be the case that, walking downward from LastUselessPad,
  // visiting just those nodes which have not been mapped to an unwind dest
  // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
  // they are just used to keep getUnwindDestTokenHelper from repeating work),
  // any node visited must have been exhaustively searched with no information
  // for it found.
  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
  while (!Worklist.empty()) {
    Instruction *UselessPad = Worklist.pop_back_val();
    auto Memo = MemoMap.find(UselessPad);
    if (Memo != MemoMap.end() && Memo->second) {
      // Here the name 'UselessPad' is a bit of a misnomer, because we've found
      // that it is a funclet that does have information about unwinding to
      // a particular destination; its parent was a useless pad.
      // Since its parent has no information, the unwind edge must not escape
      // the parent, and must target a sibling of this pad. This local unwind
      // gives us no information about EHPad. Leave it and the subtree rooted
      // at it alone.
      assert(getParentPad(Memo->second) == getParentPad(UselessPad));
      continue;
    }
    // We know we don't have information for UselessPad. If it has an entry in
    // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
    // added on this invocation of getUnwindDestToken; if a previous invocation
    // recorded nullptr, it would have had to prove that the ancestors of
    // UselessPad, which include LastUselessPad, had no information, and that
    // in turn would have required proving that the descendants of
    // LastUselessPad, which include EHPad, have no information about
    // LastUselessPad, which would imply that EHPad was mapped to nullptr in
    // the MemoMap on that invocation, which isn't the case if we got here.
    assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
    // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
    // information that we'd be contradicting by making a map entry for it
    // (which is something that getUnwindDestTokenHelper must have proved for
    // us to get here). Just assert on its direct users here; the checks in
    // this downward walk at its descendants will verify that they don't have
    // any unwind edges that exit 'UselessPad' either (i.e. they either have no
    // unwind edges or unwind to a sibling).
    MemoMap[UselessPad] = UnwindDestToken;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
      assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
      for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
        auto *CatchPad = HandlerBlock->getFirstNonPHI();
        for (User *U : CatchPad->users()) {
          assert(
              (!isa<InvokeInst>(U) ||
               (getParentPad(
                    cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                CatchPad)) &&
              "Expected useless pad");
          if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
            Worklist.push_back(cast<Instruction>(U));
        }
      }
    } else {
      assert(isa<CleanupPadInst>(UselessPad));
      for (User *U : UselessPad->users()) {
        assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
        assert((!isa<InvokeInst>(U) ||
                (getParentPad(
                     cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                 UselessPad)) &&
               "Expected useless pad");
        if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
          Worklist.push_back(cast<Instruction>(U));
      }
    }
  }

  return UnwindDestToken;
}
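
// Purely illustrative sketch (hypothetical IR, not from a test): for
//
//   %cp = cleanuppad within none []
//   ...
//   cleanupret from %cp unwind to caller
//
// getUnwindDestToken(%cp, ...) returns ConstantTokenNone, whereas a
// "cleanupret from %cp unwind label %dest" would instead yield the first
// non-PHI instruction of %dest, and a cleanuppad with no proof either way
// yields nullptr.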

/// When we inline a basic block into an invoke,
/// we have to turn all of the calls that can throw into invokes.
/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to UnwindEdge; the caller is
/// expected to update the PHI nodes in that unwind destination with the
/// values for the new edge.
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
    BasicBlock *BB, BasicBlock *UnwindEdge,
    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
  for (Instruction &I : llvm::make_early_inc_range(*BB)) {
    // We only need to check for function calls: inlined invoke
    // instructions require no special handling.
    CallInst *CI = dyn_cast<CallInst>(&I);

    if (!CI || CI->doesNotThrow())
      continue;

    // We do not need to (and in fact, cannot) convert possibly throwing calls
    // to @llvm.experimental.deoptimize (resp. @llvm.experimental.guard) into
    // invokes. The caller's "segment" of the deoptimization continuation
    // attached to the newly inlined @llvm.experimental.deoptimize
    // (resp. @llvm.experimental.guard) call should contain the exception
    // handling logic, if any.
    if (auto *F = CI->getCalledFunction())
      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
          F->getIntrinsicID() == Intrinsic::experimental_guard)
        continue;

    if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
      // This call is nested inside a funclet. If that funclet has an unwind
      // destination within the inlinee, then unwinding out of this call would
      // be UB. Rewriting this call to an invoke which targets the inlined
      // invoke's unwind dest would give the call's parent funclet multiple
      // unwind destinations, which is something that subsequent EH table
      // generation can't handle and that the verifier rejects. So when we
      // see such a call, leave it as a call.
      auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
      Value *UnwindDestToken =
          getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
      if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
        continue;
#ifndef NDEBUG
      Instruction *MemoKey;
      if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
        MemoKey = CatchPad->getCatchSwitch();
      else
        MemoKey = FuncletPad;
      assert(FuncletUnwindMap->count(MemoKey) &&
             (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
             "must get memoized to avoid confusing later searches");
#endif // NDEBUG
    }

    changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
    return BB;
  }
  return nullptr;
}
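
// Illustrative sketch (hypothetical IR, assumed block names): inlining through
// an invoke rewrites a potentially-throwing call such as
//
//   call void @may_throw()
//
// into an invoke that splits the block at that point, e.g.
//
//   invoke void @may_throw()
//       to label %cont unwind label %lpad
//
// where %lpad is the caller's unwind edge passed in as UnwindEdge; the actual
// splitting is done by changeToInvokeAndSplitBasicBlock.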

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlinedCodeInfo is information about the code that got inlined.
static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                                    ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *InvokeDest = II->getUnwindDest();

  Function *Caller = FirstNewBlock->getParent();

  // The inlined code is currently at the end of the function; scan from the
  // start of the inlined code to its end, checking for stuff we need to
  // rewrite.
  LandingPadInliningInfo Invoke(II);

  // Get all of the inlined landing pad instructions.
  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
  for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
       I != E; ++I)
    if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
      InlinedLPads.insert(II->getLandingPadInst());

  // Append the clauses from the outer landing pad instruction into the inlined
  // landing pad instructions.
  LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
  for (LandingPadInst *InlinedLPad : InlinedLPads) {
    unsigned OuterNum = OuterLPad->getNumClauses();
    InlinedLPad->reserveClauses(OuterNum);
    for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
      InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
    if (OuterLPad->isCleanup())
      InlinedLPad->setCleanup(true);
  }

  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (InlinedCodeInfo.ContainsCalls)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, Invoke.getOuterResumeDest()))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        Invoke.addIncomingPHIValuesFor(NewBB);

    // Forward any resumes that are remaining here.
    if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
      Invoke.forwardResume(RI, InlinedLPads);
  }

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  InvokeDest->removePredecessor(II->getParent());
}

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlinedCodeInfo is information about the code that got inlined.
static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                               ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *UnwindDest = II->getUnwindDest();
  Function *Caller = FirstNewBlock->getParent();

  assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");

  // If there are PHI nodes in the unwind destination block, we need to keep
  // track of which values came into them from the invoke before removing the
  // edge from this block.
  SmallVector<Value *, 8> UnwindDestPHIValues;
  BasicBlock *InvokeBB = II->getParent();
  for (PHINode &PHI : UnwindDest->phis()) {
    // Save the value to use for this edge.
    UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB));
  }

  // Add incoming-PHI values to the unwind destination block for the given basic
  // block, using the values for the original invoke's source block.
  auto UpdatePHINodes = [&](BasicBlock *Src) {
    BasicBlock::iterator I = UnwindDest->begin();
    for (Value *V : UnwindDestPHIValues) {
      PHINode *PHI = cast<PHINode>(I);
      PHI->addIncoming(V, Src);
      ++I;
    }
  };

  // This connects all the instructions which 'unwind to caller' to the invoke
  // destination.
  UnwindDestMemoTy FuncletUnwindMap;
  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
      if (CRI->unwindsToCaller()) {
        auto *CleanupPad = CRI->getCleanupPad();
        CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI->getIterator());
        CRI->eraseFromParent();
        UpdatePHINodes(&*BB);
        // Finding a cleanupret with an unwind destination would confuse
        // subsequent calls to getUnwindDestToken, so map the cleanuppad
        // to short-circuit any such calls and recognize this as an "unwind
        // to caller" cleanup.
        assert(!FuncletUnwindMap.count(CleanupPad) ||
               isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
        FuncletUnwindMap[CleanupPad] =
            ConstantTokenNone::get(Caller->getContext());
      }
    }

    Instruction *I = BB->getFirstNonPHI();
    if (!I->isEHPad())
      continue;

    Instruction *Replacement = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
      if (CatchSwitch->unwindsToCaller()) {
        Value *UnwindDestToken;
        if (auto *ParentPad =
                dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
          // This catchswitch is nested inside another funclet. If that
          // funclet has an unwind destination within the inlinee, then
          // unwinding out of this catchswitch would be UB. Rewriting this
          // catchswitch to unwind to the inlined invoke's unwind dest would
          // give the parent funclet multiple unwind destinations, which is
          // something that subsequent EH table generation can't handle and
          // that the verifier rejects. So when we see such a call, leave it
          // as "unwind to caller".
          UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
          if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
            continue;
        } else {
          // This catchswitch has no parent to inherit constraints from, and
          // none of its descendants can have an unwind edge that exits it and
          // targets another funclet in the inlinee. It may or may not have a
          // descendant that definitively has an unwind to caller. In either
          // case, we'll have to assume that any unwinds out of it may need to
          // be routed to the caller, so treat it as though it has a definitive
          // unwind to caller.
          UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
        }
        auto *NewCatchSwitch = CatchSwitchInst::Create(
            CatchSwitch->getParentPad(), UnwindDest,
            CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
            CatchSwitch->getIterator());
        for (BasicBlock *PadBB : CatchSwitch->handlers())
          NewCatchSwitch->addHandler(PadBB);
        // Propagate info for the old catchswitch over to the new one in
        // the unwind map. This also serves to short-circuit any subsequent
        // checks for the unwind dest of this catchswitch, which would get
        // confused if they found the outer handler in the callee.
        FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
        Replacement = NewCatchSwitch;
      }
    } else if (!isa<FuncletPadInst>(I)) {
      llvm_unreachable("unexpected EHPad!");
    }

    if (Replacement) {
      Replacement->takeName(I);
      I->replaceAllUsesWith(Replacement);
      I->eraseFromParent();
      UpdatePHINodes(&*BB);
    }
  }

  if (InlinedCodeInfo.ContainsCalls)
    for (Function::iterator BB = FirstNewBlock->getIterator(),
                            E = Caller->end();
         BB != E; ++BB)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, UnwindDest, &FuncletUnwindMap))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        UpdatePHINodes(NewBB);

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  UnwindDest->removePredecessor(InvokeBB);
}

static bool haveCommonPrefix(MDNode *MIBStackContext,
                             MDNode *CallsiteStackContext) {
  assert(MIBStackContext->getNumOperands() > 0 &&
         CallsiteStackContext->getNumOperands() > 0);
  // Because of the context trimming performed during matching, the callsite
  // context could have more stack ids than the MIB. We match up to the end of
  // the shortest stack context.
  for (auto MIBStackIter = MIBStackContext->op_begin(),
            CallsiteStackIter = CallsiteStackContext->op_begin();
       MIBStackIter != MIBStackContext->op_end() &&
       CallsiteStackIter != CallsiteStackContext->op_end();
       MIBStackIter++, CallsiteStackIter++) {
    auto *Val1 = mdconst::dyn_extract<ConstantInt>(*MIBStackIter);
    auto *Val2 = mdconst::dyn_extract<ConstantInt>(*CallsiteStackIter);
    assert(Val1 && Val2);
    if (Val1->getZExtValue() != Val2->getZExtValue())
      return false;
  }
  return true;
}
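
// For illustration only (hypothetical, hand-written metadata): with an MIB
// stack context of !{i64 1, i64 2, i64 3} and a callsite stack context of
// !{i64 1, i64 2}, haveCommonPrefix returns true because the two agree up to
// the end of the shorter context; a callsite context of !{i64 1, i64 4}
// would not match.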

static void removeMemProfMetadata(CallBase *Call) {
  Call->setMetadata(LLVMContext::MD_memprof, nullptr);
}

static void removeCallsiteMetadata(CallBase *Call) {
  Call->setMetadata(LLVMContext::MD_callsite, nullptr);
}

static void updateMemprofMetadata(CallBase *CI,
                                  const std::vector<Metadata *> &MIBList) {
  assert(!MIBList.empty());
  // Remove existing memprof, which will either be replaced or may not be needed
  // if we are able to use a single allocation type function attribute.
  removeMemProfMetadata(CI);
  CallStackTrie CallStack;
  for (Metadata *MIB : MIBList)
    CallStack.addCallStack(cast<MDNode>(MIB));
  bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
  assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof));
  if (!MemprofMDAttached)
    // If we used a function attribute remove the callsite metadata as well.
    removeCallsiteMetadata(CI);
}

// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
// inlined callee body, based on the callsite metadata InlinedCallsiteMD from
// the call that was inlined.
static void propagateMemProfHelper(const CallBase *OrigCall,
                                   CallBase *ClonedCall,
                                   MDNode *InlinedCallsiteMD) {
  MDNode *OrigCallsiteMD = ClonedCall->getMetadata(LLVMContext::MD_callsite);
  MDNode *ClonedCallsiteMD = nullptr;
  // Check if the call originally had callsite metadata, and update it for the
  // new call in the inlined body.
  if (OrigCallsiteMD) {
    // The cloned call's context is now the concatenation of the original call's
    // callsite metadata and the callsite metadata on the call where it was
    // inlined.
    ClonedCallsiteMD = MDNode::concatenate(OrigCallsiteMD, InlinedCallsiteMD);
    ClonedCall->setMetadata(LLVMContext::MD_callsite, ClonedCallsiteMD);
  }

  // Update any memprof metadata on the cloned call.
  MDNode *OrigMemProfMD = ClonedCall->getMetadata(LLVMContext::MD_memprof);
  if (!OrigMemProfMD)
    return;
  // We currently expect that allocations with memprof metadata also have
  // callsite metadata for the allocation's part of the context.
  assert(OrigCallsiteMD);

  // New call's MIB list.
  std::vector<Metadata *> NewMIBList;

  // For each MIB metadata, check if its call stack context starts with the
  // new clone's callsite metadata. If so, that MIB goes onto the cloned call in
  // the inlined body. If not, it stays on the out-of-line original call.
  for (auto &MIBOp : OrigMemProfMD->operands()) {
    MDNode *MIB = dyn_cast<MDNode>(MIBOp);
    // Stack is first operand of MIB.
    MDNode *StackMD = getMIBStackNode(MIB);
    assert(StackMD);
    // See if the new cloned callsite context matches this profiled context.
    if (haveCommonPrefix(StackMD, ClonedCallsiteMD))
      // Add it to the cloned call's MIB list.
      NewMIBList.push_back(MIB);
  }
  if (NewMIBList.empty()) {
    removeMemProfMetadata(ClonedCall);
    removeCallsiteMetadata(ClonedCall);
    return;
  }
  if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
    updateMemprofMetadata(ClonedCall, NewMIBList);
}
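
// Illustrative sketch (hypothetical !callsite metadata): if the cloned call
// carried !callsite !{i64 123} inside the callee and the inlined call site
// itself carried !callsite !{i64 456}, the clone's context becomes the
// concatenation !{i64 123, i64 456}; any !memprof MIB whose stack does not
// share a prefix with that new context stays on the out-of-line original.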

// Update memprof related metadata (!memprof and !callsite) based on the
// inlining of Callee into the callsite at CB. The updates include merging the
// inlined callee's callsite metadata with that of the inlined call,
// and moving the subset of any memprof contexts to the inlined callee
// allocations if they match the new inlined call stack.
static void
propagateMemProfMetadata(Function *Callee, CallBase &CB,
                         bool ContainsMemProfMetadata,
                         const ValueMap<const Value *, WeakTrackingVH> &VMap) {
  MDNode *CallsiteMD = CB.getMetadata(LLVMContext::MD_callsite);
  // Only need to update if the inlined callsite had callsite metadata, or if
  // there was any memprof metadata inlined.
  if (!CallsiteMD && !ContainsMemProfMetadata)
    return;

  // Propagate metadata onto the cloned calls in the inlined callee.
  for (const auto &Entry : VMap) {
    // See if this is a call that has been inlined and remapped, and not
    // simplified away in the process.
    auto *OrigCall = dyn_cast_or_null<CallBase>(Entry.first);
    auto *ClonedCall = dyn_cast_or_null<CallBase>(Entry.second);
    if (!OrigCall || !ClonedCall)
      continue;
    // If the inlined callsite did not have any callsite metadata, then it isn't
    // involved in any profiled call contexts, and we can remove any memprof
    // metadata on the cloned call.
    if (!CallsiteMD) {
      removeMemProfMetadata(ClonedCall);
      removeCallsiteMetadata(ClonedCall);
      continue;
    }
    propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD);
  }
}

/// When inlining a call site that has !llvm.mem.parallel_loop_access,
/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
/// be propagated to all memory-accessing cloned instructions.
static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
                                      Function::iterator FEnd) {
  MDNode *MemParallelLoopAccess =
      CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
  MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope);
  MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias);
  if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
    return;

  for (BasicBlock &BB : make_range(FStart, FEnd)) {
    for (Instruction &I : BB) {
      // This metadata is only relevant for instructions that access memory.
      if (!I.mayReadOrWriteMemory())
        continue;

      if (MemParallelLoopAccess) {
        // TODO: This probably should not overwrite MemParallelLoopAccess.
        MemParallelLoopAccess = MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
            MemParallelLoopAccess);
        I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
                      MemParallelLoopAccess);
      }

      if (AccessGroup)
        I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
            I.getMetadata(LLVMContext::MD_access_group), AccessGroup));

      if (AliasScope)
        I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));

      if (NoAlias)
        I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_noalias), NoAlias));
    }
  }
}

/// Bundle operands of the inlined function must be added to inlined call sites.
static void PropagateOperandBundles(Function::iterator InlinedBB,
                                    Instruction *CallSiteEHPad) {
  for (Instruction &II : llvm::make_early_inc_range(*InlinedBB)) {
    CallBase *I = dyn_cast<CallBase>(&II);
    if (!I)
      continue;
    // Skip call sites which already have a "funclet" bundle.
    if (I->getOperandBundle(LLVMContext::OB_funclet))
      continue;
    // Skip call sites which are nounwind intrinsics (as long as they don't
    // lower into regular function calls in the course of IR transformations).
    auto *CalledFn =
        dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts());
    if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
        !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID()))
      continue;

    SmallVector<OperandBundleDef, 1> OpBundles;
    I->getOperandBundlesAsDefs(OpBundles);
    OpBundles.emplace_back("funclet", CallSiteEHPad);

    Instruction *NewInst = CallBase::Create(I, OpBundles, I->getIterator());
    NewInst->takeName(I);
    I->replaceAllUsesWith(NewInst);
    I->eraseFromParent();
  }
}
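
// Illustrative sketch (hypothetical IR): when the call site being inlined
// sits inside a caller funclet such as %cs = catchpad within ..., a cloned
// call like
//
//   call void @g()
//
// is recreated by PropagateOperandBundles as
//
//   call void @g() [ "funclet"(token %cs) ]
//
// so that EH table generation can attribute the cloned call to the correct
// funclet in the caller.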

namespace {
/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
/// hold between the two versions. It is necessary to create a deep clone of the
/// metadata, putting the two versions in separate scope domains.
class ScopedAliasMetadataDeepCloner {
  using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
  SetVector<const MDNode *> MD;
  MetadataMap MDMap;
  void addRecursiveMetadataUses();

public:
  ScopedAliasMetadataDeepCloner(const Function *F);

  /// Create a new clone of the scoped alias metadata, which will be used by
  /// subsequent remap() calls.
  void clone();

  /// Remap instructions in the given range from the original to the cloned
  /// metadata.
  void remap(Function::iterator FStart, Function::iterator FEnd);
};
} // namespace

ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
    const Function *F) {
  for (const BasicBlock &BB : *F) {
    for (const Instruction &I : BB) {
      if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
        MD.insert(M);
      if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
        MD.insert(M);

      // We also need to clone the metadata in noalias intrinsics.
      if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
        MD.insert(Decl->getScopeList());
    }
  }
  addRecursiveMetadataUses();
}

void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
  SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
  while (!Queue.empty()) {
    const MDNode *M = cast<MDNode>(Queue.pop_back_val());
    for (const Metadata *Op : M->operands())
      if (const MDNode *OpMD = dyn_cast<MDNode>(Op))
        if (MD.insert(OpMD))
          Queue.push_back(OpMD);
  }
}

void ScopedAliasMetadataDeepCloner::clone() {
  assert(MDMap.empty() && "clone() already called ?");

  SmallVector<TempMDTuple, 16> DummyNodes;
  for (const MDNode *I : MD) {
    DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), std::nullopt));
    MDMap[I].reset(DummyNodes.back().get());
  }

  // Create new metadata nodes to replace the dummy nodes, replacing old
  // metadata references with either a dummy node or an already-created new
  // node.
  SmallVector<Metadata *, 4> NewOps;
  for (const MDNode *I : MD) {
    for (const Metadata *Op : I->operands()) {
      if (const MDNode *M = dyn_cast<MDNode>(Op))
        NewOps.push_back(MDMap[M]);
      else
        NewOps.push_back(const_cast<Metadata *>(Op));
    }

    MDNode *NewM = MDNode::get(I->getContext(), NewOps);
    MDTuple *TempM = cast<MDTuple>(MDMap[I]);
    assert(TempM->isTemporary() && "Expected temporary node");

    TempM->replaceAllUsesWith(NewM);
    NewOps.clear();
  }
}

void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
                                          Function::iterator FEnd) {
  if (MDMap.empty())
    return; // Nothing to do.

  for (BasicBlock &BB : make_range(FStart, FEnd)) {
    for (Instruction &I : BB) {
      // TODO: The null checks for the MDMap.lookup() results should no longer
      // be necessary.
      if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
        if (MDNode *MNew = MDMap.lookup(M))
          I.setMetadata(LLVMContext::MD_alias_scope, MNew);

      if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
        if (MDNode *MNew = MDMap.lookup(M))
          I.setMetadata(LLVMContext::MD_noalias, MNew);

      if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
        if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
          Decl->setScopeList(MNew);
    }
  }
}
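
// Illustrative note (an assumed scenario, not from a test): if a callee whose
// accesses are tagged !alias.scope !1 / !noalias !2 is inlined twice into the
// same caller, reusing !1 and !2 verbatim for both copies could claim
// non-aliasing between accesses that now come from different inlined copies.
// The deep clone above gives each inlined copy a fresh set of scope and
// domain nodes, so the original relationships only hold within one copy.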

/// If the inlined function has noalias arguments,
/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
                                  const DataLayout &DL, AAResults *CalleeAAR,
                                  ClonedCodeInfo &InlinedFunctionInfo) {
  if (!EnableNoAliasConversion)
    return;

  const Function *CalledFunc = CB.getCalledFunction();
  SmallVector<const Argument *, 4> NoAliasArgs;

  for (const Argument &Arg : CalledFunc->args())
    if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty())
      NoAliasArgs.push_back(&Arg);

  if (NoAliasArgs.empty())
    return;

  // To do a good job, if a noalias variable is captured, we need to know if
  // the capture point dominates the particular use we're considering.
  DominatorTree DT;
  DT.recalculate(const_cast<Function&>(*CalledFunc));

  // noalias indicates that pointer values based on the argument do not alias
  // pointer values which are not based on it. So we add a new "scope" for each
  // noalias function argument. Accesses using pointers based on that argument
  // become part of that alias scope, accesses using pointers not based on that
  // argument are tagged as noalias with that scope.

  DenseMap<const Argument *, MDNode *> NewScopes;
  MDBuilder MDB(CalledFunc->getContext());

  // Create a new scope domain for this function.
  MDNode *NewDomain =
      MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
  for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
    const Argument *A = NoAliasArgs[i];

    std::string Name = std::string(CalledFunc->getName());
    if (A->hasName()) {
      Name += ": %";
      Name += A->getName();
    } else {
      Name += ": argument ";
      Name += utostr(i);
    }

    // Note: We always create a new anonymous root here. This is true regardless
    // of the linkage of the callee because the aliasing "scope" is not just a
    // property of the callee, but also all control dependencies in the caller.
    MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
    NewScopes.insert(std::make_pair(A, NewScope));

    if (UseNoAliasIntrinsic) {
      // Introduce a llvm.experimental.noalias.scope.decl for the noalias
      // argument.
      MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope);
      auto *NoAliasDecl =
          IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList);
      // Ignore the result for now. The result will be used when the
      // llvm.noalias intrinsic is introduced.
      (void)NoAliasDecl;
    }
  }

  // Iterate over all new instructions in the map; for all memory-access
  // instructions, add the alias scope metadata.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
      if (!VMI->second)
        continue;

      Instruction *NI = dyn_cast<Instruction>(VMI->second);
      if (!NI || InlinedFunctionInfo.isSimplified(I, NI))
        continue;

      bool IsArgMemOnlyCall = false, IsFuncCall = false;
      SmallVector<const Value *, 2> PtrArgs;

      if (const LoadInst *LI = dyn_cast<LoadInst>(I))
        PtrArgs.push_back(LI->getPointerOperand());
      else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
        PtrArgs.push_back(SI->getPointerOperand());
      else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
        PtrArgs.push_back(VAAI->getPointerOperand());
      else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
        PtrArgs.push_back(CXI->getPointerOperand());
      else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
        PtrArgs.push_back(RMWI->getPointerOperand());
      else if (const auto *Call = dyn_cast<CallBase>(I)) {
        // If we know that the call does not access memory, then we'll still
        // know that about the inlined clone of this call site, and we don't
        // need to add metadata.
        if (Call->doesNotAccessMemory())
          continue;

        IsFuncCall = true;
        if (CalleeAAR) {
          MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);

          // We'll retain this knowledge without additional metadata.
          if (ME.onlyAccessesInaccessibleMem())
            continue;

          if (ME.onlyAccessesArgPointees())
            IsArgMemOnlyCall = true;
        }

        for (Value *Arg : Call->args()) {
          // Only care about pointer arguments. If a noalias argument is
          // accessed through a non-pointer argument, it must be captured
          // first (e.g. via ptrtoint), and we protect against captures below.
          if (!Arg->getType()->isPointerTy())
            continue;

          PtrArgs.push_back(Arg);
        }
      }

      // If we found no pointers, then this instruction is not suitable for
      // pairing with an instruction to receive aliasing metadata.
      // However, if this is a call, we might just alias with none of the
      // noalias arguments.
      if (PtrArgs.empty() && !IsFuncCall)
        continue;

      // It is possible that there is only one underlying object, but you
      // need to go through several PHIs to see it, and thus could be
      // repeated in the Objects list.
      SmallPtrSet<const Value *, 4> ObjSet;
      SmallVector<Metadata *, 4> Scopes, NoAliases;

      SmallSetVector<const Argument *, 4> NAPtrArgs;
      for (const Value *V : PtrArgs) {
        SmallVector<const Value *, 4> Objects;
        getUnderlyingObjects(V, Objects, /* LI = */ nullptr);

        for (const Value *O : Objects)
          ObjSet.insert(O);
      }

      // Figure out if we're derived from anything that is not a noalias
      // argument.
      bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
           UsesUnknownObject = false;
      for (const Value *V : ObjSet) {
        // Is this value a constant that cannot be derived from any pointer
        // value (we need to exclude constant expressions, for example, that
        // are formed from arithmetic on global symbols).
        bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
                             isa<ConstantPointerNull>(V) ||
                             isa<ConstantDataVector>(V) || isa<UndefValue>(V);
        if (IsNonPtrConst)
          continue;

        // If this is anything other than a noalias argument, then we cannot
        // completely describe the aliasing properties using alias.scope
        // metadata (and, thus, won't add any).
        if (const Argument *A = dyn_cast<Argument>(V)) {
          if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias))
            UsesAliasingPtr = true;
        } else {
          UsesAliasingPtr = true;
        }

        if (isEscapeSource(V)) {
          // An escape source can only alias with a noalias argument if it has
          // been captured beforehand.
          RequiresNoCaptureBefore = true;
        } else if (!isa<Argument>(V) && !isIdentifiedObject(V)) {
          // If this is neither an escape source, nor some identified object
          // (which cannot directly alias a noalias argument), nor some other
          // argument (which, by definition, also cannot alias a noalias
          // argument), conservatively do not make any assumptions.
          UsesUnknownObject = true;
        }
      }

      // Nothing we can do if the used underlying object cannot be reliably
      // determined.
      if (UsesUnknownObject)
        continue;

      // A function call can always get captured noalias pointers (via other
      // parameters, globals, etc.).
      if (IsFuncCall && !IsArgMemOnlyCall)
        RequiresNoCaptureBefore = true;

      // First, we want to figure out all of the sets with which we definitely
      // don't alias. Iterate over all noalias sets, and add those for which:
      // 1. The noalias argument is not in the set of objects from which we
      //    definitely derive.
      // 2. The noalias argument has not yet been captured.
      // An arbitrary function that might load pointers could see captured
      // noalias arguments via other noalias arguments or globals, and so we
      // must always check for prior capture.
      for (const Argument *A : NoAliasArgs) {
        if (ObjSet.contains(A))
          continue; // May be based on a noalias argument.

        // It might be tempting to skip the PointerMayBeCapturedBefore check if
        // A->hasNoCaptureAttr() is true, but this is incorrect because
        // nocapture only guarantees that no copies outlive the function, not
        // that the value cannot be locally captured.
        if (!RequiresNoCaptureBefore ||
            !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false,
                                        /* StoreCaptures */ false, I, &DT))
          NoAliases.push_back(NewScopes[A]);
      }

      if (!NoAliases.empty())
        NI->setMetadata(LLVMContext::MD_noalias,
                        MDNode::concatenate(
                            NI->getMetadata(LLVMContext::MD_noalias),
                            MDNode::get(CalledFunc->getContext(), NoAliases)));

      // Next, we want to figure out all of the sets to which we might belong.
      // We might belong to a set if the noalias argument is in the set of
      // underlying objects. If there is some non-noalias argument in our list
      // of underlying objects, then we cannot add a scope because the fact
      // that some access does not alias with any set of our noalias arguments
      // cannot itself guarantee that it does not alias with this access
      // (because there is some pointer of unknown origin involved and the
      // other access might also depend on this pointer). We also cannot add
      // scopes to arbitrary functions unless we know they don't access any
      // non-parameter pointer-values.
      bool CanAddScopes = !UsesAliasingPtr;
      if (CanAddScopes && IsFuncCall)
        CanAddScopes = IsArgMemOnlyCall;

      if (CanAddScopes)
        for (const Argument *A : NoAliasArgs) {
          if (ObjSet.count(A))
            Scopes.push_back(NewScopes[A]);
        }

      if (!Scopes.empty())
        NI->setMetadata(
            LLVMContext::MD_alias_scope,
            MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
                                MDNode::get(CalledFunc->getContext(), Scopes)));
    }
  }
}
1335
1336static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
1337 ReturnInst *End) {
1338
1339 assert(Begin->getParent() == End->getParent() &&
1340 "Expected to be in same basic block!");
1341 auto BeginIt = Begin->getIterator();
1342 assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
1343 return !llvm::isGuaranteedToTransferExecutionToSuccessor(
1344 Begin: ++BeginIt, End: End->getIterator(), ScanLimit: InlinerAttributeWindow + 1);
1345}
1346
// Only allow these whitelisted attributes to be propagated back to the
// callee. This is because other attributes may only be valid on the call
// itself, e.g. attributes such as signext and zeroext.

// Attributes that are always okay to propagate, since violating them results
// in immediate UB.
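// For example (illustrative): a call site declared as
//   %r = call noalias noundef dereferenceable(16) ptr @callee()
// contributes noalias, noundef and dereferenceable(16) to this set, since the
// code treats a return value violating any of them as immediate UB.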
1353static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {
1354 AttrBuilder Valid(CB.getContext());
1355 if (auto DerefBytes = CB.getRetDereferenceableBytes())
1356 Valid.addDereferenceableAttr(Bytes: DerefBytes);
1357 if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())
1358 Valid.addDereferenceableOrNullAttr(Bytes: DerefOrNullBytes);
1359 if (CB.hasRetAttr(Attribute::NoAlias))
1360 Valid.addAttribute(Attribute::NoAlias);
1361 if (CB.hasRetAttr(Attribute::NoUndef))
1362 Valid.addAttribute(Attribute::NoUndef);
1363 return Valid;
1364}
1365
1366// Attributes that need additional checks as propagating them may change
1367// behavior or cause new UB.
1368static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {
1369 AttrBuilder Valid(CB.getContext());
1370 if (CB.hasRetAttr(Attribute::NonNull))
1371 Valid.addAttribute(Attribute::NonNull);
1372 if (CB.hasRetAttr(Attribute::Alignment))
1373 Valid.addAlignmentAttr(Align: CB.getRetAlign());
1374 return Valid;
1375}
1376
1377static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
1378 AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);
1379 AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);
1380 if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes())
1381 return;
1382 auto *CalledFunction = CB.getCalledFunction();
1383 auto &Context = CalledFunction->getContext();
1384
1385 for (auto &BB : *CalledFunction) {
1386 auto *RI = dyn_cast<ReturnInst>(Val: BB.getTerminator());
1387 if (!RI || !isa<CallBase>(Val: RI->getOperand(i_nocapture: 0)))
1388 continue;
1389 auto *RetVal = cast<CallBase>(Val: RI->getOperand(i_nocapture: 0));
1390 // Check that the cloned RetVal exists and is a call, otherwise we cannot
1391 // add the attributes on the cloned RetVal. Simplification during inlining
1392 // could have transformed the cloned instruction.
1393 auto *NewRetVal = dyn_cast_or_null<CallBase>(Val: VMap.lookup(Val: RetVal));
1394 if (!NewRetVal)
1395 continue;
1396 // Backward propagation of attributes to the returned value may be incorrect
1397 // if it is control flow dependent.
1398 // Consider:
1399 // @callee {
1400 // %rv = call @foo()
1401 // %rv2 = call @bar()
1402 // if (%rv2 != null)
1403 // return %rv2
1404 // if (%rv == null)
1405 // exit()
1406 // return %rv
1407 // }
1408 // caller() {
1409 // %val = call nonnull @callee()
1410 // }
    // Here we cannot add the nonnull attribute on either foo or bar. So, we
    // require that RetVal and RI are in the same basic block and that there
    // are no throwing/exiting instructions between them.
1414 if (RI->getParent() != RetVal->getParent() ||
1415 MayContainThrowingOrExitingCallAfterCB(Begin: RetVal, End: RI))
1416 continue;
1417 // Add to the existing attributes of NewRetVal, i.e. the cloned call
1418 // instruction.
    // NB! When the same attribute already exists on NewRetVal but with a
    // differing value, the AttributeList merge API honours the already
    // existing attribute value (e.g. for attributes such as dereferenceable
    // and dereferenceable_or_null). See AttrBuilder::merge for more details.
1423 AttributeList AL = NewRetVal->getAttributes();
1424 if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes())
1425 ValidUB.removeAttribute(Attribute::Dereferenceable);
1426 if (ValidUB.getDereferenceableOrNullBytes() <
1427 AL.getRetDereferenceableOrNullBytes())
1428 ValidUB.removeAttribute(Attribute::DereferenceableOrNull);
1429 AttributeList NewAL = AL.addRetAttributes(C&: Context, B: ValidUB);
    // Attributes that may generate poison returns are a bit tricky. If we
    // propagate them, other uses of the callsite might have their behavior
    // change or cause UB (if they have noundef) because of the new potential
    // poison.
1434 // Take the following three cases:
1435 //
1436 // 1)
1437 // define nonnull ptr @foo() {
1438 // %p = call ptr @bar()
1439 // call void @use(ptr %p) willreturn nounwind
1440 // ret ptr %p
1441 // }
1442 //
1443 // 2)
1444 // define noundef nonnull ptr @foo() {
1445 // %p = call ptr @bar()
1446 // call void @use(ptr %p) willreturn nounwind
1447 // ret ptr %p
1448 // }
1449 //
1450 // 3)
1451 // define nonnull ptr @foo() {
1452 // %p = call noundef ptr @bar()
1453 // ret ptr %p
1454 // }
1455 //
    // In case 1, we can't propagate nonnull because a poison value reaching
    // @use may change behavior or trigger UB.
    // In case 2, we don't need to be concerned about propagating nonnull, as
    // any new poison at @use will trigger UB anyway.
1460 // In case 3, we can never propagate nonnull because it may create UB due to
1461 // the noundef on @bar.
1462 if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne())
1463 ValidPG.removeAttribute(Attribute::Alignment);
1464 if (ValidPG.hasAttributes()) {
      // Three checks.
      // If the callsite has `noundef`, then a poison due to violating the
      // return attribute will create UB anyway, so we can always propagate.
      // Otherwise, if the call producing the return value (inside the callee
      // being inlined) has `noundef` on its return, we can't propagate, as a
      // new poison return will cause UB.
      // Finally, check that the return value has no uses whose behavior may
      // change or cause UB if we potentially return poison. At the moment
      // this is implemented overly conservatively with a single-use check.
1473 // TODO: Update the single-use check to iterate through uses and only bail
1474 // if we have a potentially dangerous use.
1475
1476 if (CB.hasRetAttr(Attribute::NoUndef) ||
1477 (RetVal->hasOneUse() && !RetVal->hasRetAttr(Attribute::NoUndef)))
1478 NewAL = NewAL.addRetAttributes(C&: Context, B: ValidPG);
1479 }
1480 NewRetVal->setAttributes(NewAL);
1481 }
1482}
1483
1484/// If the inlined function has non-byval align arguments, then
1485/// add @llvm.assume-based alignment assumptions to preserve this information.
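/// For example (illustrative): given a callee parameter declared as
///   define void @callee(ptr align 32 %p)
/// an @llvm.assume carrying a 32-byte alignment assumption on the
/// corresponding actual argument is emitted at the call site, unless that
/// alignment is already provable there.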
1486static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
1487 if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
1488 return;
1489
1490 AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());
1491 auto &DL = CB.getCaller()->getParent()->getDataLayout();
1492
1493 // To avoid inserting redundant assumptions, we should check for assumptions
1494 // already in the caller. To do this, we might need a DT of the caller.
1495 DominatorTree DT;
1496 bool DTCalculated = false;
1497
1498 Function *CalledFunc = CB.getCalledFunction();
1499 for (Argument &Arg : CalledFunc->args()) {
1500 if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() ||
1501 Arg.hasNUses(N: 0))
1502 continue;
1503 MaybeAlign Alignment = Arg.getParamAlign();
1504 if (!Alignment)
1505 continue;
1506
1507 if (!DTCalculated) {
1508 DT.recalculate(Func&: *CB.getCaller());
1509 DTCalculated = true;
1510 }
1511 // If we can already prove the asserted alignment in the context of the
1512 // caller, then don't bother inserting the assumption.
1513 Value *ArgVal = CB.getArgOperand(i: Arg.getArgNo());
1514 if (getKnownAlignment(V: ArgVal, DL, CxtI: &CB, AC, DT: &DT) >= *Alignment)
1515 continue;
1516
1517 CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(
1518 DL, PtrValue: ArgVal, Alignment: Alignment->value());
1519 AC->registerAssumption(CI: cast<AssumeInst>(Val: NewAsmp));
1520 }
1521}
1522
1523static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
1524 Module *M, BasicBlock *InsertBlock,
1525 InlineFunctionInfo &IFI,
1526 Function *CalledFunc) {
1527 IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
1528
1529 Value *Size =
1530 Builder.getInt64(C: M->getDataLayout().getTypeStoreSize(Ty: ByValType));
1531
1532 // Always generate a memcpy of alignment 1 here because we don't know
1533 // the alignment of the src pointer. Other optimizations can infer
1534 // better alignment.
1535 CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
1536 /*SrcAlign*/ Align(1), Size);
1537
1538 // The verifier requires that all calls of debug-info-bearing functions
1539 // from debug-info-bearing functions have a debug location (for inlining
1540 // purposes). Assign a dummy location to satisfy the constraint.
1541 if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram())
1542 if (DISubprogram *SP = CalledFunc->getSubprogram())
1543 CI->setDebugLoc(DILocation::get(Context&: SP->getContext(), Line: 0, Column: 0, Scope: SP));
1544}
1545
1546/// When inlining a call site that has a byval argument,
1547/// we have to make the implicit memcpy explicit by adding it.
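/// For example (illustrative): for a call site passing
///   call void @callee(ptr byval(%struct.S) align 8 %p)
/// into a callee that may write to its copy, a new static alloca of
/// %struct.S is created in the caller's entry block and later filled in with
/// a memcpy from %p (see HandleByValArgumentInit); a readonly callee can
/// keep using %p directly.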
1548static Value *HandleByValArgument(Type *ByValType, Value *Arg,
1549 Instruction *TheCall,
1550 const Function *CalledFunc,
1551 InlineFunctionInfo &IFI,
1552 MaybeAlign ByValAlignment) {
1553 Function *Caller = TheCall->getFunction();
1554 const DataLayout &DL = Caller->getParent()->getDataLayout();
1555
1556 // If the called function is readonly, then it could not mutate the caller's
1557 // copy of the byval'd memory. In this case, it is safe to elide the copy and
1558 // temporary.
1559 if (CalledFunc->onlyReadsMemory()) {
    // If the byval argument has a specified alignment that is greater than
    // the alignment of the passed-in pointer, then we either have to round up
    // the input pointer's alignment or give up on this transformation.
1563 if (ByValAlignment.valueOrOne() == 1)
1564 return Arg;
1565
1566 AssumptionCache *AC =
1567 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
1568
1569 // If the pointer is already known to be sufficiently aligned, or if we can
1570 // round it up to a larger alignment, then we don't need a temporary.
1571 if (getOrEnforceKnownAlignment(V: Arg, PrefAlign: *ByValAlignment, DL, CxtI: TheCall, AC) >=
1572 *ByValAlignment)
1573 return Arg;
1574
1575 // Otherwise, we have to make a memcpy to get a safe alignment. This is bad
1576 // for code quality, but rarely happens and is required for correctness.
1577 }
1578
  // Create the alloca, using the DataLayout's preferred alignment for the
  // byval type.
1580 Align Alignment = DL.getPrefTypeAlign(Ty: ByValType);
1581
1582 // If the byval had an alignment specified, we *must* use at least that
1583 // alignment, as it is required by the byval argument (and uses of the
1584 // pointer inside the callee).
1585 if (ByValAlignment)
1586 Alignment = std::max(a: Alignment, b: *ByValAlignment);
1587
1588 AllocaInst *NewAlloca = new AllocaInst(ByValType, DL.getAllocaAddrSpace(),
1589 nullptr, Alignment, Arg->getName());
1590 NewAlloca->insertBefore(InsertPos: Caller->begin()->begin());
1591 IFI.StaticAllocas.push_back(Elt: NewAlloca);
1592
1593 // Uses of the argument in the function should use our new alloca
1594 // instead.
1595 return NewAlloca;
1596}
1597
1598// Check whether this Value is used by a lifetime intrinsic.
1599static bool isUsedByLifetimeMarker(Value *V) {
1600 for (User *U : V->users())
1601 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: U))
1602 if (II->isLifetimeStartOrEnd())
1603 return true;
1604 return false;
1605}
1606
1607// Check whether the given alloca already has
1608// lifetime.start or lifetime.end intrinsics.
1609static bool hasLifetimeMarkers(AllocaInst *AI) {
1610 Type *Ty = AI->getType();
1611 Type *Int8PtrTy =
1612 PointerType::get(C&: Ty->getContext(), AddressSpace: Ty->getPointerAddressSpace());
1613 if (Ty == Int8PtrTy)
1614 return isUsedByLifetimeMarker(V: AI);
1615
1616 // Do a scan to find all the casts to i8*.
1617 for (User *U : AI->users()) {
1618 if (U->getType() != Int8PtrTy) continue;
1619 if (U->stripPointerCasts() != AI) continue;
1620 if (isUsedByLifetimeMarker(V: U))
1621 return true;
1622 }
1623 return false;
1624}
1625
1626/// Return the result of AI->isStaticAlloca() if AI were moved to the entry
1627/// block. Allocas used in inalloca calls and allocas of dynamic array size
1628/// cannot be static.
static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) {
1630 return isa<Constant>(Val: AI->getArraySize()) && !AI->isUsedWithInAlloca();
1631}
1632
1633/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL
1634/// inlined at \p InlinedAt. \p IANodes is an inlined-at cache.
1635static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
1636 LLVMContext &Ctx,
1637 DenseMap<const MDNode *, MDNode *> &IANodes) {
1638 auto IA = DebugLoc::appendInlinedAt(DL: OrigDL, InlinedAt, Ctx, Cache&: IANodes);
1639 return DILocation::get(Context&: Ctx, Line: OrigDL.getLine(), Column: OrigDL.getCol(),
1640 Scope: OrigDL.getScope(), InlinedAt: IA);
1641}
1642
/// Update inlined instructions' line numbers to encode the location where
/// these instructions are inlined.
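/// For example (illustrative): a cloned instruction carrying
///   !dbg !DILocation(line: 7, scope: !callee_subprogram)
/// keeps its line and scope but gets an inlinedAt chain ending in a distinct
/// clone of the call site's DILocation, so debuggers can attribute it to both
/// the callee and the call site.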
1645static void fixupLineNumbers(Function *Fn, Function::iterator FI,
1646 Instruction *TheCall, bool CalleeHasDebugInfo) {
1647 const DebugLoc &TheCallDL = TheCall->getDebugLoc();
1648 if (!TheCallDL)
1649 return;
1650
1651 auto &Ctx = Fn->getContext();
1652 DILocation *InlinedAtNode = TheCallDL;
1653
1654 // Create a unique call site, not to be confused with any other call from the
1655 // same location.
1656 InlinedAtNode = DILocation::getDistinct(
1657 Context&: Ctx, Line: InlinedAtNode->getLine(), Column: InlinedAtNode->getColumn(),
1658 Scope: InlinedAtNode->getScope(), InlinedAt: InlinedAtNode->getInlinedAt());
1659
  // Cache the inlined-at nodes as they're built so they are reused; without
  // this, every instruction's inlined-at chain would become distinct from the
  // others.
1663 DenseMap<const MDNode *, MDNode *> IANodes;
1664
1665 // Check if we are not generating inline line tables and want to use
1666 // the call site location instead.
1667 bool NoInlineLineTables = Fn->hasFnAttribute(Kind: "no-inline-line-tables");
1668
1669 // Helper-util for updating the metadata attached to an instruction.
1670 auto UpdateInst = [&](Instruction &I) {
1671 // Loop metadata needs to be updated so that the start and end locs
1672 // reference inlined-at locations.
1673 auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
1674 &IANodes](Metadata *MD) -> Metadata * {
1675 if (auto *Loc = dyn_cast_or_null<DILocation>(Val: MD))
1676 return inlineDebugLoc(OrigDL: Loc, InlinedAt: InlinedAtNode, Ctx, IANodes).get();
1677 return MD;
1678 };
1679 updateLoopMetadataDebugLocations(I, Updater: updateLoopInfoLoc);
1680
1681 if (!NoInlineLineTables)
1682 if (DebugLoc DL = I.getDebugLoc()) {
1683 DebugLoc IDL =
1684 inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode, Ctx&: I.getContext(), IANodes);
1685 I.setDebugLoc(IDL);
1686 return;
1687 }
1688
1689 if (CalleeHasDebugInfo && !NoInlineLineTables)
1690 return;
1691
1692 // If the inlined instruction has no line number, or if inline info
1693 // is not being generated, make it look as if it originates from the call
1694 // location. This is important for ((__always_inline, __nodebug__))
1695 // functions which must use caller location for all instructions in their
1696 // function body.
1697
1698 // Don't update static allocas, as they may get moved later.
1699 if (auto *AI = dyn_cast<AllocaInst>(Val: &I))
1700 if (allocaWouldBeStaticInEntry(AI))
1701 return;
1702
1703 // Do not force a debug loc for pseudo probes, since they do not need to
1704 // be debuggable, and also they are expected to have a zero/null dwarf
1705 // discriminator at this point which could be violated otherwise.
1706 if (isa<PseudoProbeInst>(Val: I))
1707 return;
1708
1709 I.setDebugLoc(TheCallDL);
1710 };
1711
1712 // Helper-util for updating debug-info records attached to instructions.
1713 auto UpdateDVR = [&](DbgRecord *DVR) {
1714 assert(DVR->getDebugLoc() && "Debug Value must have debug loc");
1715 if (NoInlineLineTables) {
1716 DVR->setDebugLoc(TheCallDL);
1717 return;
1718 }
1719 DebugLoc DL = DVR->getDebugLoc();
1720 DebugLoc IDL =
1721 inlineDebugLoc(OrigDL: DL, InlinedAt: InlinedAtNode,
1722 Ctx&: DVR->getMarker()->getParent()->getContext(), IANodes);
1723 DVR->setDebugLoc(IDL);
1724 };
1725
1726 // Iterate over all instructions, updating metadata and debug-info records.
1727 for (; FI != Fn->end(); ++FI) {
1728 for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
1729 ++BI) {
1730 UpdateInst(*BI);
1731 for (DbgRecord &DVR : BI->getDbgRecordRange()) {
1732 UpdateDVR(&DVR);
1733 }
1734 }
1735
1736 // Remove debug info intrinsics if we're not keeping inline info.
1737 if (NoInlineLineTables) {
1738 BasicBlock::iterator BI = FI->begin();
1739 while (BI != FI->end()) {
1740 if (isa<DbgInfoIntrinsic>(Val: BI)) {
1741 BI = BI->eraseFromParent();
1742 continue;
1743 } else {
1744 BI->dropDbgRecords();
1745 }
1746 ++BI;
1747 }
1748 }
1749 }
1750}
1751
1752#undef DEBUG_TYPE
1753#define DEBUG_TYPE "assignment-tracking"
1754/// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB.
1755static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,
1756 const CallBase &CB) {
1757 at::StorageToVarsMap EscapedLocals;
1758 SmallPtrSet<const Value *, 4> SeenBases;
1759
1760 LLVM_DEBUG(
1761 errs() << "# Finding caller local variables escaped by callee\n");
1762 for (const Value *Arg : CB.args()) {
1763 LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");
1764 if (!Arg->getType()->isPointerTy()) {
1765 LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");
1766 continue;
1767 }
1768
1769 const Instruction *I = dyn_cast<Instruction>(Val: Arg);
1770 if (!I) {
1771 LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");
1772 continue;
1773 }
1774
1775 // Walk back to the base storage.
1776 assert(Arg->getType()->isPtrOrPtrVectorTy());
1777 APInt TmpOffset(DL.getIndexTypeSizeInBits(Ty: Arg->getType()), 0, false);
1778 const AllocaInst *Base = dyn_cast<AllocaInst>(
1779 Val: Arg->stripAndAccumulateConstantOffsets(DL, Offset&: TmpOffset, AllowNonInbounds: true));
1780 if (!Base) {
1781 LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");
1782 continue;
1783 }
1784
1785 assert(Base);
1786 LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");
1787 // We only need to process each base address once - skip any duplicates.
1788 if (!SeenBases.insert(Ptr: Base).second)
1789 continue;
1790
1791 // Find all local variables associated with the backing storage.
1792 auto CollectAssignsForStorage = [&](auto *DbgAssign) {
1793 // Skip variables from inlined functions - they are not local variables.
1794 if (DbgAssign->getDebugLoc().getInlinedAt())
1795 return;
1796 LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");
1797 EscapedLocals[Base].insert(X: at::VarRecord(DbgAssign));
1798 };
1799 for_each(Range: at::getAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage);
1800 for_each(Range: at::getDVRAssignmentMarkers(Inst: Base), F: CollectAssignsForStorage);
1801 }
1802 return EscapedLocals;
1803}
1804
1805static void trackInlinedStores(Function::iterator Start, Function::iterator End,
1806 const CallBase &CB) {
1807 LLVM_DEBUG(errs() << "trackInlinedStores into "
1808 << Start->getParent()->getName() << " from "
1809 << CB.getCalledFunction()->getName() << "\n");
1810 std::unique_ptr<DataLayout> DL = std::make_unique<DataLayout>(args: CB.getModule());
1811 at::trackAssignments(Start, End, Vars: collectEscapedLocals(DL: *DL, CB), DL: *DL);
1812}
1813
1814/// Update inlined instructions' DIAssignID metadata. We need to do this
1815/// otherwise a function inlined more than once into the same function
1816/// will cause DIAssignID to be shared by many instructions.
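/// For example (illustrative): if the callee contains a store tagged with
/// !DIAssignID !10 and a linked dbg.assign record referring to !10, each
/// inlined copy of that pair is rewritten to use a fresh distinct DIAssignID,
/// so separate inlined instances are not treated as the same assignment.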
1817static void fixupAssignments(Function::iterator Start, Function::iterator End) {
1818 // Map {Old, New} metadata. Not used directly - use GetNewID.
1819 DenseMap<DIAssignID *, DIAssignID *> Map;
1820 auto GetNewID = [&Map](Metadata *Old) {
1821 DIAssignID *OldID = cast<DIAssignID>(Val: Old);
1822 if (DIAssignID *NewID = Map.lookup(Val: OldID))
1823 return NewID;
1824 DIAssignID *NewID = DIAssignID::getDistinct(Context&: OldID->getContext());
1825 Map[OldID] = NewID;
1826 return NewID;
1827 };
1828 // Loop over all the inlined instructions. If we find a DIAssignID
1829 // attachment or use, replace it with a new version.
1830 for (auto BBI = Start; BBI != End; ++BBI) {
1831 for (Instruction &I : *BBI) {
1832 for (DbgVariableRecord &DVR : filterDbgVars(R: I.getDbgRecordRange())) {
1833 if (DVR.isDbgAssign())
1834 DVR.setAssignId(GetNewID(DVR.getAssignID()));
1835 }
1836 if (auto *ID = I.getMetadata(KindID: LLVMContext::MD_DIAssignID))
1837 I.setMetadata(KindID: LLVMContext::MD_DIAssignID, Node: GetNewID(ID));
1838 else if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(Val: &I))
1839 DAI->setAssignId(GetNewID(DAI->getAssignID()));
1840 }
1841 }
1842}
1843#undef DEBUG_TYPE
1844#define DEBUG_TYPE "inline-function"
1845
1846/// Update the block frequencies of the caller after a callee has been inlined.
1847///
1848/// Each block cloned into the caller has its block frequency scaled by the
1849/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
1850/// callee's entry block gets the same frequency as the callsite block and the
1851/// relative frequencies of all cloned blocks remain the same after cloning.
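/// For example, with a call site block frequency of 200 and a callee entry
/// frequency of 50, each cloned block's frequency is scaled by 200/50 = 4.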
1852static void updateCallerBFI(BasicBlock *CallSiteBlock,
1853 const ValueToValueMapTy &VMap,
1854 BlockFrequencyInfo *CallerBFI,
1855 BlockFrequencyInfo *CalleeBFI,
1856 const BasicBlock &CalleeEntryBlock) {
1857 SmallPtrSet<BasicBlock *, 16> ClonedBBs;
1858 for (auto Entry : VMap) {
1859 if (!isa<BasicBlock>(Val: Entry.first) || !Entry.second)
1860 continue;
1861 auto *OrigBB = cast<BasicBlock>(Val: Entry.first);
1862 auto *ClonedBB = cast<BasicBlock>(Val: Entry.second);
1863 BlockFrequency Freq = CalleeBFI->getBlockFreq(BB: OrigBB);
1864 if (!ClonedBBs.insert(Ptr: ClonedBB).second) {
1865 // Multiple blocks in the callee might get mapped to one cloned block in
1866 // the caller since we prune the callee as we clone it. When that happens,
1867 // we want to use the maximum among the original blocks' frequencies.
1868 BlockFrequency NewFreq = CallerBFI->getBlockFreq(BB: ClonedBB);
1869 if (NewFreq > Freq)
1870 Freq = NewFreq;
1871 }
1872 CallerBFI->setBlockFreq(BB: ClonedBB, Freq);
1873 }
1874 BasicBlock *EntryClone = cast<BasicBlock>(Val: VMap.lookup(Val: &CalleeEntryBlock));
1875 CallerBFI->setBlockFreqAndScale(
1876 ReferenceBB: EntryClone, Freq: CallerBFI->getBlockFreq(BB: CallSiteBlock), BlocksToScale&: ClonedBBs);
1877}
1878
1879/// Update the branch metadata for cloned call instructions.
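/// For example (illustrative): with a callee entry count of 1000 and an
/// estimated call site count of 300, updateProfileCallee is called with an
/// entry delta of -300, leaving the original callee with an entry count of
/// 700 and scaling the profile weights of the cloned calls by 300/1000.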
1880static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
1881 const ProfileCount &CalleeEntryCount,
1882 const CallBase &TheCall, ProfileSummaryInfo *PSI,
1883 BlockFrequencyInfo *CallerBFI) {
1884 if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1)
1885 return;
1886 auto CallSiteCount =
1887 PSI ? PSI->getProfileCount(CallInst: TheCall, BFI: CallerBFI) : std::nullopt;
1888 int64_t CallCount =
1889 std::min(a: CallSiteCount.value_or(u: 0), b: CalleeEntryCount.getCount());
1890 updateProfileCallee(Callee, EntryDelta: -CallCount, VMap: &VMap);
1891}
1892
1893void llvm::updateProfileCallee(
1894 Function *Callee, int64_t EntryDelta,
1895 const ValueMap<const Value *, WeakTrackingVH> *VMap) {
1896 auto CalleeCount = Callee->getEntryCount();
1897 if (!CalleeCount)
1898 return;
1899
1900 const uint64_t PriorEntryCount = CalleeCount->getCount();
1901
  // Since CallSiteCount is an estimate, it could exceed the original callee
  // count; in that case it has to be set to 0, so guard against underflow.
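  // For example: PriorEntryCount = 100 with EntryDelta = -150 clamps
  // NewEntryCount to 0 instead of wrapping around.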
1904 const uint64_t NewEntryCount =
1905 (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount)
1906 ? 0
1907 : PriorEntryCount + EntryDelta;
1908
  // Are we updating the profile during inlining (i.e. was a VMap provided)?
1910 if (VMap) {
1911 uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
1912 for (auto Entry : *VMap)
1913 if (isa<CallInst>(Val: Entry.first))
1914 if (auto *CI = dyn_cast_or_null<CallInst>(Val: Entry.second))
1915 CI->updateProfWeight(S: CloneEntryCount, T: PriorEntryCount);
1916 }
1917
1918 if (EntryDelta) {
1919 Callee->setEntryCount(Count: NewEntryCount);
1920
1921 for (BasicBlock &BB : *Callee)
1922 // No need to update the callsite if it is pruned during inlining.
1923 if (!VMap || VMap->count(Val: &BB))
1924 for (Instruction &I : BB)
1925 if (CallInst *CI = dyn_cast<CallInst>(Val: &I))
1926 CI->updateProfWeight(S: NewEntryCount, T: PriorEntryCount);
1927 }
1928}
1929
1930/// An operand bundle "clang.arc.attachedcall" on a call indicates the call
1931/// result is implicitly consumed by a call to retainRV or claimRV immediately
1932/// after the call. This function inlines the retainRV/claimRV calls.
1933///
1934/// There are three cases to consider:
1935///
1936/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned
1937/// object in the callee return block, the autoreleaseRV call and the
1938/// retainRV/claimRV call in the caller cancel out. If the call in the caller
1939/// is a claimRV call, a call to objc_release is emitted.
1940///
1941/// 2. If there is a call in the callee return block that doesn't have operand
1942/// bundle "clang.arc.attachedcall", the operand bundle on the original call
1943/// is transferred to the call in the callee.
1944///
1945/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
1946/// a retainRV call.
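///
/// For example (illustrative), case 1 with an attached retainRV: if the
/// callee's return block ends in
///   call ptr @llvm.objc.autoreleaseReturnValue(ptr %r)
///   ret ptr %r
/// the autoreleaseRV call is simply erased and no retain needs to be
/// emitted, since the pair cancels out.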
1947static void
1948inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
1949 const SmallVectorImpl<ReturnInst *> &Returns) {
1950 Module *Mod = CB.getModule();
1951 assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
1952 bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
1953 IsUnsafeClaimRV = !IsRetainRV;
1954
1955 for (auto *RI : Returns) {
1956 Value *RetOpnd = objcarc::GetRCIdentityRoot(V: RI->getOperand(i_nocapture: 0));
1957 bool InsertRetainCall = IsRetainRV;
1958 IRBuilder<> Builder(RI->getContext());
1959
1960 // Walk backwards through the basic block looking for either a matching
1961 // autoreleaseRV call or an unannotated call.
1962 auto InstRange = llvm::make_range(x: ++(RI->getIterator().getReverse()),
1963 y: RI->getParent()->rend());
1964 for (Instruction &I : llvm::make_early_inc_range(Range&: InstRange)) {
1965 // Ignore casts.
1966 if (isa<CastInst>(Val: I))
1967 continue;
1968
1969 if (auto *II = dyn_cast<IntrinsicInst>(Val: &I)) {
1970 if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
1971 !II->hasNUses(N: 0) ||
1972 objcarc::GetRCIdentityRoot(V: II->getOperand(i_nocapture: 0)) != RetOpnd)
1973 break;
1974
        // If we've found a matching autoreleaseRV call:
1976 // - If claimRV is attached to the call, insert a call to objc_release
1977 // and erase the autoreleaseRV call.
1978 // - If retainRV is attached to the call, just erase the autoreleaseRV
1979 // call.
1980 if (IsUnsafeClaimRV) {
1981 Builder.SetInsertPoint(II);
1982 Function *IFn =
1983 Intrinsic::getDeclaration(M: Mod, Intrinsic::id: objc_release);
1984 Builder.CreateCall(Callee: IFn, Args: RetOpnd, Name: "");
1985 }
1986 II->eraseFromParent();
1987 InsertRetainCall = false;
1988 break;
1989 }
1990
1991 auto *CI = dyn_cast<CallInst>(Val: &I);
1992
1993 if (!CI)
1994 break;
1995
1996 if (objcarc::GetRCIdentityRoot(V: CI) != RetOpnd ||
1997 objcarc::hasAttachedCallOpBundle(CB: CI))
1998 break;
1999
2000 // If we've found an unannotated call that defines RetOpnd, add a
2001 // "clang.arc.attachedcall" operand bundle.
2002 Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(CB: &CB)};
2003 OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
2004 auto *NewCall = CallBase::addOperandBundle(
2005 CB: CI, ID: LLVMContext::OB_clang_arc_attachedcall, OB, InsertPt: CI->getIterator());
2006 NewCall->copyMetadata(SrcInst: *CI);
2007 CI->replaceAllUsesWith(V: NewCall);
2008 CI->eraseFromParent();
2009 InsertRetainCall = false;
2010 break;
2011 }
2012
2013 if (InsertRetainCall) {
2014 // The retainRV is attached to the call and we've failed to find a
2015 // matching autoreleaseRV or an annotated call in the callee. Emit a call
2016 // to objc_retain.
2017 Builder.SetInsertPoint(RI);
2018 Function *IFn = Intrinsic::getDeclaration(M: Mod, Intrinsic::id: objc_retain);
2019 Builder.CreateCall(Callee: IFn, Args: RetOpnd, Name: "");
2020 }
2021 }
2022}
2023
2024/// This function inlines the called function into the basic block of the
2025/// caller. This returns false if it is not possible to inline this call.
2026/// The program is still in a well defined state if this occurs though.
2027///
2028/// Note that this only does one level of inlining. For example, if the
2029/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
2030/// exists in the instruction stream. Similarly this will inline a recursive
2031/// function by one level.
2032llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
2033 bool MergeAttributes,
2034 AAResults *CalleeAAR,
2035 bool InsertLifetime,
2036 Function *ForwardVarArgsTo) {
2037 assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
2038
2039 // FIXME: we don't inline callbr yet.
2040 if (isa<CallBrInst>(Val: CB))
2041 return InlineResult::failure(Reason: "We don't inline callbr yet.");
2042
2043 // If IFI has any state in it, zap it before we fill it in.
2044 IFI.reset();
2045
2046 Function *CalledFunc = CB.getCalledFunction();
2047 if (!CalledFunc || // Can't inline external function or indirect
2048 CalledFunc->isDeclaration()) // call!
2049 return InlineResult::failure(Reason: "external or indirect");
2050
2051 // The inliner does not know how to inline through calls with operand bundles
2052 // in general ...
2053 Value *ConvergenceControlToken = nullptr;
2054 if (CB.hasOperandBundles()) {
2055 for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
2056 auto OBUse = CB.getOperandBundleAt(Index: i);
2057 uint32_t Tag = OBUse.getTagID();
2058 // ... but it knows how to inline through "deopt" operand bundles ...
2059 if (Tag == LLVMContext::OB_deopt)
2060 continue;
2061 // ... and "funclet" operand bundles.
2062 if (Tag == LLVMContext::OB_funclet)
2063 continue;
2064 if (Tag == LLVMContext::OB_clang_arc_attachedcall)
2065 continue;
2066 if (Tag == LLVMContext::OB_kcfi)
2067 continue;
2068 if (Tag == LLVMContext::OB_convergencectrl) {
2069 ConvergenceControlToken = OBUse.Inputs[0].get();
2070 continue;
2071 }
2072
2073 return InlineResult::failure(Reason: "unsupported operand bundle");
2074 }
2075 }
2076
2077 // FIXME: The check below is redundant and incomplete. According to spec, if a
2078 // convergent call is missing a token, then the caller is using uncontrolled
2079 // convergence. If the callee has an entry intrinsic, then the callee is using
2080 // controlled convergence, and the call cannot be inlined. A proper
  // implementation of this check requires a whole new analysis that identifies
2082 // convergence in every function. For now, we skip that and just do this one
2083 // cursory check. The underlying assumption is that in a compiler flow that
2084 // fully implements convergence control tokens, there is no mixing of
2085 // controlled and uncontrolled convergent operations in the whole program.
2086 if (CB.isConvergent()) {
2087 auto *I = CalledFunc->getEntryBlock().getFirstNonPHI();
2088 if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(Val: I)) {
2089 if (IntrinsicCall->getIntrinsicID() ==
2090 Intrinsic::experimental_convergence_entry) {
2091 if (!ConvergenceControlToken) {
2092 return InlineResult::failure(
2093 Reason: "convergent call needs convergencectrl operand");
2094 }
2095 }
2096 }
2097 }
2098
2099 // If the call to the callee cannot throw, set the 'nounwind' flag on any
2100 // calls that we inline.
2101 bool MarkNoUnwind = CB.doesNotThrow();
2102
2103 BasicBlock *OrigBB = CB.getParent();
2104 Function *Caller = OrigBB->getParent();
2105
2106 // GC poses two hazards to inlining, which only occur when the callee has GC:
2107 // 1. If the caller has no GC, then the callee's GC must be propagated to the
2108 // caller.
2109 // 2. If the caller has a differing GC, it is invalid to inline.
2110 if (CalledFunc->hasGC()) {
2111 if (!Caller->hasGC())
2112 Caller->setGC(CalledFunc->getGC());
2113 else if (CalledFunc->getGC() != Caller->getGC())
2114 return InlineResult::failure(Reason: "incompatible GC");
2115 }
2116
2117 // Get the personality function from the callee if it contains a landing pad.
2118 Constant *CalledPersonality =
2119 CalledFunc->hasPersonalityFn()
2120 ? CalledFunc->getPersonalityFn()->stripPointerCasts()
2121 : nullptr;
2122
2123 // Find the personality function used by the landing pads of the caller. If it
2124 // exists, then check to see that it matches the personality function used in
2125 // the callee.
2126 Constant *CallerPersonality =
2127 Caller->hasPersonalityFn()
2128 ? Caller->getPersonalityFn()->stripPointerCasts()
2129 : nullptr;
2130 if (CalledPersonality) {
2131 if (!CallerPersonality)
2132 Caller->setPersonalityFn(CalledPersonality);
2133 // If the personality functions match, then we can perform the
2134 // inlining. Otherwise, we can't inline.
2135 // TODO: This isn't 100% true. Some personality functions are proper
2136 // supersets of others and can be used in place of the other.
2137 else if (CalledPersonality != CallerPersonality)
2138 return InlineResult::failure(Reason: "incompatible personality");
2139 }
2140
2141 // We need to figure out which funclet the callsite was in so that we may
2142 // properly nest the callee.
2143 Instruction *CallSiteEHPad = nullptr;
2144 if (CallerPersonality) {
2145 EHPersonality Personality = classifyEHPersonality(Pers: CallerPersonality);
2146 if (isScopedEHPersonality(Pers: Personality)) {
2147 std::optional<OperandBundleUse> ParentFunclet =
2148 CB.getOperandBundle(ID: LLVMContext::OB_funclet);
2149 if (ParentFunclet)
2150 CallSiteEHPad = cast<FuncletPadInst>(Val: ParentFunclet->Inputs.front());
2151
2152 // OK, the inlining site is legal. What about the target function?
2153
2154 if (CallSiteEHPad) {
2155 if (Personality == EHPersonality::MSVC_CXX) {
2156 // The MSVC personality cannot tolerate catches getting inlined into
2157 // cleanup funclets.
2158 if (isa<CleanupPadInst>(Val: CallSiteEHPad)) {
2159 // Ok, the call site is within a cleanuppad. Let's check the callee
2160 // for catchpads.
2161 for (const BasicBlock &CalledBB : *CalledFunc) {
2162 if (isa<CatchSwitchInst>(Val: CalledBB.getFirstNonPHI()))
2163 return InlineResult::failure(Reason: "catch in cleanup funclet");
2164 }
2165 }
2166 } else if (isAsynchronousEHPersonality(Pers: Personality)) {
          // SEH is even less tolerant; there may not be any sort of exceptional
2168 // funclet in the callee.
2169 for (const BasicBlock &CalledBB : *CalledFunc) {
2170 if (CalledBB.isEHPad())
2171 return InlineResult::failure(Reason: "SEH in cleanup funclet");
2172 }
2173 }
2174 }
2175 }
2176 }
2177
2178 // Determine if we are dealing with a call in an EHPad which does not unwind
2179 // to caller.
2180 bool EHPadForCallUnwindsLocally = false;
2181 if (CallSiteEHPad && isa<CallInst>(Val: CB)) {
2182 UnwindDestMemoTy FuncletUnwindMap;
2183 Value *CallSiteUnwindDestToken =
2184 getUnwindDestToken(EHPad: CallSiteEHPad, MemoMap&: FuncletUnwindMap);
2185
2186 EHPadForCallUnwindsLocally =
2187 CallSiteUnwindDestToken &&
2188 !isa<ConstantTokenNone>(Val: CallSiteUnwindDestToken);
2189 }
2190
2191 // Get an iterator to the last basic block in the function, which will have
2192 // the new function inlined after it.
2193 Function::iterator LastBlock = --Caller->end();
2194
2195 // Make sure to capture all of the return instructions from the cloned
2196 // function.
2197 SmallVector<ReturnInst*, 8> Returns;
2198 ClonedCodeInfo InlinedFunctionInfo;
2199 Function::iterator FirstNewBlock;
2200
2201 { // Scope to destroy VMap after cloning.
2202 ValueToValueMapTy VMap;
2203 struct ByValInit {
2204 Value *Dst;
2205 Value *Src;
2206 Type *Ty;
2207 };
    // Keep a list of (dst, src, byval type) triples used to emit byval
    // initializations.
2209 SmallVector<ByValInit, 4> ByValInits;
2210
2211 // When inlining a function that contains noalias scope metadata,
2212 // this metadata needs to be cloned so that the inlined blocks
2213 // have different "unique scopes" at every call site.
2214 // Track the metadata that must be cloned. Do this before other changes to
2215 // the function, so that we do not get in trouble when inlining caller ==
2216 // callee.
2217 ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());
2218
2219 auto &DL = Caller->getParent()->getDataLayout();
2220
2221 // Calculate the vector of arguments to pass into the function cloner, which
2222 // matches up the formal to the actual argument values.
2223 auto AI = CB.arg_begin();
2224 unsigned ArgNo = 0;
2225 for (Function::arg_iterator I = CalledFunc->arg_begin(),
2226 E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
2227 Value *ActualArg = *AI;
2228
      // When byval arguments are actually inlined, we need to make the copy
      // implied by them explicit. However, we don't do this if the callee is
      // readonly or readnone, because the copy would be unneeded: the callee
      // doesn't modify the struct.
2233 if (CB.isByValArgument(ArgNo)) {
2234 ActualArg = HandleByValArgument(ByValType: CB.getParamByValType(ArgNo), Arg: ActualArg,
2235 TheCall: &CB, CalledFunc, IFI,
2236 ByValAlignment: CalledFunc->getParamAlign(ArgNo));
2237 if (ActualArg != *AI)
2238 ByValInits.push_back(
2239 Elt: {.Dst: ActualArg, .Src: (Value *)*AI, .Ty: CB.getParamByValType(ArgNo)});
2240 }
2241
2242 VMap[&*I] = ActualArg;
2243 }
2244
2245 // TODO: Remove this when users have been updated to the assume bundles.
2246 // Add alignment assumptions if necessary. We do this before the inlined
2247 // instructions are actually cloned into the caller so that we can easily
2248 // check what will be known at the start of the inlined code.
2249 AddAlignmentAssumptions(CB, IFI);
2250
2251 AssumptionCache *AC =
2252 IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
2253
    /// Preserve all attributes of the call and its parameters.
2255 salvageKnowledge(I: &CB, AC);
2256
2257 // We want the inliner to prune the code as it copies. We would LOVE to
2258 // have no dead or constant instructions leftover after inlining occurs
2259 // (which can happen, e.g., because an argument was constant), but we'll be
2260 // happy with whatever the cloner can do.
2261 CloneAndPruneFunctionInto(NewFunc: Caller, OldFunc: CalledFunc, VMap,
2262 /*ModuleLevelChanges=*/false, Returns, NameSuffix: ".i",
2263 CodeInfo: &InlinedFunctionInfo);
2264 // Remember the first block that is newly cloned over.
2265 FirstNewBlock = LastBlock; ++FirstNewBlock;
2266
    // Insert retainRV/claimRV runtime calls.
2268 objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(CB: &CB);
2269 if (RVCallKind != objcarc::ARCInstKind::None)
2270 inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
2271
    // Update caller/callee profiles only when requested. For sample loader
2273 // inlining, the context-sensitive inlinee profile doesn't need to be
2274 // subtracted from callee profile, and the inlined clone also doesn't need
2275 // to be scaled based on call site count.
2276 if (IFI.UpdateProfile) {
2277 if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)
2278 // Update the BFI of blocks cloned into the caller.
2279 updateCallerBFI(CallSiteBlock: OrigBB, VMap, CallerBFI: IFI.CallerBFI, CalleeBFI: IFI.CalleeBFI,
2280 CalleeEntryBlock: CalledFunc->front());
2281
2282 if (auto Profile = CalledFunc->getEntryCount())
2283 updateCallProfile(Callee: CalledFunc, VMap, CalleeEntryCount: *Profile, TheCall: CB, PSI: IFI.PSI,
2284 CallerBFI: IFI.CallerBFI);
2285 }
2286
2287 // Inject byval arguments initialization.
2288 for (ByValInit &Init : ByValInits)
2289 HandleByValArgumentInit(ByValType: Init.Ty, Dst: Init.Dst, Src: Init.Src, M: Caller->getParent(),
2290 InsertBlock: &*FirstNewBlock, IFI, CalledFunc);
2291
2292 std::optional<OperandBundleUse> ParentDeopt =
2293 CB.getOperandBundle(ID: LLVMContext::OB_deopt);
2294 if (ParentDeopt) {
2295 SmallVector<OperandBundleDef, 2> OpDefs;
2296
2297 for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {
2298 CallBase *ICS = dyn_cast_or_null<CallBase>(Val&: VH);
2299 if (!ICS)
2300 continue; // instruction was DCE'd or RAUW'ed to undef
2301
2302 OpDefs.clear();
2303
2304 OpDefs.reserve(N: ICS->getNumOperandBundles());
2305
2306 for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;
2307 ++COBi) {
2308 auto ChildOB = ICS->getOperandBundleAt(Index: COBi);
2309 if (ChildOB.getTagID() != LLVMContext::OB_deopt) {
2310 // If the inlined call has other operand bundles, let them be
2311 OpDefs.emplace_back(Args&: ChildOB);
2312 continue;
2313 }
2314
2315 // It may be useful to separate this logic (of handling operand
2316 // bundles) out to a separate "policy" component if this gets crowded.
2317 // Prepend the parent's deoptimization continuation to the newly
2318 // inlined call's deoptimization continuation.
2319 std::vector<Value *> MergedDeoptArgs;
2320 MergedDeoptArgs.reserve(n: ParentDeopt->Inputs.size() +
2321 ChildOB.Inputs.size());
2322
2323 llvm::append_range(C&: MergedDeoptArgs, R&: ParentDeopt->Inputs);
2324 llvm::append_range(C&: MergedDeoptArgs, R&: ChildOB.Inputs);
2325
2326 OpDefs.emplace_back(Args: "deopt", Args: std::move(MergedDeoptArgs));
2327 }
2328
2329 Instruction *NewI = CallBase::Create(CB: ICS, Bundles: OpDefs, InsertPt: ICS->getIterator());
2330
2331 // Note: the RAUW does the appropriate fixup in VMap, so we need to do
2332 // this even if the call returns void.
2333 ICS->replaceAllUsesWith(V: NewI);
2334
2335 VH = nullptr;
2336 ICS->eraseFromParent();
2337 }
2338 }
2339
2340 // For 'nodebug' functions, the associated DISubprogram is always null.
2341 // Conservatively avoid propagating the callsite debug location to
2342 // instructions inlined from a function whose DISubprogram is not null.
2343 fixupLineNumbers(Fn: Caller, FI: FirstNewBlock, TheCall: &CB,
2344 CalleeHasDebugInfo: CalledFunc->getSubprogram() != nullptr);
2345
2346 if (isAssignmentTrackingEnabled(M: *Caller->getParent())) {
2347 // Interpret inlined stores to caller-local variables as assignments.
2348 trackInlinedStores(Start: FirstNewBlock, End: Caller->end(), CB);
2349
2350 // Update DIAssignID metadata attachments and uses so that they are
2351 // unique to this inlined instance.
2352 fixupAssignments(Start: FirstNewBlock, End: Caller->end());
2353 }
2354
2355 // Now clone the inlined noalias scope metadata.
2356 SAMetadataCloner.clone();
2357 SAMetadataCloner.remap(FStart: FirstNewBlock, FEnd: Caller->end());
2358
2359 // Add noalias metadata if necessary.
2360 AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
2361
2362 // Clone return attributes on the callsite into the calls within the inlined
2363 // function which feed into its return value.
2364 AddReturnAttributes(CB, VMap);
2365
2366 propagateMemProfMetadata(Callee: CalledFunc, CB,
2367 ContainsMemProfMetadata: InlinedFunctionInfo.ContainsMemProfMetadata, VMap);
2368
2369 // Propagate metadata on the callsite if necessary.
2370 PropagateCallSiteMetadata(CB, FStart: FirstNewBlock, FEnd: Caller->end());
2371
2372 // Register any cloned assumptions.
2373 if (IFI.GetAssumptionCache)
2374 for (BasicBlock &NewBlock :
2375 make_range(x: FirstNewBlock->getIterator(), y: Caller->end()))
2376 for (Instruction &I : NewBlock)
2377 if (auto *II = dyn_cast<AssumeInst>(Val: &I))
2378 IFI.GetAssumptionCache(*Caller).registerAssumption(CI: II);
2379 }
2380
2381 if (ConvergenceControlToken) {
2382 auto *I = FirstNewBlock->getFirstNonPHI();
2383 if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(Val: I)) {
2384 if (IntrinsicCall->getIntrinsicID() ==
2385 Intrinsic::experimental_convergence_entry) {
2386 IntrinsicCall->replaceAllUsesWith(V: ConvergenceControlToken);
2387 IntrinsicCall->eraseFromParent();
2388 }
2389 }
2390 }
2391
2392 // If there are any alloca instructions in the block that used to be the entry
2393 // block for the callee, move them to the entry block of the caller. First
2394 // calculate which instruction they should be inserted before. We insert the
2395 // instructions at the end of the current alloca list.
2396 {
2397 BasicBlock::iterator InsertPoint = Caller->begin()->begin();
2398 for (BasicBlock::iterator I = FirstNewBlock->begin(),
2399 E = FirstNewBlock->end(); I != E; ) {
2400 AllocaInst *AI = dyn_cast<AllocaInst>(Val: I++);
2401 if (!AI) continue;
2402
2403 // If the alloca is now dead, remove it. This often occurs due to code
2404 // specialization.
2405 if (AI->use_empty()) {
2406 AI->eraseFromParent();
2407 continue;
2408 }
2409
2410 if (!allocaWouldBeStaticInEntry(AI))
2411 continue;
2412
2413 // Keep track of the static allocas that we inline into the caller.
2414 IFI.StaticAllocas.push_back(Elt: AI);
2415
2416 // Scan for the block of allocas that we can move over, and move them
2417 // all at once.
2418 while (isa<AllocaInst>(Val: I) &&
2419 !cast<AllocaInst>(Val&: I)->use_empty() &&
2420 allocaWouldBeStaticInEntry(AI: cast<AllocaInst>(Val&: I))) {
2421 IFI.StaticAllocas.push_back(Elt: cast<AllocaInst>(Val&: I));
2422 ++I;
2423 }
2424
      // Transfer all of the allocas over in a block. Using splice means
      // that the instructions aren't removed from the symbol table and then
      // reinserted.
2428 I.setTailBit(true);
2429 Caller->getEntryBlock().splice(ToIt: InsertPoint, FromBB: &*FirstNewBlock,
2430 FromBeginIt: AI->getIterator(), FromEndIt: I);
2431 }
2432 }
2433
2434 SmallVector<Value*,4> VarArgsToForward;
2435 SmallVector<AttributeSet, 4> VarArgsAttrs;
2436 for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
2437 i < CB.arg_size(); i++) {
2438 VarArgsToForward.push_back(Elt: CB.getArgOperand(i));
2439 VarArgsAttrs.push_back(Elt: CB.getAttributes().getParamAttrs(ArgNo: i));
2440 }
2441
2442 bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
2443 if (InlinedFunctionInfo.ContainsCalls) {
2444 CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
2445 if (CallInst *CI = dyn_cast<CallInst>(Val: &CB))
2446 CallSiteTailKind = CI->getTailCallKind();
2447
2448 // For inlining purposes, the "notail" marker is the same as no marker.
2449 if (CallSiteTailKind == CallInst::TCK_NoTail)
2450 CallSiteTailKind = CallInst::TCK_None;
2451
2452 for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
2453 ++BB) {
2454 for (Instruction &I : llvm::make_early_inc_range(Range&: *BB)) {
2455 CallInst *CI = dyn_cast<CallInst>(Val: &I);
2456 if (!CI)
2457 continue;
2458
2459 // Forward varargs from inlined call site to calls to the
2460 // ForwardVarArgsTo function, if requested, and to musttail calls.
2461 if (!VarArgsToForward.empty() &&
2462 ((ForwardVarArgsTo &&
2463 CI->getCalledFunction() == ForwardVarArgsTo) ||
2464 CI->isMustTailCall())) {
2465 // Collect attributes for non-vararg parameters.
2466 AttributeList Attrs = CI->getAttributes();
2467 SmallVector<AttributeSet, 8> ArgAttrs;
2468 if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
2469 for (unsigned ArgNo = 0;
2470 ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
2471 ArgAttrs.push_back(Elt: Attrs.getParamAttrs(ArgNo));
2472 }
2473
2474 // Add VarArg attributes.
2475 ArgAttrs.append(in_start: VarArgsAttrs.begin(), in_end: VarArgsAttrs.end());
2476 Attrs = AttributeList::get(C&: CI->getContext(), FnAttrs: Attrs.getFnAttrs(),
2477 RetAttrs: Attrs.getRetAttrs(), ArgAttrs);
2478 // Add VarArgs to existing parameters.
2479 SmallVector<Value *, 6> Params(CI->args());
2480 Params.append(in_start: VarArgsToForward.begin(), in_end: VarArgsToForward.end());
2481 CallInst *NewCI = CallInst::Create(
2482 Ty: CI->getFunctionType(), Func: CI->getCalledOperand(), Args: Params, NameStr: "", InsertBefore: CI->getIterator());
2483 NewCI->setDebugLoc(CI->getDebugLoc());
2484 NewCI->setAttributes(Attrs);
2485 NewCI->setCallingConv(CI->getCallingConv());
2486 CI->replaceAllUsesWith(V: NewCI);
2487 CI->eraseFromParent();
2488 CI = NewCI;
2489 }
2490
2491 if (Function *F = CI->getCalledFunction())
2492 InlinedDeoptimizeCalls |=
2493 F->getIntrinsicID() == Intrinsic::experimental_deoptimize;
2494
2495 // We need to reduce the strength of any inlined tail calls. For
2496 // musttail, we have to avoid introducing potential unbounded stack
2497 // growth. For example, if functions 'f' and 'g' are mutually recursive
2498 // with musttail, we can inline 'g' into 'f' so long as we preserve
2499 // musttail on the cloned call to 'f'. If either the inlined call site
2500 // or the cloned call site is *not* musttail, the program already has
2501 // one frame of stack growth, so it's safe to remove musttail. Here is
2502 // a table of example transformations:
2503 //
2504 // f -> musttail g -> musttail f ==> f -> musttail f
2505 // f -> musttail g -> tail f ==> f -> tail f
2506 // f -> g -> musttail f ==> f -> f
2507 // f -> g -> tail f ==> f -> f
2508 //
2509 // Inlined notail calls should remain notail calls.
2510 CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
2511 if (ChildTCK != CallInst::TCK_NoTail)
2512 ChildTCK = std::min(a: CallSiteTailKind, b: ChildTCK);
2513 CI->setTailCallKind(ChildTCK);
2514 InlinedMustTailCalls |= CI->isMustTailCall();
2515
2516 // Call sites inlined through a 'nounwind' call site should be
2517 // 'nounwind' as well. However, avoid marking call sites explicitly
2518 // where possible. This helps expose more opportunities for CSE after
2519 // inlining, commonly when the callee is an intrinsic.
2520 if (MarkNoUnwind && !CI->doesNotThrow())
2521 CI->setDoesNotThrow();
2522 }
2523 }
2524 }
2525
  // Leave lifetime markers for the static allocas, scoping them to the
2527 // function we just inlined.
2528 // We need to insert lifetime intrinsics even at O0 to avoid invalid
2529 // access caused by multithreaded coroutines. The check
2530 // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.
2531 if ((InsertLifetime || Caller->isPresplitCoroutine()) &&
2532 !IFI.StaticAllocas.empty()) {
2533 IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());
2534 for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
2535 AllocaInst *AI = IFI.StaticAllocas[ai];
2536 // Don't mark swifterror allocas. They can't have bitcast uses.
2537 if (AI->isSwiftError())
2538 continue;
2539
2540 // If the alloca is already scoped to something smaller than the whole
2541 // function then there's no need to add redundant, less accurate markers.
2542 if (hasLifetimeMarkers(AI))
2543 continue;
2544
2545 // Try to determine the size of the allocation.
2546 ConstantInt *AllocaSize = nullptr;
2547 if (ConstantInt *AIArraySize =
2548 dyn_cast<ConstantInt>(Val: AI->getArraySize())) {
2549 auto &DL = Caller->getParent()->getDataLayout();
2550 Type *AllocaType = AI->getAllocatedType();
2551 TypeSize AllocaTypeSize = DL.getTypeAllocSize(Ty: AllocaType);
2552 uint64_t AllocaArraySize = AIArraySize->getLimitedValue();
2553
2554 // Don't add markers for zero-sized allocas.
2555 if (AllocaArraySize == 0)
2556 continue;
2557
2558 // Check that array size doesn't saturate uint64_t and doesn't
2559 // overflow when it's multiplied by type size.
2560 if (!AllocaTypeSize.isScalable() &&
2561 AllocaArraySize != std::numeric_limits<uint64_t>::max() &&
2562 std::numeric_limits<uint64_t>::max() / AllocaArraySize >=
2563 AllocaTypeSize.getFixedValue()) {
2564 AllocaSize = ConstantInt::get(Ty: Type::getInt64Ty(C&: AI->getContext()),
2565 V: AllocaArraySize * AllocaTypeSize);
2566 }
2567 }
2568
2569 builder.CreateLifetimeStart(Ptr: AI, Size: AllocaSize);
2570 for (ReturnInst *RI : Returns) {
2571 // Don't insert llvm.lifetime.end calls between a musttail or deoptimize
2572 // call and a return. The return kills all local allocas.
2573 if (InlinedMustTailCalls &&
2574 RI->getParent()->getTerminatingMustTailCall())
2575 continue;
2576 if (InlinedDeoptimizeCalls &&
2577 RI->getParent()->getTerminatingDeoptimizeCall())
2578 continue;
2579 IRBuilder<>(RI).CreateLifetimeEnd(Ptr: AI, Size: AllocaSize);
2580 }
2581 }
2582 }
2583
2584 // If the inlined code contained dynamic alloca instructions, wrap the inlined
2585 // code with llvm.stacksave/llvm.stackrestore intrinsics.
2586 if (InlinedFunctionInfo.ContainsDynamicAllocas) {
2587 // Insert the llvm.stacksave.
2588 CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())
2589 .CreateStackSave(Name: "savedstack");
2590
2591 // Insert a call to llvm.stackrestore before any return instructions in the
2592 // inlined function.
2593 for (ReturnInst *RI : Returns) {
2594 // Don't insert llvm.stackrestore calls between a musttail or deoptimize
2595 // call and a return. The return will restore the stack pointer.
2596 if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())
2597 continue;
2598 if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())
2599 continue;
2600 IRBuilder<>(RI).CreateStackRestore(Ptr: SavedPtr);
2601 }
2602 }
2603
2604 // If we are inlining for an invoke instruction, we must make sure to rewrite
2605 // any call instructions into invoke instructions. This is sensitive to which
2606 // funclet pads were top-level in the inlinee, so must be done before
2607 // rewriting the "parent pad" links.
2608 if (auto *II = dyn_cast<InvokeInst>(Val: &CB)) {
2609 BasicBlock *UnwindDest = II->getUnwindDest();
2610 Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();
2611 if (isa<LandingPadInst>(Val: FirstNonPHI)) {
2612 HandleInlinedLandingPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo);
2613 } else {
2614 HandleInlinedEHPad(II, FirstNewBlock: &*FirstNewBlock, InlinedCodeInfo&: InlinedFunctionInfo);
2615 }
2616 }

  // Update the lexical scopes of the new funclets and callsites.
  // Anything that had 'none' as its parent is now nested inside the callsite's
  // EHPad.
  if (CallSiteEHPad) {
    for (Function::iterator BB = FirstNewBlock->getIterator(),
                            E = Caller->end();
         BB != E; ++BB) {
      // Add bundle operands to inlined call sites.
      PropagateOperandBundles(BB, CallSiteEHPad);

      // It is problematic if the inlinee has a cleanupret that unwinds to the
      // caller and we inline it into a call site that doesn't itself unwind
      // but sits inside an EH pad that does. Such an edge must be dynamically
      // unreachable, so we replace the cleanupret with unreachable.
      if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))
        if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)
          changeToUnreachable(CleanupRet);

      Instruction *I = BB->getFirstNonPHI();
      if (!I->isEHPad())
        continue;

      if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
        if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))
          CatchSwitch->setParentPad(CallSiteEHPad);
      } else {
        auto *FPI = cast<FuncletPadInst>(I);
        if (isa<ConstantTokenNone>(FPI->getParentPad()))
          FPI->setParentPad(CallSiteEHPad);
      }
    }
  }
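
  // After this loop the inlined call sites carry a "funclet" operand bundle
  // referring to CallSiteEHPad, and any funclet pad whose parent was 'none' is
  // reparented to it, so the caller's EH nesting stays well formed.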

  if (InlinedDeoptimizeCalls) {
    // We need to at least remove the deoptimizing returns from the Returns
    // set, so that the control flow from those returns does not get merged
    // into the caller (but terminates it instead). If the caller's return type
    // does not match the callee's return type, we also need to change the
    // return type of the intrinsic.
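    //
    // Illustrative sketch only: if the callee returned void but the caller
    // returns i32, a deoptimizing return of the form
    //   call void @llvm.experimental.deoptimize.isVoid(...) [ "deopt"(...) ]
    //   ret void
    // is rewritten below, roughly, into
    //   %r = call i32 @llvm.experimental.deoptimize.i32(...) [ "deopt"(...) ]
    //   ret i32 %r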
    if (Caller->getReturnType() == CB.getType()) {
      llvm::erase_if(Returns, [](ReturnInst *RI) {
        return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;
      });
    } else {
      SmallVector<ReturnInst *, 8> NormalReturns;
      Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(
          Caller->getParent(), Intrinsic::experimental_deoptimize,
          {Caller->getReturnType()});

      for (ReturnInst *RI : Returns) {
        CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();
        if (!DeoptCall) {
          NormalReturns.push_back(RI);
          continue;
        }

        // The calling convention on the deoptimize call itself may be bogus,
        // since the code we're inlining may have undefined behavior (and may
        // never actually execute at runtime); but all
        // @llvm.experimental.deoptimize declarations have to have the same
        // calling convention in a well-formed module.
        auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();
        NewDeoptIntrinsic->setCallingConv(CallingConv);
        auto *CurBB = RI->getParent();
        RI->eraseFromParent();

        SmallVector<Value *, 4> CallArgs(DeoptCall->args());

        SmallVector<OperandBundleDef, 1> OpBundles;
        DeoptCall->getOperandBundlesAsDefs(OpBundles);
        auto DeoptAttributes = DeoptCall->getAttributes();
        DeoptCall->eraseFromParent();
        assert(!OpBundles.empty() &&
               "Expected at least the deopt operand bundle");

        IRBuilder<> Builder(CurBB);
        CallInst *NewDeoptCall =
            Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);
        NewDeoptCall->setCallingConv(CallingConv);
        NewDeoptCall->setAttributes(DeoptAttributes);
        if (NewDeoptCall->getType()->isVoidTy())
          Builder.CreateRetVoid();
        else
          Builder.CreateRet(NewDeoptCall);
        // Since the ret type is changed, remove the incompatible attributes.
        NewDeoptCall->removeRetAttrs(
            AttributeFuncs::typeIncompatible(NewDeoptCall->getType()));
      }

      // Leave behind the normal returns so we can merge control flow.
      std::swap(Returns, NormalReturns);
    }
  }

  // Handle any inlined musttail call sites. In order for a new call site to be
  // musttail, the source of the clone and the inlined call site must have been
  // musttail. Therefore it's safe to return without merging control into the
  // phi below.
  if (InlinedMustTailCalls) {
    // Check if we need to bitcast the result of any musttail calls.
    Type *NewRetTy = Caller->getReturnType();
    bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;

    // Handle the returns preceded by musttail calls separately.
    SmallVector<ReturnInst *, 8> NormalReturns;
    for (ReturnInst *RI : Returns) {
      CallInst *ReturnedMustTail =
          RI->getParent()->getTerminatingMustTailCall();
      if (!ReturnedMustTail) {
        NormalReturns.push_back(RI);
        continue;
      }
      if (!NeedBitCast)
        continue;

      // Delete the old return and any preceding bitcast.
      BasicBlock *CurBB = RI->getParent();
      auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
      RI->eraseFromParent();
      if (OldCast)
        OldCast->eraseFromParent();

      // Insert a new bitcast and return with the right type.
      IRBuilder<> Builder(CurBB);
      Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
    }

    // Leave behind the normal returns so we can merge control flow.
    std::swap(Returns, NormalReturns);
  }
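
  // Illustrative sketch only (names are placeholders): when NeedBitCast was
  // set, each musttail-terminated block now ends roughly in
  //   %v = musttail call <callee ret ty> @callee(...)
  //   %cast = bitcast <callee ret ty> %v to <caller ret ty>
  //   ret <caller ret ty> %cast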

  // Now that all of the transforms on the inlined code have taken place but
  // before we splice the inlined code into the CFG and lose track of which
  // blocks were actually inlined, collect the call sites. We only do this if
  // call graph updates weren't requested, as those provide value handle based
  // tracking of inlined call sites instead. Calls to intrinsics are not
  // collected because they are not inlineable.
  if (InlinedFunctionInfo.ContainsCalls) {
    // Collect the raw call sites that were inlined.
    for (BasicBlock &NewBB :
         make_range(FirstNewBlock->getIterator(), Caller->end()))
      for (Instruction &I : NewBB)
        if (auto *CB = dyn_cast<CallBase>(&I))
          if (!(CB->getCalledFunction() &&
                CB->getCalledFunction()->isIntrinsic()))
            IFI.InlinedCallSites.push_back(CB);
  }

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->splice(CB.getIterator(), &*FirstNewBlock, FirstNewBlock->begin(),
                   FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->back().eraseFromParent();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      BranchInst *NewBr =
          BranchInst::Create(II->getNormalDest(), CB.getIterator());
      NewBr->setDebugLoc(Returns[0]->getDebugLoc());
    }

    // If the return instruction returned a value, replace uses of the call
    // with uses of the returned value.
    if (!CB.use_empty()) {
      ReturnInst *R = Returns[0];
      if (&CB == R->getReturnValue())
        CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
      else
        CB.replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    CB.eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    if (MergeAttributes)
      AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);

    // We are now done with the inlining.
    return InlineResult::success();
  }

  // Otherwise, we have the normal case of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks. How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  BranchInst *CreatedBranchToNormalDest = nullptr;
  if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {

    // Add an unconditional branch to make this look like the CallInst case...
    CreatedBranchToNormalDest =
        BranchInst::Create(II->getNormalDest(), CB.getIterator());

    // Split the basic block. This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and makes the invoke case more
    // symmetric to the call case.
    AfterCallBB =
        OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),
                                CalledFunc->getName() + ".exit");

  } else { // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(),
                                          CalledFunc->getName() + ".exit");
  }
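
  // At this point OrigBB ends in the unconditional branch that splitBasicBlock
  // created, AfterCallBB holds the code that originally followed the call, and
  // the cloned blocks still sit at the end of the caller; the steps below
  // retarget that branch and move the blocks into place.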

  if (IFI.CallerBFI) {
    // Copy original BB's block frequency to AfterCallBB
    IFI.CallerBFI->setBlockFreq(AfterCallBB,
                                IFI.CallerBFI->getBlockFreq(OrigBB));
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  Instruction *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, &*FirstNewBlock);

  // Now that the function is correct, make it a little bit nicer. In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->splice(AfterCallBB->getIterator(), Caller, FirstNewBlock,
                 Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = nullptr;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!CB.use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), CB.getName());
      PHI->insertBefore(AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as its operand.
      CB.replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }

    // Add a branch to the merge points and remove return instructions.
    DebugLoc Loc;
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst *BI = BranchInst::Create(AfterCallBB, RI->getIterator());
      Loc = RI->getDebugLoc();
      BI->setDebugLoc(Loc);
      RI->eraseFromParent();
    }
    // We need to set the debug location to *somewhere* inside the
    // inlined function. The line number may be nonsensical, but the
    // instruction will at least be associated with the right
    // function.
    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Loc);
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!CB.use_empty()) {
      if (&CB == Returns[0]->getReturnValue())
        CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
      else
        CB.replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->splice(AfterCallBB->begin(), ReturnBB);

    if (CreatedBranchToNormalDest)
      CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());

    // Delete the return instruction, and then the now-empty ReturnBB.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!CB.use_empty()) {
    // No returns, but something is using the return value of the call. Just
    // nuke the result.
    CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  CB.eraseFromParent();

  // If we inlined any musttail calls and the original return is now
  // unreachable, delete it. It can only contain a bitcast and ret.
  if (InlinedMustTailCalls && pred_empty(AfterCallBB))
    AfterCallBB->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
  OrigBB->splice(Br->getIterator(), CalleeEntry);

  // Remove the unconditional branch.
  Br->eraseFromParent();

  // Now we can remove the CalleeEntry block, which is now empty.
  CalleeEntry->eraseFromParent();

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef). If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI) {
    AssumptionCache *AC =
        IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;
    auto &DL = Caller->getParent()->getDataLayout();
    if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }
  }
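
  // For example, if every inlined return produced the same value, the PHI
  // simplifies to that value and former users of the call now see it directly.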

  if (MergeAttributes)
    AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);

  return InlineResult::success();
}