//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass promotes "by reference" arguments to be "by value" arguments. In
// practice, this means looking for internal functions that have pointer
// arguments. If it can prove, through the use of alias analysis, that an
// argument is *only* loaded, then it can pass the value into the function
// instead of the address of the value. This can cause recursive simplification
// of code and lead to the elimination of allocas (especially in C++ template
// code like the STL).
//
// This pass also handles aggregate arguments that are passed into a function,
// scalarizing them if the elements of the aggregate are only loaded. Note that
// by default it refuses to scalarize aggregates which would require passing in
// more than three operands to the function, because passing thousands of
// operands for a large array or structure is unprofitable! This limit can be
// configured or disabled, however.
//
// Note that this transformation could also be done for arguments that are only
// stored to (returning the value instead), but this is not currently
// implemented. This case would be best handled when and if LLVM begins
// supporting multiple return values from functions.
//
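// For illustration, a sketch (hypothetical IR) of the basic transformation:
//
//   define internal i32 @callee(ptr %p) {       ; before
//     %v = load i32, ptr %p
//     ret i32 %v
//   }
//   ; caller:   %r = call i32 @callee(ptr %a)
//
// becomes:
//
//   define internal i32 @callee(i32 %p.val) {   ; after
//     ret i32 %p.val
//   }
//   ; caller:   %a.val = load i32, ptr %a
//   ;           %r = call i32 @callee(i32 %a.val)
//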
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/ArgumentPromotion.h"

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "argpromotion"

STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");
STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");

namespace {

struct ArgPart {
  Type *Ty;
  Align Alignment;
  /// A representative guaranteed-executed load or store instruction for use by
  /// metadata transfer.
  Instruction *MustExecInstr;
};

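/// Pairs the byte offset of a promoted part within the argument with its
/// description. For example (illustrative), a callee that loads an i32 at
/// offset 0 and an i64 at offset 8 of its pointer argument produces the parts
/// {0, {i32, align 4, ...}} and {8, {i64, align 8, ...}}.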
using OffsetAndArgPart = std::pair<int64_t, ArgPart>;

} // end anonymous namespace

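/// Build a pointer that is \p Offset bytes past \p Ptr, emitting a byte-wise
/// ptradd only for a non-zero offset. Note that \p ResElemTy is unused in the
/// body below.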
static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
                            Value *Ptr, Type *ResElemTy, int64_t Offset) {
  if (Offset != 0) {
    APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
    Ptr = IRB.CreatePtrAdd(Ptr, IRB.getInt(APOffset));
  }
  return Ptr;
}

/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function. At this point, we know that it's
/// safe to do so.
static Function *
doPromotion(Function *F, FunctionAnalysisManager &FAM,
            const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>
                &ArgsToPromote) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the
  // parameter attributes are lost.
  SmallVector<AttributeSet, 8> ArgAttrVec;
  // Mapping from old to new argument indices. -1 for promoted or removed
  // arguments.
  SmallVector<unsigned> NewArgIndices;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list.
  unsigned ArgNo = 0, NewArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument.
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
      NewArgIndices.push_back(NewArgNo++);
    } else if (I->use_empty()) {
      // Dead argument (dead arguments are always marked as promotable).
      ++NumArgumentsDead;
      NewArgIndices.push_back((unsigned)-1);
    } else {
      const auto &ArgParts = ArgsToPromote.find(&*I)->second;
      for (const auto &Pair : ArgParts) {
        Params.push_back(Pair.second.Ty);
        ArgAttrVec.push_back(AttributeSet());
      }
      ++NumArgumentsPromoted;
      NewArgIndices.push_back((unsigned)-1);
      NewArgNo += ArgParts.size();
    }
  }
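
  // Illustrative example: with three arguments where the second is promoted
  // into two parts, Params ends up as {T0, P0, P1, T2} and NewArgIndices as
  // {0, -1, 3}: promoted arguments map to -1, and later arguments shift right.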

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),
                                  F->getName());
  NF->copyAttributesFrom(F);
  NF->copyMetadata(F, 0);
  NF->setIsNewDbgInfoFormat(F->IsNewDbgInfoFormat);

  // The new function will have the !dbg metadata copied from the original
  // function. The original function may not be deleted, and dbg metadata need
  // to be unique, so we need to drop it.
  F->setSubprogram(nullptr);

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
                    << "From: " << *F);

  uint64_t LargestVectorWidth = 0;
  for (auto *I : Params)
    if (auto *VT = dyn_cast<llvm::VectorType>(I))
      LargestVectorWidth = std::max(
          LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue());

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
                                       PAL.getRetAttrs(), ArgAttrVec));

  // Remap argument indices in allocsize attribute.
  if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) {
    unsigned Arg1 = NewArgIndices[AllocSize->first];
    assert(Arg1 != (unsigned)-1 && "allocsize cannot be promoted argument");
    std::optional<unsigned> Arg2;
    if (AllocSize->second) {
      Arg2 = NewArgIndices[*AllocSize->second];
      assert(Arg2 != (unsigned)-1 && "allocsize cannot be promoted argument");
    }
    NF->addFnAttr(Attribute::getWithAllocSizeArgs(F->getContext(), Arg1, Arg2));
  }
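
  // For example (illustrative): if the first argument was promoted into two
  // parts, a former allocsize(1) on F becomes allocsize(2) on NF, because the
  // referenced argument shifted one slot to the right.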

  AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all the callers of the function, transforming the call sites to
  // pass in the loaded values instead of the pointers.
  SmallVector<Value *, 16> Args;
  const DataLayout &DL = F->getParent()->getDataLayout();
  SmallVector<WeakTrackingVH, 16> DeadArgs;

  while (!F->use_empty()) {
    CallBase &CB = cast<CallBase>(*F->user_back());
    assert(CB.getCalledFunction() == F);
    const AttributeList &CallPAL = CB.getAttributes();
    IRBuilder<NoFolder> IRB(&CB);

    // Loop over the operands, inserting GEPs and loads in the caller as
    // appropriate.
    auto *AI = CB.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo) {
      if (!ArgsToPromote.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument.
        ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
      } else if (!I->use_empty()) {
        Value *V = *AI;
        const auto &ArgParts = ArgsToPromote.find(&*I)->second;
        for (const auto &Pair : ArgParts) {
          LoadInst *LI = IRB.CreateAlignedLoad(
              Pair.second.Ty,
              createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),
              Pair.second.Alignment, V->getName() + ".val");
          if (Pair.second.MustExecInstr) {
            LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
            LI->copyMetadata(*Pair.second.MustExecInstr,
                             {LLVMContext::MD_dereferenceable,
                              LLVMContext::MD_dereferenceable_or_null,
                              LLVMContext::MD_noundef,
                              LLVMContext::MD_nontemporal});
            // Only transfer poison-generating metadata if we also have
            // !noundef.
            // TODO: Without !noundef, we could merge this metadata across
            // all promoted loads.
            if (LI->hasMetadata(LLVMContext::MD_noundef))
              LI->copyMetadata(*Pair.second.MustExecInstr,
                               {LLVMContext::MD_range, LLVMContext::MD_nonnull,
                                LLVMContext::MD_align});
          }
          Args.push_back(LI);
          ArgAttrVec.push_back(AttributeSet());
        }
      } else {
        assert(ArgsToPromote.count(&*I) && I->use_empty());
        DeadArgs.emplace_back(AI->get());
      }
    }

    // Push any varargs arguments on the list.
    for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CB.getOperandBundlesAsDefs(OpBundles);

    CallBase *NewCS = nullptr;
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", CB.getIterator());
    } else {
      auto *NewCall =
          CallInst::Create(NF, Args, OpBundles, "", CB.getIterator());
      NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS->setCallingConv(CB.getCallingConv());
    NewCS->setAttributes(AttributeList::get(
        F->getContext(), CallPAL.getFnAttrs(), CallPAL.getRetAttrs(),
        ArgAttrVec));
    NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
    Args.clear();
    ArgAttrVec.clear();

    AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),
                                                  LargestVectorWidth);

    if (!CB.use_empty()) {
      CB.replaceAllUsesWith(NewCS);
      NewCS->takeName(&CB);
    }

    // Finally, remove the old call from the program, reducing the use-count
    // of F.
    CB.eraseFromParent();
  }

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->splice(NF->begin(), F);

  // We will collect all the newly created allocas to promote them into
  // registers after the following loop.
  SmallVector<AllocaInst *, 4> Allocas;

  // Loop over the argument list, transferring uses of the old arguments over
  // to the new arguments, as well as transferring over the names.
  Function::arg_iterator I2 = NF->arg_begin();
  for (Argument &Arg : F->args()) {
    if (!ArgsToPromote.count(&Arg)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      Arg.replaceAllUsesWith(&*I2);
      I2->takeName(&Arg);
      ++I2;
      continue;
    }

    // There potentially are metadata uses for things like llvm.dbg.value.
    // Replace them with undef, after handling the other regular uses.
    auto RauwUndefMetadata = make_scope_exit(
        [&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });

    if (Arg.use_empty())
      continue;

    // Otherwise, if we promoted this argument, we have to create an alloca in
    // the callee for every promotable part and store each of the new incoming
    // arguments into the corresponding alloca. This gives the old code
    // (especially the store instructions, where they are allowed) a chance to
    // work as before.
    assert(Arg.getType()->isPointerTy() &&
           "Only arguments with a pointer type are promotable");

    IRBuilder<NoFolder> IRB(&NF->begin()->front());

    // Add only the promoted elements, i.e. the parts recorded in
    // ArgsToPromote.
    SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;
    for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {
      int64_t Offset = Pair.first;
      const ArgPart &Part = Pair.second;

      Argument *NewArg = I2++;
      NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");

      AllocaInst *NewAlloca = IRB.CreateAlloca(
          Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");
      NewAlloca->setAlignment(Pair.second.Alignment);
      IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);

      // Collect the alloca to retarget the users to.
      OffsetToAlloca.insert({Offset, NewAlloca});
    }

    auto GetAlloca = [&](Value *Ptr) {
      APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
      Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                   /* AllowNonInbounds */ true);
      assert(Ptr == &Arg && "Not constant offset from arg?");
      return OffsetToAlloca.lookup(Offset.getSExtValue());
    };

    // Clean up the dead instructions (GEPs and BitCasts sitting between the
    // original argument and its load and store users), and retarget every load
    // and store to the newly created alloca.
    SmallVector<Value *, 16> Worklist;
    SmallVector<Instruction *, 16> DeadInsts;
    append_range(Worklist, Arg.users());
    while (!Worklist.empty()) {
      Value *V = Worklist.pop_back_val();
      if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {
        DeadInsts.push_back(cast<Instruction>(V));
        append_range(Worklist, V->users());
        continue;
      }

      if (auto *LI = dyn_cast<LoadInst>(V)) {
        Value *Ptr = LI->getPointerOperand();
        LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      if (auto *SI = dyn_cast<StoreInst>(V)) {
        assert(!SI->isVolatile() && "Volatile operations can't be promoted.");
        Value *Ptr = SI->getPointerOperand();
        SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));
        continue;
      }

      llvm_unreachable("Unexpected user");
    }

    for (Instruction *I : DeadInsts) {
      I->replaceAllUsesWith(PoisonValue::get(I->getType()));
      I->eraseFromParent();
    }

    // Collect the allocas for promotion.
    for (const auto &Pair : OffsetToAlloca) {
      assert(isAllocaPromotable(Pair.second) &&
             "By design, only promotable allocas should be produced.");
      Allocas.push_back(Pair.second);
    }
  }

  LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()
                    << " alloca(s) are promotable by Mem2Reg\n");

  if (!Allocas.empty()) {
    // Promote the allocas into SSA registers. Our earlier checks have ensured
    // that PromoteMemToReg() will succeed.
    auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);
    auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);
    PromoteMemToReg(Allocas, DT, &AC);
  }

  return NF;
}

/// Return true if we can prove that all callers pass in a valid pointer for
/// the specified function argument.
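/// For example (illustrative): if promotion needs an i64 load at offset 0
/// under align 8, every call site must pass a pointer known to be
/// dereferenceable(8) and 8-byte aligned for this check to succeed.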
static bool allCallersPassValidPointerForArgument(Argument *Arg,
                                                  Align NeededAlign,
                                                  uint64_t NeededDerefBytes) {
  Function *Callee = Arg->getParent();
  const DataLayout &DL = Callee->getParent()->getDataLayout();
  APInt Bytes(64, NeededDerefBytes);

  // Check if the argument itself is marked dereferenceable and aligned.
  if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
    return true;

  // Look at all call sites of the function. At this point we know we only have
  // direct callees.
  return all_of(Callee->users(), [&](User *U) {
    CallBase &CB = cast<CallBase>(*U);
    return isDereferenceableAndAlignedPointer(
        CB.getArgOperand(Arg->getArgNo()), NeededAlign, Bytes, DL);
  });
}

/// Determine whether this argument is safe to promote, and if so find the
/// argument parts it can be promoted into.
static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                         unsigned MaxElements, bool IsRecursive,
                         SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
  // Quick exit for unused arguments.
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all of its uses are loads at known
  // offsets.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (i.e., there is a load in the
  // entry block) or the pointer passed in at every call site is guaranteed to
  // be valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway; in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.

  SmallDenseMap<int64_t, ArgPart, 4> ArgParts;
  Align NeededAlign(1);
  uint64_t NeededDerefBytes = 0;

  // If this is a byval argument, we also allow store instructions. We only
  // handle arguments this way when they have a specified alignment; if the
  // alignment is unspecified, the actual alignment of the argument is
  // target-specific.
  bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();

  // An end user of a pointer argument is a load or store instruction.
  // Returns std::nullopt if the load or store is not based on the argument,
  // true if we can promote the instruction, and false otherwise.
  auto HandleEndUser = [&](auto *I, Type *Ty,
                           bool GuaranteedToExecute) -> std::optional<bool> {
    // Don't promote volatile or atomic instructions.
    if (!I->isSimple())
      return false;

    Value *Ptr = I->getPointerOperand();
    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
    Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
                                                 /* AllowNonInbounds */ true);
    if (Ptr != Arg)
      return std::nullopt;

    if (Offset.getSignificantBits() >= 64)
      return false;

    TypeSize Size = DL.getTypeStoreSize(Ty);
    // Don't try to promote scalable types.
    if (Size.isScalable())
      return false;

    // If this is a recursive function and one of the types is a pointer,
    // then promoting it might lead to recursive promotion.
    if (IsRecursive && Ty->isPointerTy())
      return false;

    int64_t Off = Offset.getSExtValue();
    auto Pair = ArgParts.try_emplace(
        Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? I : nullptr});
    ArgPart &Part = Pair.first->second;
    bool OffsetNotSeenBefore = Pair.second;

    // We limit promotion to at most a fixed number of elements of the
    // aggregate.
    if (MaxElements > 0 && ArgParts.size() > MaxElements) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "more than " << MaxElements << " parts\n");
      return false;
    }

    // For now, we only support loading/storing one specific type at a given
    // offset.
    if (Part.Ty != Ty) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "accessed as both " << *Part.Ty << " and " << *Ty
                        << " at offset " << Off << "\n");
      return false;
    }
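
    // For example (illustrative), an argument accessed as both i32 and float
    // at offset 0 is rejected here, even though both types occupy four bytes.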

    // If this instruction is not guaranteed to execute, and we haven't seen a
    // load or store at this offset before (or it had lower alignment), then we
    // need to remember that requirement.
    // Note that skipping instructions of previously seen offsets is only
    // correct because we only allow a single type for a given offset, which
    // also means that the number of accessed bytes will be the same.
    if (!GuaranteedToExecute &&
        (OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {
      // We won't be able to prove dereferenceability for negative offsets.
      if (Off < 0)
        return false;

      // If the offset is not aligned, an aligned base pointer won't help.
      if (!isAligned(I->getAlign(), Off))
        return false;

      NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());
      NeededAlign = std::max(NeededAlign, I->getAlign());
    }

    Part.Alignment = std::max(Part.Alignment, I->getAlign());
    return true;
  };

  // Look for loads and stores that are guaranteed to execute on entry.
  for (Instruction &I : Arg->getParent()->getEntryBlock()) {
    std::optional<bool> Res{};
    if (LoadInst *LI = dyn_cast<LoadInst>(&I))
      Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);
    else if (StoreInst *SI = dyn_cast<StoreInst>(&I))
      Res = HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ true);
    if (Res && !*Res)
      return false;

    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
      break;
  }
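
  // For example (illustrative), a load at the top of the entry block runs
  // before any instruction that might not transfer control (such as a call
  // that could unwind), so it is handled above and adds no extra
  // dereferenceability requirement on the callers.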

  // Now look at all loads of the argument. Remember the load instructions
  // for the aliasing check below.
  SmallVector<const Use *, 16> Worklist;
  SmallPtrSet<const Use *, 16> Visited;
  SmallVector<LoadInst *, 16> Loads;
  auto AppendUses = [&](const Value *V) {
    for (const Use &U : V->uses())
      if (Visited.insert(&U).second)
        Worklist.push_back(&U);
  };
  AppendUses(Arg);
  while (!Worklist.empty()) {
    const Use *U = Worklist.pop_back_val();
    Value *V = U->getUser();
    if (isa<BitCastInst>(V)) {
      AppendUses(V);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
      if (!GEP->hasAllConstantIndices())
        return false;
      AppendUses(V);
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(V)) {
      if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))
        return false;
      Loads.push_back(LI);
      continue;
    }

    // Stores are allowed for byval arguments. Only stores TO the argument are
    // allowed; all other stores are unknown users.
    auto *SI = dyn_cast<StoreInst>(V);
    if (AreStoresAllowed && SI &&
        U->getOperandNo() == StoreInst::getPointerOperandIndex()) {
      if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),
                          /* GuaranteedToExecute */ false))
        return false;
      continue;
    }

    // Unknown user.
    LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                      << "unknown user " << *V << "\n");
    return false;
  }

  if (NeededDerefBytes || NeededAlign > 1) {
    // Try to prove the required dereferenceability and alignment at all call
    // sites.
    if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
                                               NeededDerefBytes)) {
      LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                        << "not dereferenceable or aligned\n");
      return false;
    }
  }

  if (ArgParts.empty())
    return true; // No users, this is a dead argument.

  // Sort parts by offset.
  append_range(ArgPartsVec, ArgParts);
  sort(ArgPartsVec, llvm::less_first());

  // Make sure the parts are non-overlapping.
  int64_t Offset = ArgPartsVec[0].first;
  for (const auto &Pair : ArgPartsVec) {
    if (Pair.first < Offset)
      return false; // Overlap with previous part.

    Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);
  }
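
  // For example (illustrative), parts {0: i64} and {4: i32} overlap and are
  // rejected, because the first part covers bytes [0, 8).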

  // If store instructions are allowed, the path from the entry of the function
  // to each load may not be free of instructions that potentially invalidate
  // the load, and this is an admissible situation.
  if (AreStoresAllowed)
    return true;

  // Okay, now we know that the argument is only used by load instructions, and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.
  for (LoadInst *Load : Loads) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    BasicBlock *BB = Load->getParent();

    MemoryLocation Loc = MemoryLocation::get(Load);
    if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc,
                                      ModRefInfo::Mod))
      return false; // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (BasicBlock *P : predecessors(BB)) {
      for (BasicBlock *TranspBB : inverse_depth_first(P))
        if (AAR.canBasicBlockModify(*TranspBB, Loc))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}

/// Check if callers and callee agree on how promoted arguments would be
/// passed.
static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
                                  const TargetTransformInfo &TTI) {
  return all_of(F.uses(), [&](const Use &U) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    if (!CB)
      return false;

    const Function *Caller = CB->getCaller();
    const Function *Callee = CB->getCalledFunction();
    return TTI.areTypesABICompatible(Caller, Callee, Types);
  });
}

/// PromoteArguments - This method checks the specified function to see if
/// there are any promotable arguments and if it is safe to promote the
/// function (for example, all callers are direct). If it is safe to promote
/// some arguments, it calls doPromotion.
static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
                                  unsigned MaxElements, bool IsRecursive) {
  // Don't perform argument promotion for naked functions; otherwise we can end
  // up removing parameters that are seemingly 'not used' as they are referred
  // to in the assembly.
  if (F->hasFnAttribute(Attribute::Naked))
    return nullptr;

  // Make sure that it is local to this module.
  if (!F->hasLocalLinkage())
    return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg())
    return nullptr;

  // Don't transform functions that receive inallocas, as the transformation
  // may not be safe depending on calling convention.
  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
    return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument *, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty())
    return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  for (Use &U : F->uses()) {
    CallBase *CB = dyn_cast<CallBase>(U.getUser());
    // Must be a direct call.
    if (CB == nullptr || !CB->isCallee(&U) ||
        CB->getFunctionType() != F->getFunctionType())
      return nullptr;

    // Can't change signature of musttail callee.
    if (CB->isMustTailCall())
      return nullptr;

    if (CB->getFunction() == F)
      IsRecursive = true;
  }

  // Can't change signature of musttail caller.
  // FIXME: Support promoting whole chain of musttail functions.
  for (BasicBlock &BB : *F)
    if (BB.getTerminatingMustTailCall())
      return nullptr;

  const DataLayout &DL = F->getParent()->getDataLayout();
  auto &AAR = FAM.getResult<AAManager>(*F);
  const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
  unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();
  for (Argument *PtrArg : PointerArgs) {
    // Replace the sret attribute with noalias. This reduces register pressure
    // by avoiding a register copy.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->removeParamAttr(ArgNo, Attribute::StructRet);
      F->addParamAttr(ArgNo, Attribute::NoAlias);
      for (Use &U : F->uses()) {
        CallBase &CB = cast<CallBase>(*U.getUser());
        CB.removeParamAttr(ArgNo, Attribute::StructRet);
        CB.addParamAttr(ArgNo, Attribute::NoAlias);
      }
    }

    // See if we can promote the pointer to its value.
    SmallVector<OffsetAndArgPart, 4> ArgParts;

    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
      SmallVector<Type *, 4> Types;
      for (const auto &Pair : ArgParts)
        Types.push_back(Pair.second.Ty);

      if (areTypesABICompatible(Types, *F, TTI)) {
        NumArgsAfterPromote += ArgParts.size() - 1;
        ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
      }
    }
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty())
    return nullptr;

  if (NumArgsAfterPromote > TTI.getMaxNumArgs())
    return nullptr;

  return doPromotion(F, FAM, ArgsToPromote);
}

PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
                                             CGSCCAnalysisManager &AM,
                                             LazyCallGraph &CG,
                                             CGSCCUpdateResult &UR) {
  bool Changed = false, LocalChange;

  // Iterate until we stop promoting from this SCC.
  do {
    LocalChange = false;

    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

    bool IsRecursive = C.size() > 1;
    for (LazyCallGraph::Node &N : C) {
      Function &OldF = N.getFunction();
      Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);
      if (!NewF)
        continue;
      LocalChange = true;

      // Directly substitute the functions in the call graph. Note that this
      // requires the old function to be completely dead and completely
      // replaced by the new function. It does no call graph updates, it merely
      // swaps out the particular function mapped to a particular node in the
      // graph.
      C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
      FAM.clear(OldF, OldF.getName());
      OldF.eraseFromParent();

      PreservedAnalyses FuncPA;
      FuncPA.preserveSet<CFGAnalyses>();
      for (auto *U : NewF->users()) {
        auto *UserF = cast<CallBase>(U)->getFunction();
        FAM.invalidate(*UserF, FuncPA);
      }
    }

    Changed |= LocalChange;
  } while (LocalChange);

  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  // We've cleared out analyses for deleted functions.
  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
  // We've manually invalidated analyses for functions we've modified.
  PA.preserveSet<AllAnalysesOn<Function>>();
  return PA;
}