1 | //===- AArch64StackTagging.cpp - Stack tagging in IR --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
//===----------------------------------------------------------------------===//
9 | |
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
#include <cassert>
#include <iterator>
#include <map>
#include <memory>
#include <utility>
58 | |
59 | using namespace llvm; |
60 | |
61 | #define DEBUG_TYPE "aarch64-stack-tagging" |
62 | |
63 | static cl::opt<bool> ClMergeInit( |
64 | "stack-tagging-merge-init" , cl::Hidden, cl::init(Val: true), |
65 | cl::desc("merge stack variable initializers with tagging when possible" )); |
66 | |
67 | static cl::opt<bool> |
68 | ClUseStackSafety("stack-tagging-use-stack-safety" , cl::Hidden, |
69 | cl::init(Val: true), |
70 | cl::desc("Use Stack Safety analysis results" )); |
71 | |
72 | static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit" , |
73 | cl::init(Val: 40), cl::Hidden); |
74 | |
75 | static cl::opt<unsigned> |
76 | ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit" , cl::init(Val: 272), |
77 | cl::Hidden); |
78 | |
79 | static cl::opt<size_t> ClMaxLifetimes( |
80 | "stack-tagging-max-lifetimes-for-alloca" , cl::Hidden, cl::init(Val: 3), |
81 | cl::ReallyHidden, |
82 | cl::desc("How many lifetime ends to handle for a single alloca." ), |
83 | cl::Optional); |
84 | |
85 | static const Align kTagGranuleSize = Align(16); |
86 | |
87 | namespace { |
88 | |
89 | class InitializerBuilder { |
90 | uint64_t Size; |
91 | const DataLayout *DL; |
92 | Value *BasePtr; |
93 | Function *SetTagFn; |
94 | Function *SetTagZeroFn; |
95 | Function *StgpFn; |
96 | |
97 | // List of initializers sorted by start offset. |
98 | struct Range { |
99 | uint64_t Start, End; |
100 | Instruction *Inst; |
101 | }; |
102 | SmallVector<Range, 4> Ranges; |
103 | // 8-aligned offset => 8-byte initializer |
104 | // Missing keys are zero initialized. |
105 | std::map<uint64_t, Value *> Out; |
106 | |
107 | public: |
108 | InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr, |
109 | Function *SetTagFn, Function *SetTagZeroFn, |
110 | Function *StgpFn) |
111 | : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn), |
112 | SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {} |
113 | |
114 | bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) { |
115 | auto I = |
116 | llvm::lower_bound(Range&: Ranges, Value&: Start, C: [](const Range &LHS, uint64_t RHS) { |
117 | return LHS.End <= RHS; |
118 | }); |
119 | if (I != Ranges.end() && End > I->Start) { |
120 | // Overlap - bail. |
121 | return false; |
122 | } |
123 | Ranges.insert(I, Elt: {.Start: Start, .End: End, .Inst: Inst}); |
124 | return true; |
125 | } |
126 | |
127 | bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) { |
128 | int64_t StoreSize = DL->getTypeStoreSize(Ty: SI->getOperand(i_nocapture: 0)->getType()); |
129 | if (!addRange(Start: Offset, End: Offset + StoreSize, Inst: SI)) |
130 | return false; |
131 | IRBuilder<> IRB(SI); |
132 | applyStore(IRB, Start: Offset, End: Offset + StoreSize, StoredValue: SI->getOperand(i_nocapture: 0)); |
133 | return true; |
134 | } |
135 | |
136 | bool addMemSet(uint64_t Offset, MemSetInst *MSI) { |
137 | uint64_t StoreSize = cast<ConstantInt>(Val: MSI->getLength())->getZExtValue(); |
138 | if (!addRange(Start: Offset, End: Offset + StoreSize, Inst: MSI)) |
139 | return false; |
140 | IRBuilder<> IRB(MSI); |
141 | applyMemSet(IRB, Start: Offset, End: Offset + StoreSize, |
142 | V: cast<ConstantInt>(Val: MSI->getValue())); |
143 | return true; |
144 | } |
145 | |
146 | void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End, |
147 | ConstantInt *V) { |
148 | // Out[] does not distinguish between zero and undef, and we already know |
149 | // that this memset does not overlap with any other initializer. Nothing to |
150 | // do for memset(0). |
151 | if (V->isZero()) |
152 | return; |
153 | for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) { |
154 | uint64_t Cst = 0x0101010101010101UL; |
155 | int LowBits = Offset < Start ? (Start - Offset) * 8 : 0; |
156 | if (LowBits) |
157 | Cst = (Cst >> LowBits) << LowBits; |
158 | int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0; |
159 | if (HighBits) |
160 | Cst = (Cst << HighBits) >> HighBits; |
161 | ConstantInt *C = |
162 | ConstantInt::get(Ty: IRB.getInt64Ty(), V: Cst * V->getZExtValue()); |
163 | |
164 | Value *&CurrentV = Out[Offset]; |
165 | if (!CurrentV) { |
166 | CurrentV = C; |
167 | } else { |
168 | CurrentV = IRB.CreateOr(LHS: CurrentV, RHS: C); |
169 | } |
170 | } |
171 | } |
172 | |
173 | // Take a 64-bit slice of the value starting at the given offset (in bytes). |
174 | // Offset can be negative. Pad with zeroes on both sides when necessary. |
175 | Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) { |
176 | if (Offset > 0) { |
177 | V = IRB.CreateLShr(LHS: V, RHS: Offset * 8); |
178 | V = IRB.CreateZExtOrTrunc(V, DestTy: IRB.getInt64Ty()); |
179 | } else if (Offset < 0) { |
180 | V = IRB.CreateZExtOrTrunc(V, DestTy: IRB.getInt64Ty()); |
181 | V = IRB.CreateShl(LHS: V, RHS: -Offset * 8); |
182 | } else { |
183 | V = IRB.CreateZExtOrTrunc(V, DestTy: IRB.getInt64Ty()); |
184 | } |
185 | return V; |
186 | } |
187 | |
188 | void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End, |
189 | Value *StoredValue) { |
190 | StoredValue = flatten(IRB, V: StoredValue); |
191 | for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) { |
192 | Value *V = sliceValue(IRB, V: StoredValue, Offset: Offset - Start); |
193 | Value *&CurrentV = Out[Offset]; |
194 | if (!CurrentV) { |
195 | CurrentV = V; |
196 | } else { |
197 | CurrentV = IRB.CreateOr(LHS: CurrentV, RHS: V); |
198 | } |
199 | } |
200 | } |
201 | |
202 | void generate(IRBuilder<> &IRB) { |
203 | LLVM_DEBUG(dbgs() << "Combined initializer\n" ); |
204 | // No initializers => the entire allocation is undef. |
205 | if (Ranges.empty()) { |
206 | emitUndef(IRB, Offset: 0, Size); |
207 | return; |
208 | } |
209 | |
210 | // Look through 8-byte initializer list 16 bytes at a time; |
211 | // If one of the two 8-byte halfs is non-zero non-undef, emit STGP. |
212 | // Otherwise, emit zeroes up to next available item. |
213 | uint64_t LastOffset = 0; |
214 | for (uint64_t Offset = 0; Offset < Size; Offset += 16) { |
215 | auto I1 = Out.find(x: Offset); |
216 | auto I2 = Out.find(x: Offset + 8); |
217 | if (I1 == Out.end() && I2 == Out.end()) |
218 | continue; |
219 | |
220 | if (Offset > LastOffset) |
221 | emitZeroes(IRB, Offset: LastOffset, Size: Offset - LastOffset); |
222 | |
223 | Value *Store1 = I1 == Out.end() ? Constant::getNullValue(Ty: IRB.getInt64Ty()) |
224 | : I1->second; |
225 | Value *Store2 = I2 == Out.end() ? Constant::getNullValue(Ty: IRB.getInt64Ty()) |
226 | : I2->second; |
227 | emitPair(IRB, Offset, A: Store1, B: Store2); |
228 | LastOffset = Offset + 16; |
229 | } |
230 | |
231 | // memset(0) does not update Out[], therefore the tail can be either undef |
232 | // or zero. |
233 | if (LastOffset < Size) |
234 | emitZeroes(IRB, Offset: LastOffset, Size: Size - LastOffset); |
235 | |
236 | for (const auto &R : Ranges) { |
237 | R.Inst->eraseFromParent(); |
238 | } |
239 | } |
240 | |
241 | void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) { |
242 | LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size |
243 | << ") zero\n" ); |
244 | Value *Ptr = BasePtr; |
245 | if (Offset) |
246 | Ptr = IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), Ptr, Idx0: Offset); |
247 | IRB.CreateCall(Callee: SetTagZeroFn, |
248 | Args: {Ptr, ConstantInt::get(Ty: IRB.getInt64Ty(), V: Size)}); |
249 | } |
250 | |
251 | void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) { |
252 | LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size |
253 | << ") undef\n" ); |
254 | Value *Ptr = BasePtr; |
255 | if (Offset) |
256 | Ptr = IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), Ptr, Idx0: Offset); |
257 | IRB.CreateCall(Callee: SetTagFn, Args: {Ptr, ConstantInt::get(Ty: IRB.getInt64Ty(), V: Size)}); |
258 | } |
259 | |
260 | void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) { |
261 | LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + 16 << "):\n" ); |
262 | LLVM_DEBUG(dbgs() << " " << *A << "\n " << *B << "\n" ); |
263 | Value *Ptr = BasePtr; |
264 | if (Offset) |
265 | Ptr = IRB.CreateConstGEP1_32(Ty: IRB.getInt8Ty(), Ptr, Idx0: Offset); |
266 | IRB.CreateCall(Callee: StgpFn, Args: {Ptr, A, B}); |
267 | } |
268 | |
269 | Value *flatten(IRBuilder<> &IRB, Value *V) { |
270 | if (V->getType()->isIntegerTy()) |
271 | return V; |
272 | // vector of pointers -> vector of ints |
273 | if (VectorType *VecTy = dyn_cast<VectorType>(Val: V->getType())) { |
274 | LLVMContext &Ctx = IRB.getContext(); |
275 | Type *EltTy = VecTy->getElementType(); |
276 | if (EltTy->isPointerTy()) { |
277 | uint32_t EltSize = DL->getTypeSizeInBits(Ty: EltTy); |
278 | auto *NewTy = FixedVectorType::get( |
279 | ElementType: IntegerType::get(C&: Ctx, NumBits: EltSize), |
280 | NumElts: cast<FixedVectorType>(Val: VecTy)->getNumElements()); |
281 | V = IRB.CreatePointerCast(V, DestTy: NewTy); |
282 | } |
283 | } |
284 | return IRB.CreateBitOrPointerCast( |
285 | V, DestTy: IRB.getIntNTy(N: DL->getTypeStoreSize(Ty: V->getType()) * 8)); |
286 | } |
287 | }; |
288 | |
289 | class AArch64StackTagging : public FunctionPass { |
290 | const bool MergeInit; |
291 | const bool UseStackSafety; |
292 | |
293 | public: |
294 | static char ID; // Pass ID, replacement for typeid |
295 | |
296 | AArch64StackTagging(bool IsOptNone = false) |
297 | : FunctionPass(ID), |
298 | MergeInit(ClMergeInit.getNumOccurrences() ? ClMergeInit : !IsOptNone), |
299 | UseStackSafety(ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety |
300 | : !IsOptNone) { |
301 | initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry()); |
302 | } |
303 | |
304 | void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr, |
305 | uint64_t Size); |
306 | void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size); |
307 | |
308 | Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr, |
309 | uint64_t Size, InitializerBuilder &IB); |
310 | |
311 | Instruction *insertBaseTaggedPointer( |
312 | const MapVector<AllocaInst *, memtag::AllocaInfo> &Allocas, |
313 | const DominatorTree *DT); |
314 | bool runOnFunction(Function &F) override; |
315 | |
316 | StringRef getPassName() const override { return "AArch64 Stack Tagging" ; } |
317 | |
318 | private: |
319 | Function *F = nullptr; |
320 | Function *SetTagFunc = nullptr; |
321 | const DataLayout *DL = nullptr; |
322 | AAResults *AA = nullptr; |
323 | const StackSafetyGlobalInfo *SSI = nullptr; |
324 | |
325 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
326 | AU.setPreservesCFG(); |
327 | if (UseStackSafety) |
328 | AU.addRequired<StackSafetyGlobalInfoWrapperPass>(); |
329 | if (MergeInit) |
330 | AU.addRequired<AAResultsWrapperPass>(); |
331 | } |
332 | }; |
333 | |
334 | } // end anonymous namespace |
335 | |
336 | char AArch64StackTagging::ID = 0; |
337 | |
338 | INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging" , |
339 | false, false) |
340 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) |
341 | INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass) |
342 | INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging" , |
343 | false, false) |
344 | |
345 | FunctionPass *llvm::createAArch64StackTaggingPass(bool IsOptNone) { |
346 | return new AArch64StackTagging(IsOptNone); |
347 | } |
348 | |
349 | Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst, |
350 | Value *StartPtr, |
351 | uint64_t Size, |
352 | InitializerBuilder &IB) { |
353 | MemoryLocation AllocaLoc{StartPtr, Size}; |
354 | Instruction *LastInst = StartInst; |
355 | BasicBlock::iterator BI(StartInst); |
356 | |
357 | unsigned Count = 0; |
358 | for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) { |
359 | if (!isa<DbgInfoIntrinsic>(Val: *BI)) |
360 | ++Count; |
361 | |
362 | if (isNoModRef(MRI: AA->getModRefInfo(I: &*BI, OptLoc: AllocaLoc))) |
363 | continue; |
364 | |
365 | if (!isa<StoreInst>(Val: BI) && !isa<MemSetInst>(Val: BI)) { |
366 | // If the instruction is readnone, ignore it, otherwise bail out. We |
367 | // don't even allow readonly here because we don't want something like: |
368 | // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). |
369 | if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) |
370 | break; |
371 | continue; |
372 | } |
373 | |
374 | if (StoreInst *NextStore = dyn_cast<StoreInst>(Val&: BI)) { |
375 | if (!NextStore->isSimple()) |
376 | break; |
377 | |
378 | // Check to see if this store is to a constant offset from the start ptr. |
379 | std::optional<int64_t> Offset = |
380 | NextStore->getPointerOperand()->getPointerOffsetFrom(Other: StartPtr, DL: *DL); |
381 | if (!Offset) |
382 | break; |
383 | |
384 | if (!IB.addStore(Offset: *Offset, SI: NextStore, DL)) |
385 | break; |
386 | LastInst = NextStore; |
387 | } else { |
388 | MemSetInst *MSI = cast<MemSetInst>(Val&: BI); |
389 | |
390 | if (MSI->isVolatile() || !isa<ConstantInt>(Val: MSI->getLength())) |
391 | break; |
392 | |
393 | if (!isa<ConstantInt>(Val: MSI->getValue())) |
394 | break; |
395 | |
396 | // Check to see if this store is to a constant offset from the start ptr. |
397 | std::optional<int64_t> Offset = |
398 | MSI->getDest()->getPointerOffsetFrom(Other: StartPtr, DL: *DL); |
399 | if (!Offset) |
400 | break; |
401 | |
402 | if (!IB.addMemSet(Offset: *Offset, MSI)) |
403 | break; |
404 | LastInst = MSI; |
405 | } |
406 | } |
407 | return LastInst; |
408 | } |
409 | |
410 | void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore, |
411 | Value *Ptr, uint64_t Size) { |
412 | auto SetTagZeroFunc = |
413 | Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero); |
414 | auto StgpFunc = |
415 | Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp); |
416 | |
417 | InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc); |
418 | bool LittleEndian = |
419 | Triple(AI->getModule()->getTargetTriple()).isLittleEndian(); |
420 | // Current implementation of initializer merging assumes little endianness. |
421 | if (MergeInit && !F->hasOptNone() && LittleEndian && |
422 | Size < ClMergeInitSizeLimit) { |
423 | LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI |
424 | << ", size = " << Size << "\n" ); |
425 | InsertBefore = collectInitializers(StartInst: InsertBefore, StartPtr: Ptr, Size, IB); |
426 | } |
427 | |
428 | IRBuilder<> IRB(InsertBefore); |
429 | IB.generate(IRB); |
430 | } |
431 | |
432 | void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore, |
433 | uint64_t Size) { |
434 | IRBuilder<> IRB(InsertBefore); |
435 | IRB.CreateCall(Callee: SetTagFunc, Args: {IRB.CreatePointerCast(V: AI, DestTy: IRB.getPtrTy()), |
436 | ConstantInt::get(Ty: IRB.getInt64Ty(), V: Size)}); |
437 | } |
438 | |
439 | Instruction *AArch64StackTagging::insertBaseTaggedPointer( |
440 | const MapVector<AllocaInst *, memtag::AllocaInfo> &AllocasToInstrument, |
441 | const DominatorTree *DT) { |
442 | BasicBlock *PrologueBB = nullptr; |
443 | // Try sinking IRG as deep as possible to avoid hurting shrink wrap. |
444 | for (auto &I : AllocasToInstrument) { |
445 | const memtag::AllocaInfo &Info = I.second; |
446 | AllocaInst *AI = Info.AI; |
447 | if (!PrologueBB) { |
448 | PrologueBB = AI->getParent(); |
449 | continue; |
450 | } |
451 | PrologueBB = DT->findNearestCommonDominator(A: PrologueBB, B: AI->getParent()); |
452 | } |
453 | assert(PrologueBB); |
454 | |
455 | IRBuilder<> IRB(&PrologueBB->front()); |
456 | Function *IRG_SP = |
457 | Intrinsic::getDeclaration(M: F->getParent(), Intrinsic::id: aarch64_irg_sp); |
458 | Instruction *Base = |
459 | IRB.CreateCall(Callee: IRG_SP, Args: {Constant::getNullValue(Ty: IRB.getInt64Ty())}); |
460 | Base->setName("basetag" ); |
461 | return Base; |
462 | } |
463 | |
464 | // FIXME: check for MTE extension |
465 | bool AArch64StackTagging::runOnFunction(Function &Fn) { |
466 | if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag)) |
467 | return false; |
468 | |
469 | if (UseStackSafety) |
470 | SSI = &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult(); |
471 | F = &Fn; |
472 | DL = &Fn.getParent()->getDataLayout(); |
473 | if (MergeInit) |
474 | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
475 | |
476 | memtag::StackInfoBuilder SIB(SSI); |
477 | for (Instruction &I : instructions(F)) |
478 | SIB.visit(Inst&: I); |
479 | memtag::StackInfo &SInfo = SIB.get(); |
480 | |
481 | if (SInfo.AllocasToInstrument.empty()) |
482 | return false; |
483 | |
484 | std::unique_ptr<DominatorTree> DeleteDT; |
485 | DominatorTree *DT = nullptr; |
486 | if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>()) |
487 | DT = &P->getDomTree(); |
488 | |
489 | if (DT == nullptr) { |
490 | DeleteDT = std::make_unique<DominatorTree>(args&: *F); |
491 | DT = DeleteDT.get(); |
492 | } |
493 | |
494 | std::unique_ptr<PostDominatorTree> DeletePDT; |
495 | PostDominatorTree *PDT = nullptr; |
496 | if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>()) |
497 | PDT = &P->getPostDomTree(); |
498 | |
499 | if (PDT == nullptr) { |
500 | DeletePDT = std::make_unique<PostDominatorTree>(args&: *F); |
501 | PDT = DeletePDT.get(); |
502 | } |
503 | |
504 | std::unique_ptr<LoopInfo> DeleteLI; |
505 | LoopInfo *LI = nullptr; |
506 | if (auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>()) { |
507 | LI = &LIWP->getLoopInfo(); |
508 | } else { |
509 | DeleteLI = std::make_unique<LoopInfo>(args&: *DT); |
510 | LI = DeleteLI.get(); |
511 | } |
512 | |
513 | SetTagFunc = |
514 | Intrinsic::getDeclaration(M: F->getParent(), Intrinsic::id: aarch64_settag); |
515 | |
516 | Instruction *Base = insertBaseTaggedPointer(AllocasToInstrument: SInfo.AllocasToInstrument, DT); |
517 | |
518 | int NextTag = 0; |
519 | for (auto &I : SInfo.AllocasToInstrument) { |
520 | memtag::AllocaInfo &Info = I.second; |
521 | assert(Info.AI && SIB.isInterestingAlloca(*Info.AI)); |
522 | memtag::alignAndPadAlloca(Info, Align: kTagGranuleSize); |
523 | AllocaInst *AI = Info.AI; |
524 | int Tag = NextTag; |
525 | NextTag = (NextTag + 1) % 16; |
526 | // Replace alloca with tagp(alloca). |
527 | IRBuilder<> IRB(Info.AI->getNextNode()); |
528 | Function *TagP = Intrinsic::getDeclaration( |
529 | M: F->getParent(), Intrinsic::id: aarch64_tagp, Tys: {Info.AI->getType()}); |
530 | Instruction *TagPCall = |
531 | IRB.CreateCall(Callee: TagP, Args: {Constant::getNullValue(Ty: Info.AI->getType()), Base, |
532 | ConstantInt::get(Ty: IRB.getInt64Ty(), V: Tag)}); |
533 | if (Info.AI->hasName()) |
534 | TagPCall->setName(Info.AI->getName() + ".tag" ); |
535 | // Does not replace metadata, so we don't have to handle DbgVariableRecords. |
536 | Info.AI->replaceUsesWithIf(New: TagPCall, ShouldReplace: [&](const Use &U) { |
537 | return !memtag::isLifetimeIntrinsic(V: U.getUser()); |
538 | }); |
539 | TagPCall->setOperand(i: 0, Val: Info.AI); |
540 | |
541 | // Calls to functions that may return twice (e.g. setjmp) confuse the |
542 | // postdominator analysis, and will leave us to keep memory tagged after |
543 | // function return. Work around this by always untagging at every return |
544 | // statement if return_twice functions are called. |
545 | bool StandardLifetime = |
546 | !SInfo.CallsReturnTwice && |
547 | SInfo.UnrecognizedLifetimes.empty() && |
548 | memtag::isStandardLifetime(LifetimeStart: Info.LifetimeStart, LifetimeEnd: Info.LifetimeEnd, DT, LI, |
549 | MaxLifetimes: ClMaxLifetimes); |
550 | if (StandardLifetime) { |
551 | IntrinsicInst *Start = Info.LifetimeStart[0]; |
552 | uint64_t Size = |
553 | cast<ConstantInt>(Val: Start->getArgOperand(i: 0))->getZExtValue(); |
554 | Size = alignTo(Size, A: kTagGranuleSize); |
555 | tagAlloca(AI, InsertBefore: Start->getNextNode(), Ptr: TagPCall, Size); |
556 | |
557 | auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, InsertBefore: Node, Size); }; |
558 | if (!DT || !PDT || |
559 | !memtag::forAllReachableExits(DT: *DT, PDT: *PDT, LI: *LI, Start, Ends: Info.LifetimeEnd, |
560 | RetVec: SInfo.RetVec, Callback: TagEnd)) { |
561 | for (auto *End : Info.LifetimeEnd) |
562 | End->eraseFromParent(); |
563 | } |
564 | } else { |
565 | uint64_t Size = *Info.AI->getAllocationSize(DL: *DL); |
566 | Value *Ptr = IRB.CreatePointerCast(V: TagPCall, DestTy: IRB.getPtrTy()); |
567 | tagAlloca(AI, InsertBefore: &*IRB.GetInsertPoint(), Ptr, Size); |
568 | for (auto *RI : SInfo.RetVec) { |
569 | untagAlloca(AI, InsertBefore: RI, Size); |
570 | } |
571 | // We may have inserted tag/untag outside of any lifetime interval. |
572 | // Remove all lifetime intrinsics for this alloca. |
573 | for (auto *II : Info.LifetimeStart) |
574 | II->eraseFromParent(); |
575 | for (auto *II : Info.LifetimeEnd) |
576 | II->eraseFromParent(); |
577 | } |
578 | } |
579 | |
580 | // If we have instrumented at least one alloca, all unrecognized lifetime |
581 | // intrinsics have to go. |
582 | for (auto *I : SInfo.UnrecognizedLifetimes) |
583 | I->eraseFromParent(); |
584 | |
585 | return true; |
586 | } |
587 | |