DeadStoreElimination.cpp source code [llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp]

1	//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// The code below implements dead store elimination using MemorySSA. It uses
10	// the following general approach: given a MemoryDef, walk upwards to find
11	// clobbering MemoryDefs that may be killed by the starting def. Then check
12	// that there are no uses that may read the location of the original MemoryDef
13	// in between both MemoryDefs. A bit more concretely:
14	//
// For all MemoryDefs StartDef:
// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
//    upwards.
// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
//    checking all uses starting at MaybeDeadAccess and walking until we see
//    StartDef.
// 3. For each found CurrentDef, check that:
//    a. There are no barrier instructions between CurrentDef and StartDef
//       (like throws or stores with ordering constraints).
//    b. StartDef is executed whenever CurrentDef is executed.
//    c. StartDef completely overwrites CurrentDef.
// 4. Erase CurrentDef from the function and MemorySSA.
27	//
28	//===----------------------------------------------------------------------===//
29
30	#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
31	#include "llvm/ADT/APInt.h"
32	#include "llvm/ADT/DenseMap.h"
33	#include "llvm/ADT/MapVector.h"
34	#include "llvm/ADT/PostOrderIterator.h"
35	#include "llvm/ADT/SetVector.h"
36	#include "llvm/ADT/SmallPtrSet.h"
37	#include "llvm/ADT/SmallVector.h"
38	#include "llvm/ADT/Statistic.h"
39	#include "llvm/ADT/StringRef.h"
40	#include "llvm/Analysis/AliasAnalysis.h"
41	#include "llvm/Analysis/CaptureTracking.h"
42	#include "llvm/Analysis/GlobalsModRef.h"
43	#include "llvm/Analysis/LoopInfo.h"
44	#include "llvm/Analysis/MemoryBuiltins.h"
45	#include "llvm/Analysis/MemoryLocation.h"
46	#include "llvm/Analysis/MemorySSA.h"
47	#include "llvm/Analysis/MemorySSAUpdater.h"
48	#include "llvm/Analysis/MustExecute.h"
49	#include "llvm/Analysis/PostDominators.h"
50	#include "llvm/Analysis/TargetLibraryInfo.h"
51	#include "llvm/Analysis/ValueTracking.h"
52	#include "llvm/IR/Argument.h"
53	#include "llvm/IR/BasicBlock.h"
54	#include "llvm/IR/Constant.h"
55	#include "llvm/IR/Constants.h"
56	#include "llvm/IR/DataLayout.h"
57	#include "llvm/IR/DebugInfo.h"
58	#include "llvm/IR/Dominators.h"
59	#include "llvm/IR/Function.h"
60	#include "llvm/IR/IRBuilder.h"
61	#include "llvm/IR/InstIterator.h"
62	#include "llvm/IR/InstrTypes.h"
63	#include "llvm/IR/Instruction.h"
64	#include "llvm/IR/Instructions.h"
65	#include "llvm/IR/IntrinsicInst.h"
66	#include "llvm/IR/Module.h"
67	#include "llvm/IR/PassManager.h"
68	#include "llvm/IR/PatternMatch.h"
69	#include "llvm/IR/Value.h"
70	#include "llvm/Support/Casting.h"
71	#include "llvm/Support/CommandLine.h"
72	#include "llvm/Support/Debug.h"
73	#include "llvm/Support/DebugCounter.h"
74	#include "llvm/Support/ErrorHandling.h"
75	#include "llvm/Support/raw_ostream.h"
76	#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
77	#include "llvm/Transforms/Utils/BuildLibCalls.h"
78	#include "llvm/Transforms/Utils/Local.h"
79	#include <algorithm>
80	#include <cassert>
81	#include <cstdint>
82	#include <iterator>
83	#include <map>
84	#include <optional>
85	#include <utility>
86
87	using namespace llvm;
88	using namespace PatternMatch;
89
90	#define DEBUG_TYPE "dse"
91
92	STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
93	STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
94	STATISTIC(NumFastStores, "Number of stores deleted");
95	STATISTIC(NumFastOther, "Number of other instrs removed");
96	STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
97	STATISTIC(NumModifiedStores, "Number of stores modified");
98	STATISTIC(NumCFGChecks, "Number of stores modified");
99	STATISTIC(NumCFGTries, "Number of stores modified");
100	STATISTIC(NumCFGSuccess, "Number of stores modified");
101	STATISTIC(NumGetDomMemoryDefPassed,
102	"Number of times a valid candidate is returned from getDomMemoryDef");
103	STATISTIC(NumDomMemDefChecks,
104	"Number iterations check for reads in getDomMemoryDef");
105
106	DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
107	"Controls which MemoryDefs are eliminated.");
108
109	static cl::opt<bool>
110	EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
111	cl::init(Val: true), cl::Hidden,
112	cl::desc ("Enable partial-overwrite tracking in DSE"));
113
114	static cl::opt<bool>
115	EnablePartialStoreMerging("enable-dse-partial-store-merging",
116	cl::init(Val: true), cl::Hidden,
117	cl::desc ("Enable partial store merging in DSE"));
118
119	static cl::opt<unsigned>
120	MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(Val: `150`), cl::Hidden,
121	cl::desc ("The number of memory instructions to scan for "
122	"dead store elimination (default = 150)"));
123	static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
124	"dse-memoryssa-walklimit", cl::init(Val: `90`), cl::Hidden,
125	cl::desc ("The maximum number of steps while walking upwards to find "
126	"MemoryDefs that may be killed (default = 90)"));
127
128	static cl::opt<unsigned> MemorySSAPartialStoreLimit(
129	"dse-memoryssa-partial-store-limit", cl::init(Val: `5`), cl::Hidden,
130	cl::desc ("The maximum number candidates that only partially overwrite the "
131	"killing MemoryDef to consider"
132	" (default = 5)"));
133
134	static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
135	"dse-memoryssa-defs-per-block-limit", cl::init(Val: `5000`), cl::Hidden,
136	cl::desc ("The number of MemoryDefs we consider as candidates to eliminated "
137	"other stores per basic block (default = 5000)"));
138
139	static cl::opt<unsigned> MemorySSASameBBStepCost(
140	"dse-memoryssa-samebb-cost", cl::init(Val: `1`), cl::Hidden,
141	cl::desc (
142	"The cost of a step in the same basic block as the killing MemoryDef"
143	"(default = 1)"));
144
145	static cl::opt<unsigned>
146	MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(Val: `5`),
147	cl::Hidden,
148	cl::desc ("The cost of a step in a different basic "
149	"block than the killing MemoryDef"
150	"(default = 5)"));
151
152	static cl::opt<unsigned> MemorySSAPathCheckLimit(
153	"dse-memoryssa-path-check-limit", cl::init(Val: `50`), cl::Hidden,
154	cl::desc ("The maximum number of blocks to check when trying to prove that "
155	"all paths to an exit go through a killing block (default = 50)"));
156
157	// This flags allows or disallows DSE to optimize MemorySSA during its
158	// traversal. Note that DSE optimizing MemorySSA may impact other passes
159	// downstream of the DSE invocation and can lead to issues not being
160	// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
161	// those cases, the flag can be used to check if DSE's MemorySSA optimizations
162	// impact follow-up passes.
163	static cl::opt<bool>
164	OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(Val: true), cl::Hidden,
165	cl::desc ("Allow DSE to optimize memory accesses."));
166
167	//===----------------------------------------------------------------------===//
168	// Helper functions
169	//===----------------------------------------------------------------------===//
170	using OverlapIntervalsTy = std::map<int64_t, int64_t>;
171	using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
172
173	/// Returns true if the end of this instruction can be safely shortened in
174	/// length.
175	static bool isShortenableAtTheEnd(Instruction *I) {
176	// Don't shorten stores for now
177	if (isa<StoreInst>(Val: I))
178	return false;
179
180	if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
181	switch (II->getIntrinsicID()) {
182	default: return false;
183	case Intrinsic::memset:
184	case Intrinsic::memcpy:
185	case Intrinsic::memcpy_element_unordered_atomic:
186	case Intrinsic::memset_element_unordered_atomic:
187	// Do shorten memory intrinsics.
188	// FIXME: Add memmove if it's also safe to transform.
189	return true;
190	}
191	}
192
193	// Don't shorten libcalls calls for now.
194
195	return false;
196	}
197
198	/// Returns true if the beginning of this instruction can be safely shortened
199	/// in length.
200	static bool isShortenableAtTheBeginning(Instruction *I) {
201	// FIXME: Handle only memset for now. Supporting memcpy/memmove should be
202	// easily done by offsetting the source address.
203	return isa<AnyMemSetInst>(Val: I);
204	}
205
206	static std::optional<TypeSize> getPointerSize(const Value *V,
207	const DataLayout &DL,
208	const TargetLibraryInfo &TLI,
209	const Function *F) {
210	uint64_t Size;
211	ObjectSizeOpts Opts;
212	Opts.NullIsUnknownSize = NullPointerIsDefined(F);
213
214	if (getObjectSize(Ptr: V, Size, DL, TLI: &TLI, Opts))
215	return TypeSize::getFixed(ExactSize: Size);
216	return std::nullopt;
217	}
218
namespace {

/// Describes how a killing store overlaps a (potentially) dead store.
enum OverwriteResult {
  /// The beginning of the dead location is overwritten by the killing one.
  OW_Begin,
  /// The killing store completely overwrites the dead store.
  OW_Complete,
  /// The end of the dead location is overwritten by the killing one.
  OW_End,
  /// A dead (big) store is overwritten by a killing (smaller) store that does
  /// not write outside the big store's memory locations.
  OW_PartialEarlierWithFullLater,
  // NOTE(review): OW_MaybePartial and OW_None are not produced or described
  // in the part of the file visible here; document them next to their users.
  OW_MaybePartial,
  OW_None,
  /// Nothing can be determined about the overlap.
  OW_Unknown
};

} // end anonymous namespace
232
233	/// Check if two instruction are masked stores that completely
234	/// overwrite one another. More specifically, \p KillingI has to
235	/// overwrite \p DeadI.
236	static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
237	const Instruction *DeadI,
238	BatchAAResults &AA) {
239	const auto *KillingII = dyn_cast<IntrinsicInst>(Val: KillingI);
240	const auto *DeadII = dyn_cast<IntrinsicInst>(Val: DeadI);
241	if (KillingII == nullptr \|\| DeadII == nullptr)
242	return OW_Unknown;
243	if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
244	return OW_Unknown;
245	if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
246	// Type size.
247	VectorType *KillingTy =
248	cast<VectorType>(Val: KillingII->getArgOperand(i: `0`)->getType());
249	VectorType *DeadTy = cast<VectorType>(Val: DeadII->getArgOperand(i: `0`)->getType());
250	if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
251	return OW_Unknown;
252	// Element count.
253	if (KillingTy->getElementCount() != DeadTy->getElementCount())
254	return OW_Unknown;
255	// Pointers.
256	Value *KillingPtr = KillingII->getArgOperand(i: `1`)->stripPointerCasts();
257	Value *DeadPtr = DeadII->getArgOperand(i: `1`)->stripPointerCasts();
258	if (KillingPtr != DeadPtr && !AA.isMustAlias(V1: KillingPtr, V2: DeadPtr))
259	return OW_Unknown;
260	// Masks.
261	// TODO: check that KillingII's mask is a superset of the DeadII's mask.
262	if (KillingII->getArgOperand(i: `3`) != DeadII->getArgOperand(i: `3`))
263	return OW_Unknown;
264	return OW_Complete;
265	}
266	return OW_Unknown;
267	}
268
269	/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
270	/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
271	/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
272	/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
273	/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
274	/// overwritten by a killing (smaller) store which doesn't write outside the big
275	/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
276	/// NOTE: This function must only be called if both \p KillingLoc and \p
277	/// DeadLoc belong to the same underlying object with valid \p KillingOff and
278	/// \p DeadOff.
279	static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
280	const MemoryLocation &DeadLoc,
281	int64_t KillingOff, int64_t DeadOff,
282	Instruction *DeadI,
283	InstOverlapIntervalsTy &IOL) {
284	const uint64_t KillingSize = KillingLoc.Size.getValue();
285	const uint64_t DeadSize = DeadLoc.Size.getValue();
286	// We may now overlap, although the overlap is not complete. There might also
287	// be other incomplete overlaps, and together, they might cover the complete
288	// dead store.
289	// Note: The correctness of this logic depends on the fact that this function
290	// is not even called providing DepWrite when there are any intervening reads.
291	if (EnablePartialOverwriteTracking &&
292	KillingOff < int64_t(DeadOff + DeadSize) &&
293	int64_t(KillingOff + KillingSize) >= DeadOff) {
294
295	// Insert our part of the overlap into the map.
296	auto &IM = IOL [DeadI];
297	LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
298	<< int64_t(DeadOff + DeadSize) << ") KillingLoc ["
299	<< KillingOff << ", " << int64_t(KillingOff + KillingSize)
300	<< ")\n");
301
302	// Make sure that we only insert non-overlapping intervals and combine
303	// adjacent intervals. The intervals are stored in the map with the ending
304	// offset as the key (in the half-open sense) and the starting offset as
305	// the value.
306	int64_t KillingIntStart = KillingOff;
307	int64_t KillingIntEnd = KillingOff + KillingSize;
308
309	// Find any intervals ending at, or after, KillingIntStart which start
310	// before KillingIntEnd.
311	auto ILI = IM.lower_bound(x: KillingIntStart);
312	if (ILI != IM.end() && ILI ->second <= KillingIntEnd) {
313	// This existing interval is overlapped with the current store somewhere
314	// in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
315	// intervals and adjusting our start and end.
316	KillingIntStart = std::min(a: KillingIntStart, b: ILI ->second);
317	KillingIntEnd = std::max(a: KillingIntEnd, b: ILI ->first);
318	ILI = IM.erase(position: ILI);
319
320	// Continue erasing and adjusting our end in case other previous
321	// intervals are also overlapped with the current store.
322	//
323	// \|--- dead 1 ---\| \|--- dead 2 ---\|
324	// \|------- killing---------\|
325	//
326	while (ILI != IM.end() && ILI ->second <= KillingIntEnd) {
327	assert(ILI ->second > KillingIntStart && "Unexpected interval");
328	KillingIntEnd = std::max(a: KillingIntEnd, b: ILI ->first);
329	ILI = IM.erase(position: ILI);
330	}
331	}
332
333	IM [KillingIntEnd] = KillingIntStart;
334
335	ILI = IM.begin();
336	if (ILI ->second <= DeadOff && ILI ->first >= int64_t(DeadOff + DeadSize)) {
337	LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
338	<< DeadOff << ", " << int64_t(DeadOff + DeadSize)
339	<< ") Composite KillingLoc [" << ILI ->second << ", "
340	<< ILI ->first << ")\n");
341	++NumCompletePartials;
342	return OW_Complete;
343	}
344	}
345
346	// Check for a dead store which writes to all the memory locations that
347	// the killing store writes to.
348	if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
349	int64_t(DeadOff + DeadSize) > KillingOff &&
350	uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
351	LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
352	<< ", " << int64_t(DeadOff + DeadSize)
353	<< ") by a killing store [" << KillingOff << ", "
354	<< int64_t(KillingOff + KillingSize) << ")\n");
355	// TODO: Maybe come up with a better name?
356	return OW_PartialEarlierWithFullLater;
357	}
358
359	// Another interesting case is if the killing store overwrites the end of the
360	// dead store.
361	//
362	// \|--dead--\|
363	// \|-- killing --\|
364	//
365	// In this case we may want to trim the size of dead store to avoid
366	// generating stores to addresses which will definitely be overwritten killing
367	// store.
368	if (!EnablePartialOverwriteTracking &&
369	(KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
370	int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
371	return OW_End;
372
373	// Finally, we also need to check if the killing store overwrites the
374	// beginning of the dead store.
375	//
376	// \|--dead--\|
377	// \|-- killing --\|
378	//
379	// In this case we may want to move the destination address and trim the size
380	// of dead store to avoid generating stores to addresses which will definitely
381	// be overwritten killing store.
382	if (!EnablePartialOverwriteTracking &&
383	(KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
384	assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
385	"Expect to be handled as OW_Complete");
386	return OW_Begin;
387	}
388	// Otherwise, they don't completely overlap.
389	return OW_Unknown;
390	}
391
392	/// Returns true if the memory which is accessed by the second instruction is not
393	/// modified between the first and the second instruction.
394	/// Precondition: Second instruction must be dominated by the first
395	/// instruction.
396	static bool
397	memoryIsNotModifiedBetween(Instruction FirstI, Instruction SecondI,
398	BatchAAResults &AA, const DataLayout &DL,
399	DominatorTree *DT) {
400	// Do a backwards scan through the CFG from SecondI to FirstI. Look for
401	// instructions which can modify the memory location accessed by SecondI.
402	//
403	// While doing the walk keep track of the address to check. It might be
404	// different in different basic blocks due to PHI translation.
405	using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
406	SmallVector<BlockAddressPair, `16`> WorkList;
407	// Keep track of the address we visited each block with. Bail out if we
408	// visit a block with different addresses.
409	DenseMap<BasicBlock , Value > Visited;
410
411	BasicBlock::iterator FirstBBI(FirstI);
412	++FirstBBI;
413	BasicBlock::iterator SecondBBI(SecondI);
414	BasicBlock *FirstBB = FirstI->getParent();
415	BasicBlock *SecondBB = SecondI->getParent();
416	MemoryLocation MemLoc;
417	if (auto *MemSet = dyn_cast<MemSetInst>(Val: SecondI))
418	MemLoc = MemoryLocation::getForDest(MI: MemSet);
419	else
420	MemLoc = MemoryLocation::get(Inst: SecondI);
421
422	auto MemLocPtr = const_cast<Value >(MemLoc.Ptr);
423
424	// Start checking the SecondBB.
425	WorkList.push_back(
426	Elt: std::make_pair(x&: SecondBB, y: PHITransAddr (MemLocPtr, DL, nullptr)));
427	bool isFirstBlock = true;
428
429	// Check all blocks going backward until we reach the FirstBB.
430	while (!WorkList.empty()) {
431	BlockAddressPair Current = WorkList.pop_back_val();
432	BasicBlock *B = Current.first;
433	PHITransAddr &Addr = Current.second;
434	Value *Ptr = Addr.getAddr();
435
436	// Ignore instructions before FirstI if this is the FirstBB.
437	BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
438
439	BasicBlock::iterator EI;
440	if (isFirstBlock) {
441	// Ignore instructions after SecondI if this is the first visit of SecondBB.
442	assert(B == SecondBB && "first block is not the store block");
443	EI = SecondBBI;
444	isFirstBlock = false;
445	} else {
446	// It's not SecondBB or (in case of a loop) the second visit of SecondBB.
447	// In this case we also have to look at instructions after SecondI.
448	EI = B->end();
449	}
450	for (; BI != EI; ++BI) {
451	Instruction I = &BI;
452	if (I->mayWriteToMemory() && I != SecondI)
453	if (isModSet(MRI: AA.getModRefInfo(I, OptLoc: MemLoc.getWithNewPtr(NewPtr: Ptr))))
454	return false;
455	}
456	if (B != FirstBB) {
457	assert(B != &FirstBB->getParent()->getEntryBlock() &&
458	"Should not hit the entry block because SI must be dominated by LI");
459	for (BasicBlock *Pred : predecessors(BB: B)) {
460	PHITransAddr PredAddr = Addr;
461	if (PredAddr.needsPHITranslationFromBlock(BB: B)) {
462	if (!PredAddr.isPotentiallyPHITranslatable())
463	return false;
464	if (!PredAddr.translateValue(CurBB: B, PredBB: Pred, DT, MustDominate: false))
465	return false;
466	}
467	Value *TranslatedPtr = PredAddr.getAddr();
468	auto Inserted = Visited.insert(KV: std::make_pair(x&: Pred, y&: TranslatedPtr));
469	if (!Inserted.second) {
470	// We already visited this block before. If it was with a different
471	// address - bail out!
472	if (TranslatedPtr != Inserted.first ->second)
473	return false;
474	// ... otherwise just skip it.
475	continue;
476	}
477	WorkList.push_back(Elt: std::make_pair(x&: Pred, y&: PredAddr));
478	}
479	}
480	}
481	return true;
482	}
483
484	static void shortenAssignment(Instruction Inst, Value OriginalDest,
485	uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
486	uint64_t NewSizeInBits, bool IsOverwriteEnd) {
487	const DataLayout &DL = Inst->getModule()->getDataLayout();
488	uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
489	uint64_t DeadSliceOffsetInBits =
490	OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : `0`);
491	auto SetDeadFragExpr = [](auto *Assign,
492	DIExpression::FragmentInfo DeadFragment) {
493	// createFragmentExpression expects an offset relative to the existing
494	// fragment offset if there is one.
495	uint64_t RelativeOffset = DeadFragment.OffsetInBits -
496	Assign->getExpression()
497	->getFragmentInfo()
498	.value_or(DIExpression::FragmentInfo (`0`, `0`))
499	.OffsetInBits;
500	if (auto NewExpr = DIExpression::createFragmentExpression(
501	Expr: Assign->getExpression(), OffsetInBits: RelativeOffset, SizeInBits: DeadFragment.SizeInBits)) {
502	Assign->setExpression(*NewExpr);
503	return;
504	}
505	// Failed to create a fragment expression for this so discard the value,
506	// making this a kill location.
507	auto Expr = DIExpression::createFragmentExpression(
508	Expr: DIExpression::get(Context&: Assign->getContext(), Elements: std::nullopt),
509	OffsetInBits: DeadFragment.OffsetInBits, SizeInBits: DeadFragment.SizeInBits);
510	Assign->setExpression(Expr);
511	Assign->setKillLocation();
512	};
513
514	// A DIAssignID to use so that the inserted dbg.assign intrinsics do not
515	// link to any instructions. Created in the loop below (once).
516	DIAssignID LinkToNothing = nullptr*;
517	LLVMContext &Ctx = Inst->getContext();
518	auto GetDeadLink = [&Ctx, &LinkToNothing]() {
519	if (!LinkToNothing)
520	LinkToNothing = DIAssignID::getDistinct(Context&: Ctx);
521	return LinkToNothing;
522	};
523
524	// Insert an unlinked dbg.assign intrinsic for the dead fragment after each
525	// overlapping dbg.assign intrinsic. The loop invalidates the iterators
526	// returned by getAssignmentMarkers so save a copy of the markers to iterate
527	// over.
528	auto LinkedRange = at::getAssignmentMarkers(Inst);
529	SmallVector<DPValue *> LinkedDPVAssigns = at::getDPVAssignmentMarkers(Inst);
530	SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
531	LinkedRange.end());
532	auto InsertAssignForOverlap = [&](auto *Assign) {
533	std::optional<DIExpression::FragmentInfo> NewFragment;
534	if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
535	DeadSliceSizeInBits, Assign,
536	NewFragment) \|\|
537	!NewFragment) {
538	// We couldn't calculate the intersecting fragment for some reason. Be
539	// cautious and unlink the whole assignment from the store.
540	Assign->setKillAddress();
541	Assign->setAssignId(GetDeadLink ());
542	return;
543	}
544	// No intersect.
545	if (NewFragment ->SizeInBits == `0`)
546	return;
547
548	// Fragments overlap: insert a new dbg.assign for this dead part.
549	auto NewAssign = static_cast<decltype*(Assign)>(Assign->clone());
550	NewAssign->insertAfter(Assign);
551	NewAssign->setAssignId(GetDeadLink ());
552	if (NewFragment)
553	SetDeadFragExpr(NewAssign, *NewFragment);
554	NewAssign->setKillAddress();
555	};
556	for_each(Range&: Linked, F: InsertAssignForOverlap);
557	for_each(Range&: LinkedDPVAssigns, F: InsertAssignForOverlap);
558	}
559
560	static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
561	uint64_t &DeadSize, int64_t KillingStart,
562	uint64_t KillingSize, bool IsOverwriteEnd) {
563	auto *DeadIntrinsic = cast<AnyMemIntrinsic>(Val: DeadI);
564	Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
565
566	// We assume that memet/memcpy operates in chunks of the "largest" native
567	// type size and aligned on the same value. That means optimal start and size
568	// of memset/memcpy should be modulo of preferred alignment of that type. That
569	// is it there is no any sense in trying to reduce store size any further
570	// since any "extra" stores comes for free anyway.
571	// On the other hand, maximum alignment we can achieve is limited by alignment
572	// of initial store.
573
574	// TODO: Limit maximum alignment by preferred (or abi?) alignment of the
575	// "largest" native type.
576	// Note: What is the proper way to get that value?
577	// Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
578	// PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
579
580	int64_t ToRemoveStart = `0`;
581	uint64_t ToRemoveSize = `0`;
582	// Compute start and size of the region to remove. Make sure 'PrefAlign' is
583	// maintained on the remaining store.
584	if (IsOverwriteEnd) {
585	// Calculate required adjustment for 'KillingStart' in order to keep
586	// remaining store size aligned on 'PerfAlign'.
587	uint64_t Off =
588	offsetToAlignment(Value: uint64_t(KillingStart - DeadStart), Alignment: PrefAlign);
589	ToRemoveStart = KillingStart + Off;
590	if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
591	return false;
592	ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
593	} else {
594	ToRemoveStart = DeadStart;
595	assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
596	"Not overlapping accesses?");
597	ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
598	// Calculate required adjustment for 'ToRemoveSize'in order to keep
599	// start of the remaining store aligned on 'PerfAlign'.
600	uint64_t Off = offsetToAlignment(Value: ToRemoveSize, Alignment: PrefAlign);
601	if (Off != `0`) {
602	if (ToRemoveSize <= (PrefAlign.value() - Off))
603	return false;
604	ToRemoveSize -= PrefAlign.value() - Off;
605	}
606	assert(isAligned(PrefAlign, ToRemoveSize) &&
607	"Should preserve selected alignment");
608	}
609
610	assert(ToRemoveSize > `0` && "Shouldn't reach here if nothing to remove");
611	assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
612
613	uint64_t NewSize = DeadSize - ToRemoveSize;
614	if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(Val: DeadI)) {
615	// When shortening an atomic memory intrinsic, the newly shortened
616	// length must remain an integer multiple of the element size.
617	const uint32_t ElementSize = AMI->getElementSizeInBytes();
618	if (`0` != NewSize % ElementSize)
619	return false;
620	}
621
622	LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
623	<< (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
624	<< "\n KILLER [" << ToRemoveStart << ", "
625	<< int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
626
627	Value *DeadWriteLength = DeadIntrinsic->getLength();
628	Value *TrimmedLength = ConstantInt::get(Ty: DeadWriteLength->getType(), V: NewSize);
629	DeadIntrinsic->setLength(TrimmedLength);
630	DeadIntrinsic->setDestAlignment(PrefAlign);
631
632	Value *OrigDest = DeadIntrinsic->getRawDest();
633	if (!IsOverwriteEnd) {
634	Value *Indices[`1`] = {
635	ConstantInt::get(Ty: DeadWriteLength->getType(), V: ToRemoveSize)};
636	Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
637	PointeeType: Type::getInt8Ty(C&: DeadIntrinsic->getContext()), Ptr: OrigDest, IdxList: Indices, NameStr: "", InsertBefore: DeadI);
638	NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
639	DeadIntrinsic->setDest(NewDestGEP);
640	}
641
642	// Update attached dbg.assign intrinsics. Assume 8-bit byte.
643	shortenAssignment(Inst: DeadI, OriginalDest: OrigDest, OldOffsetInBits: DeadStart * `8`, OldSizeInBits: DeadSize * `8`, NewSizeInBits: NewSize * `8`,
644	IsOverwriteEnd);
645
646	// Finally update start and size of dead access.
647	if (!IsOverwriteEnd)
648	DeadStart += ToRemoveSize;
649	DeadSize = NewSize;
650
651	return true;
652	}
653
654	static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
655	int64_t &DeadStart, uint64_t &DeadSize) {
656	if (IntervalMap.empty() \|\| !isShortenableAtTheEnd(I: DeadI))
657	return false;
658
659	OverlapIntervalsTy::iterator OII = --IntervalMap.end();
660	int64_t KillingStart = OII ->second;
661	uint64_t KillingSize = OII ->first - KillingStart;
662
663	assert(OII ->first - KillingStart >= `0` && "Size expected to be positive");
664
665	if (KillingStart > DeadStart &&
666	// Note: "KillingStart - KillingStart" is known to be positive due to
667	// preceding check.
668	(uint64_t)(KillingStart - DeadStart) < DeadSize &&
669	// Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
670	// be non negative due to preceding checks.
671	KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
672	if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
673	IsOverwriteEnd: true)) {
674	IntervalMap.erase(position: OII);
675	return true;
676	}
677	}
678	return false;
679	}
680
681	static bool tryToShortenBegin(Instruction *DeadI,
682	OverlapIntervalsTy &IntervalMap,
683	int64_t &DeadStart, uint64_t &DeadSize) {
684	if (IntervalMap.empty() \|\| !isShortenableAtTheBeginning(I: DeadI))
685	return false;
686
687	OverlapIntervalsTy::iterator OII = IntervalMap.begin();
688	int64_t KillingStart = OII ->second;
689	uint64_t KillingSize = OII ->first - KillingStart;
690
691	assert(OII ->first - KillingStart >= `0` && "Size expected to be positive");
692
693	if (KillingStart <= DeadStart &&
694	// Note: "DeadStart - KillingStart" is known to be non negative due to
695	// preceding check.
696	KillingSize > (uint64_t)(DeadStart - KillingStart)) {
697	// Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
698	// be positive due to preceding checks.
699	assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
700	"Should have been handled as OW_Complete");
701	if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
702	IsOverwriteEnd: false)) {
703	IntervalMap.erase(position: OII);
704	return true;
705	}
706	}
707	return false;
708	}
709
710	static Constant *
711	tryToMergePartialOverlappingStores(StoreInst KillingI, StoreInst DeadI,
712	int64_t KillingOffset, int64_t DeadOffset,
713	const DataLayout &DL, BatchAAResults &AA,
714	DominatorTree *DT) {
715
716	if (DeadI && isa<ConstantInt>(Val: DeadI->getValueOperand()) &&
717	DL.typeSizeEqualsStoreSize(Ty: DeadI->getValueOperand()->getType()) &&
718	KillingI && isa<ConstantInt>(Val: KillingI->getValueOperand()) &&
719	DL.typeSizeEqualsStoreSize(Ty: KillingI->getValueOperand()->getType()) &&
720	memoryIsNotModifiedBetween(FirstI: DeadI, SecondI: KillingI, AA, DL, DT)) {
721	// If the store we find is:
722	// a) partially overwritten by the store to 'Loc'
723	// b) the killing store is fully contained in the dead one and
724	// c) they both have a constant value
725	// d) none of the two stores need padding
726	// Merge the two stores, replacing the dead store's value with a
727	// merge of both values.
728	// TODO: Deal with other constant types (vectors, etc), and probably
729	// some mem intrinsics (if needed)
730
731	APInt DeadValue = cast<ConstantInt>(Val: DeadI->getValueOperand())->getValue();
732	APInt KillingValue =
733	cast<ConstantInt>(Val: KillingI->getValueOperand())->getValue();
734	unsigned KillingBits = KillingValue.getBitWidth();
735	assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
736	KillingValue = KillingValue.zext(width: DeadValue.getBitWidth());
737
738	// Offset of the smaller store inside the larger store
739	unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * `8`;
740	unsigned LShiftAmount =
741	DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
742	: BitOffsetDiff;
743	APInt Mask = APInt::getBitsSet(numBits: DeadValue.getBitWidth(), loBit: LShiftAmount,
744	hiBit: LShiftAmount + KillingBits);
745	// Clear the bits we'll be replacing, then OR with the smaller
746	// store, shifted appropriately.
747	APInt Merged = (DeadValue & ~Mask) \| (KillingValue << LShiftAmount);
748	LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
749	<< "\n Killing: " << *KillingI
750	<< "\n Merged Value: " << Merged << `'\n'`);
751	return ConstantInt::get(Ty: DeadI->getValueOperand()->getType(), V: Merged);
752	}
753	return nullptr;
754	}
755
756	namespace {
757	// Returns true if \p I is an intrinsic that does not read or write memory.
758	bool isNoopIntrinsic(Instruction *I) {
759	if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) {
760	switch (II->getIntrinsicID()) {
761	case Intrinsic::lifetime_start:
762	case Intrinsic::lifetime_end:
763	case Intrinsic::invariant_end:
764	case Intrinsic::launder_invariant_group:
765	case Intrinsic::assume:
766	return true;
767	case Intrinsic::dbg_declare:
768	case Intrinsic::dbg_label:
769	case Intrinsic::dbg_value:
770	llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
771	default:
772	return false;
773	}
774	}
775	return false;
776	}
777
778	// Check if we can ignore \p D for DSE.
779	bool canSkipDef(MemoryDef D, bool* DefVisibleToCaller) {
780	Instruction *DI = D->getMemoryInst();
781	// Calls that only access inaccessible memory cannot read or write any memory
782	// locations we consider for elimination.
783	if (auto *CB = dyn_cast<CallBase>(Val: DI))
784	if (CB->onlyAccessesInaccessibleMemory())
785	return true;
786
787	// We can eliminate stores to locations not visible to the caller across
788	// throwing instructions.
789	if (DI->mayThrow() && !DefVisibleToCaller)
790	return true;
791
792	// We can remove the dead stores, irrespective of the fence and its ordering
793	// (release/acquire/seq_cst). Fences only constraints the ordering of
794	// already visible stores, it does not make a store visible to other
795	// threads. So, skipping over a fence does not change a store from being
796	// dead.
797	if (isa<FenceInst>(Val: DI))
798	return true;
799
800	// Skip intrinsics that do not really read or modify memory.
801	if (isNoopIntrinsic(I: DI))
802	return true;
803
804	return false;
805	}
806
807	struct DSEState {
808	Function &F;
809	AliasAnalysis &AA;
810	EarliestEscapeInfo EI;
811
812	/// The single BatchAA instance that is used to cache AA queries. It will
813	/// not be invalidated over the whole run. This is safe, because:
814	/// 1. Only memory writes are removed, so the alias cache for memory
815	/// locations remains valid.
816	/// 2. No new instructions are added (only instructions removed), so cached
817	/// information for a deleted value cannot be accessed by a re-used new
818	/// value pointer.
819	BatchAAResults BatchAA;
820
821	MemorySSA &MSSA;
822	DominatorTree &DT;
823	PostDominatorTree &PDT;
824	const TargetLibraryInfo &TLI;
825	const DataLayout &DL;
826	const LoopInfo &LI;
827
828	// Whether the function contains any irreducible control flow, useful for
829	// being accurately able to detect loops.
830	bool ContainsIrreducibleLoops;
831
832	// All MemoryDefs that potentially could kill other MemDefs.
833	SmallVector<MemoryDef *, `64`> MemDefs;
834	// Any that should be skipped as they are already deleted
835	SmallPtrSet<MemoryAccess *, `4`> SkipStores;
836	// Keep track whether a given object is captured before return or not.
837	DenseMap<const Value , bool*> CapturedBeforeReturn;
838	// Keep track of all of the objects that are invisible to the caller after
839	// the function returns.
840	DenseMap<const Value , bool*> InvisibleToCallerAfterRet;
841	// Keep track of blocks with throwing instructions not modeled in MemorySSA.
842	SmallPtrSet<BasicBlock *, `16`> ThrowingBlocks;
843	// Post-order numbers for each basic block. Used to figure out if memory
844	// accesses are executed before another access.
845	DenseMap<BasicBlock , unsigned*> PostOrderNumbers;
846
847	/// Keep track of instructions (partly) overlapping with killing MemoryDefs per
848	/// basic block.
849	MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
850	// Check if there are root nodes that are terminated by UnreachableInst.
851	// Those roots pessimize post-dominance queries. If there are such roots,
852	// fall back to CFG scan starting from all non-unreachable roots.
853	bool AnyUnreachableExit;
854
855	// Whether or not we should iterate on removing dead stores at the end of the
856	// function due to removing a store causing a previously captured pointer to
857	// no longer be captured.
858	bool ShouldIterateEndOfFunctionDSE;
859
860	// Class contains self-reference, make sure it's not copied/moved.
861	DSEState(const DSEState &) = delete;
862	DSEState &operator=(const DSEState &) = delete;
863
864	DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
865	PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
866	const LoopInfo &LI)
867	: F(F), AA(AA), EI (DT, &LI), BatchAA (AA, &EI), MSSA(MSSA), DT(DT),
868	PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
869	// Collect blocks with throwing instructions not modeled in MemorySSA and
870	// alloc-like objects.
871	unsigned PO = `0`;
872	for (BasicBlock *BB : post_order(G: &F)) {
873	PostOrderNumbers [BB] = PO++;
874	for (Instruction &I : *BB) {
875	MemoryAccess *MA = MSSA.getMemoryAccess(I: &I);
876	if (I.mayThrow() && !MA)
877	ThrowingBlocks.insert(Ptr: I.getParent());
878
879	auto *MD = dyn_cast_or_null<MemoryDef>(Val: MA);
880	if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
881	(getLocForWrite(I: &I) \|\| isMemTerminatorInst(I: &I)))
882	MemDefs.push_back(Elt: MD);
883	}
884	}
885
886	// Treat byval or inalloca arguments the same as Allocas, stores to them are
887	// dead at the end of the function.
888	for (Argument &AI : F.args())
889	if (AI.hasPassPointeeByValueCopyAttr())
890	InvisibleToCallerAfterRet.insert(KV: {&AI, true});
891
892	// Collect whether there is any irreducible control flow in the function.
893	ContainsIrreducibleLoops = mayContainIrreducibleControl(F, LI: &LI);
894
895	AnyUnreachableExit = any_of(Range: PDT.roots(), P: [](const BasicBlock *E) {
896	return isa<UnreachableInst>(Val: E->getTerminator());
897	});
898	}
899
900	LocationSize strengthenLocationSize(const Instruction *I,
901	LocationSize Size) const {
902	if (auto *CB = dyn_cast<CallBase>(Val: I)) {
903	LibFunc F;
904	if (TLI.getLibFunc(CB: *CB, F) && TLI.has(F) &&
905	(F == LibFunc_memset_chk \|\| F == LibFunc_memcpy_chk)) {
906	// Use the precise location size specified by the 3rd argument
907	// for determining KillingI overwrites DeadLoc if it is a memset_chk
908	// instruction. memset_chk will write either the amount specified as 3rd
909	// argument or the function will immediately abort and exit the program.
910	// NOTE: AA may determine NoAlias if it can prove that the access size
911	// is larger than the allocation size due to that being UB. To avoid
912	// returning potentially invalid NoAlias results by AA, limit the use of
913	// the precise location size to isOverwrite.
914	if (const auto *Len = dyn_cast<ConstantInt>(Val: CB->getArgOperand(i: `2`)))
915	return LocationSize::precise(Value: Len->getZExtValue());
916	}
917	}
918	return Size;
919	}
920
921	/// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
922	/// KillingI instruction) completely overwrites a store to the 'DeadLoc'
923	/// location (by \p DeadI instruction).
924	/// Return OW_MaybePartial if \p KillingI does not completely overwrite
925	/// \p DeadI, but they both write to the same underlying object. In that
926	/// case, use isPartialOverwrite to check if \p KillingI partially overwrites
927	/// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
928	/// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
929	OverwriteResult isOverwrite(const Instruction *KillingI,
930	const Instruction *DeadI,
931	const MemoryLocation &KillingLoc,
932	const MemoryLocation &DeadLoc,
933	int64_t &KillingOff, int64_t &DeadOff) {
934	// AliasAnalysis does not always account for loops. Limit overwrite checks
935	// to dependencies for which we can guarantee they are independent of any
936	// loops they are in.
937	if (!isGuaranteedLoopIndependent(Current: DeadI, KillingDef: KillingI, CurrentLoc: DeadLoc))
938	return OW_Unknown;
939
940	LocationSize KillingLocSize =
941	strengthenLocationSize(I: KillingI, Size: KillingLoc.Size);
942	const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
943	const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
944	const Value *DeadUndObj = getUnderlyingObject(V: DeadPtr);
945	const Value *KillingUndObj = getUnderlyingObject(V: KillingPtr);
946
947	// Check whether the killing store overwrites the whole object, in which
948	// case the size/offset of the dead store does not matter.
949	if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise() &&
950	isIdentifiedObject(V: KillingUndObj)) {
951	std::optional<TypeSize> KillingUndObjSize =
952	getPointerSize(V: KillingUndObj, DL, TLI, F: &F);
953	if (KillingUndObjSize && *KillingUndObjSize == KillingLocSize.getValue())
954	return OW_Complete;
955	}
956
957	// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
958	// get imprecise values here, though (except for unknown sizes).
959	if (!KillingLocSize.isPrecise() \|\| !DeadLoc.Size.isPrecise()) {
960	// In case no constant size is known, try to an IR values for the number
961	// of bytes written and check if they match.
962	const auto *KillingMemI = dyn_cast<MemIntrinsic>(Val: KillingI);
963	const auto *DeadMemI = dyn_cast<MemIntrinsic>(Val: DeadI);
964	if (KillingMemI && DeadMemI) {
965	const Value *KillingV = KillingMemI->getLength();
966	const Value *DeadV = DeadMemI->getLength();
967	if (KillingV == DeadV && BatchAA.isMustAlias(LocA: DeadLoc, LocB: KillingLoc))
968	return OW_Complete;
969	}
970
971	// Masked stores have imprecise locations, but we can reason about them
972	// to some extent.
973	return isMaskedStoreOverwrite(KillingI, DeadI, AA&: BatchAA);
974	}
975
976	const TypeSize KillingSize = KillingLocSize.getValue();
977	const TypeSize DeadSize = DeadLoc.Size.getValue();
978	// Bail on doing Size comparison which depends on AA for now
979	// TODO: Remove AnyScalable once Alias Analysis deal with scalable vectors
980	const bool AnyScalable =
981	DeadSize.isScalable() \|\| KillingLocSize.isScalable();
982
983	if (AnyScalable)
984	return OW_Unknown;
985	// Query the alias information
986	AliasResult AAR = BatchAA.alias(LocA: KillingLoc, LocB: DeadLoc);
987
988	// If the start pointers are the same, we just have to compare sizes to see if
989	// the killing store was larger than the dead store.
990	if (AAR == AliasResult::MustAlias) {
991	// Make sure that the KillingSize size is >= the DeadSize size.
992	if (KillingSize >= DeadSize)
993	return OW_Complete;
994	}
995
996	// If we hit a partial alias we may have a full overwrite
997	if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
998	int32_t Off = AAR.getOffset();
999	if (Off >= `0` && (uint64_t)Off + DeadSize <= KillingSize)
1000	return OW_Complete;
1001	}
1002
1003	// If we can't resolve the same pointers to the same object, then we can't
1004	// analyze them at all.
1005	if (DeadUndObj != KillingUndObj) {
1006	// Non aliasing stores to different objects don't overlap. Note that
1007	// if the killing store is known to overwrite whole object (out of
1008	// bounds access overwrites whole object as well) then it is assumed to
1009	// completely overwrite any store to the same object even if they don't
1010	// actually alias (see next check).
1011	if (AAR == AliasResult::NoAlias)
1012	return OW_None;
1013	return OW_Unknown;
1014	}
1015
1016	// Okay, we have stores to two completely different pointers. Try to
1017	// decompose the pointer into a "base + constant_offset" form. If the base
1018	// pointers are equal, then we can reason about the two stores.
1019	DeadOff = `0`;
1020	KillingOff = `0`;
1021	const Value *DeadBasePtr =
1022	GetPointerBaseWithConstantOffset(Ptr: DeadPtr, Offset&: DeadOff, DL);
1023	const Value *KillingBasePtr =
1024	GetPointerBaseWithConstantOffset(Ptr: KillingPtr, Offset&: KillingOff, DL);
1025
1026	// If the base pointers still differ, we have two completely different
1027	// stores.
1028	if (DeadBasePtr != KillingBasePtr)
1029	return OW_Unknown;
1030
1031	// The killing access completely overlaps the dead store if and only if
1032	// both start and end of the dead one is "inside" the killing one:
1033	// \|<->\|--dead--\|<->\|
1034	// \|-----killing------\|
1035	// Accesses may overlap if and only if start of one of them is "inside"
1036	// another one:
1037	// \|<->\|--dead--\|<-------->\|
1038	// \|-------killing--------\|
1039	// OR
1040	// \|-------dead-------\|
1041	// \|<->\|---killing---\|<----->\|
1042	//
1043	// We have to be careful here as Off is signed while .Size is unsigned.
1044
1045	// Check if the dead access starts "not before" the killing one.
1046	if (DeadOff >= KillingOff) {
1047	// If the dead access ends "not after" the killing access then the
1048	// dead one is completely overwritten by the killing one.
1049	if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
1050	return OW_Complete;
1051	// If start of the dead access is "before" end of the killing access
1052	// then accesses overlap.
1053	else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
1054	return OW_MaybePartial;
1055	}
1056	// If start of the killing access is "before" end of the dead access then
1057	// accesses overlap.
1058	else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
1059	return OW_MaybePartial;
1060	}
1061
1062	// Can reach here only if accesses are known not to overlap.
1063	return OW_None;
1064	}
1065
1066	bool isInvisibleToCallerAfterRet(const Value *V) {
1067	if (isa<AllocaInst>(Val: V))
1068	return true;
1069	auto I = InvisibleToCallerAfterRet.insert(KV: {V, false});
1070	if (I.second) {
1071	if (!isInvisibleToCallerOnUnwind(V)) {
1072	I.first ->second = false;
1073	} else if (isNoAliasCall(V)) {
1074	I.first ->second = !PointerMayBeCaptured(V, ReturnCaptures: true, StoreCaptures: false);
1075	}
1076	}
1077	return I.first ->second;
1078	}
1079
1080	bool isInvisibleToCallerOnUnwind(const Value *V) {
1081	bool RequiresNoCaptureBeforeUnwind;
1082	if (!isNotVisibleOnUnwind(Object: V, RequiresNoCaptureBeforeUnwind))
1083	return false;
1084	if (!RequiresNoCaptureBeforeUnwind)
1085	return true;
1086
1087	auto I = CapturedBeforeReturn.insert(KV: {V, true});
1088	if (I.second)
1089	// NOTE: This could be made more precise by PointerMayBeCapturedBefore
1090	// with the killing MemoryDef. But we refrain from doing so for now to
1091	// limit compile-time and this does not cause any changes to the number
1092	// of stores removed on a large test set in practice.
1093	I.first ->second = PointerMayBeCaptured(V, ReturnCaptures: false, StoreCaptures: true);
1094	return !I.first ->second;
1095	}
1096
1097	std::optional<MemoryLocation> getLocForWrite(Instruction I) const* {
1098	if (!I->mayWriteToMemory())
1099	return std::nullopt;
1100
1101	if (auto *CB = dyn_cast<CallBase>(Val: I))
1102	return MemoryLocation::getForDest(CI: CB, TLI);
1103
1104	return MemoryLocation::getOrNone(Inst: I);
1105	}
1106
1107	/// Assuming this instruction has a dead analyzable write, can we delete
1108	/// this instruction?
1109	bool isRemovable(Instruction *I) {
1110	assert(getLocForWrite(I) && "Must have analyzable write");
1111
1112	// Don't remove volatile/atomic stores.
1113	if (StoreInst *SI = dyn_cast<StoreInst>(Val: I))
1114	return SI->isUnordered();
1115
1116	if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1117	// Don't remove volatile memory intrinsics.
1118	if (auto *MI = dyn_cast<MemIntrinsic>(Val: CB))
1119	return !MI->isVolatile();
1120
1121	// Never remove dead lifetime intrinsics, e.g. because they are followed
1122	// by a free.
1123	if (CB->isLifetimeStartOrEnd())
1124	return false;
1125
1126	return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
1127	!CB->isTerminator();
1128	}
1129
1130	return false;
1131	}
1132
1133	/// Returns true if \p UseInst completely overwrites \p DefLoc
1134	/// (stored by \p DefInst).
1135	bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
1136	Instruction *UseInst) {
1137	// UseInst has a MemoryDef associated in MemorySSA. It's possible for a
1138	// MemoryDef to not write to memory, e.g. a volatile load is modeled as a
1139	// MemoryDef.
1140	if (!UseInst->mayWriteToMemory())
1141	return false;
1142
1143	if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1144	if (CB->onlyAccessesInaccessibleMemory())
1145	return false;
1146
1147	int64_t InstWriteOffset, DepWriteOffset;
1148	if (auto CC = getLocForWrite(I: UseInst))
1149	return isOverwrite(KillingI: UseInst, DeadI: DefInst, KillingLoc: *CC, DeadLoc: DefLoc, KillingOff&: InstWriteOffset,
1150	DeadOff&: DepWriteOffset) == OW_Complete;
1151	return false;
1152	}
1153
1154	/// Returns true if \p Def is not read before returning from the function.
1155	bool isWriteAtEndOfFunction(MemoryDef *Def) {
1156	LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
1157	<< *Def->getMemoryInst()
1158	<< ") is at the end the function \n");
1159
1160	auto MaybeLoc = getLocForWrite(I: Def->getMemoryInst());
1161	if (!MaybeLoc) {
1162	LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
1163	return false;
1164	}
1165
1166	SmallVector<MemoryAccess *, `4`> WorkList;
1167	SmallPtrSet<MemoryAccess *, `8`> Visited;
1168	auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
1169	if (!Visited.insert(Ptr: Acc).second)
1170	return;
1171	for (Use &U : Acc->uses())
1172	WorkList.push_back(Elt: cast<MemoryAccess>(Val: U.getUser()));
1173	};
1174	PushMemUses(Def);
1175	for (unsigned I = `0`; I < WorkList.size(); I++) {
1176	if (WorkList.size() >= MemorySSAScanLimit) {
1177	LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
1178	return false;
1179	}
1180
1181	MemoryAccess *UseAccess = WorkList [I];
1182	if (isa<MemoryPhi>(Val: UseAccess)) {
1183	// AliasAnalysis does not account for loops. Limit elimination to
1184	// candidates for which we can guarantee they always store to the same
1185	// memory location.
1186	if (!isGuaranteedLoopInvariant(Ptr: MaybeLoc ->Ptr))
1187	return false;
1188
1189	PushMemUses(cast<MemoryPhi>(Val: UseAccess));
1190	continue;
1191	}
1192	// TODO: Checking for aliasing is expensive. Consider reducing the amount
1193	// of times this is called and/or caching it.
1194	Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
1195	if (isReadClobber(DefLoc: *MaybeLoc, UseInst)) {
1196	LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
1197	return false;
1198	}
1199
1200	if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess))
1201	PushMemUses(UseDef);
1202	}
1203	return true;
1204	}
1205
1206	/// If \p I is a memory terminator like llvm.lifetime.end or free, return a
1207	/// pair with the MemoryLocation terminated by \p I and a boolean flag
1208	/// indicating whether \p I is a free-like call.
1209	std::optional<std::pair<MemoryLocation, bool>>
1210	getLocForTerminator(Instruction I) const* {
1211	uint64_t Len;
1212	Value *Ptr;
1213	if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
1214	m_Value(Ptr))))
1215	return {std::make_pair(x: MemoryLocation (Ptr, Len), y: false)};
1216
1217	if (auto *CB = dyn_cast<CallBase>(Val: I)) {
1218	if (Value *FreedOp = getFreedOperand(CB, TLI: &TLI))
1219	return {std::make_pair(x: MemoryLocation::getAfter(Ptr: FreedOp), y: true)};
1220	}
1221
1222	return std::nullopt;
1223	}
1224
1225	/// Returns true if \p I is a memory terminator instruction like
1226	/// llvm.lifetime.end or free.
1227	bool isMemTerminatorInst(Instruction I) const* {
1228	auto *CB = dyn_cast<CallBase>(Val: I);
1229	return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end \|\|
1230	getFreedOperand(CB, TLI: &TLI) != nullptr);
1231	}
1232
1233	/// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
1234	/// instruction \p AccessI.
1235	bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
1236	Instruction *MaybeTerm) {
1237	std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
1238	getLocForTerminator(I: MaybeTerm);
1239
1240	if (!MaybeTermLoc)
1241	return false;
1242
1243	// If the terminator is a free-like call, all accesses to the underlying
1244	// object can be considered terminated.
1245	if (getUnderlyingObject(V: Loc.Ptr) !=
1246	getUnderlyingObject(V: MaybeTermLoc ->first.Ptr))
1247	return false;
1248
1249	auto TermLoc = MaybeTermLoc ->first;
1250	if (MaybeTermLoc ->second) {
1251	const Value *LocUO = getUnderlyingObject(V: Loc.Ptr);
1252	return BatchAA.isMustAlias(V1: TermLoc.Ptr, V2: LocUO);
1253	}
1254	int64_t InstWriteOffset = `0`;
1255	int64_t DepWriteOffset = `0`;
1256	return isOverwrite(KillingI: MaybeTerm, DeadI: AccessI, KillingLoc: TermLoc, DeadLoc: Loc, KillingOff&: InstWriteOffset,
1257	DeadOff&: DepWriteOffset) == OW_Complete;
1258	}
1259
1260	// Returns true if \p Use may read from \p DefLoc.
1261	bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
1262	if (isNoopIntrinsic(I: UseInst))
1263	return false;
1264
1265	// Monotonic or weaker atomic stores can be re-ordered and do not need to be
1266	// treated as read clobber.
1267	if (auto SI = dyn_cast<StoreInst>(Val: UseInst))
1268	return isStrongerThan(AO: SI->getOrdering(), Other: AtomicOrdering::Monotonic);
1269
1270	if (!UseInst->mayReadFromMemory())
1271	return false;
1272
1273	if (auto *CB = dyn_cast<CallBase>(Val: UseInst))
1274	if (CB->onlyAccessesInaccessibleMemory())
1275	return false;
1276
1277	return isRefSet(MRI: BatchAA.getModRefInfo(I: UseInst, OptLoc: DefLoc));
1278	}
1279
1280	/// Returns true if a dependency between \p Current and \p KillingDef is
1281	/// guaranteed to be loop invariant for the loops that they are in. Either
1282	/// because they are known to be in the same block, in the same loop level or
1283	/// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
1284	/// during execution of the containing function.
1285	bool isGuaranteedLoopIndependent(const Instruction *Current,
1286	const Instruction *KillingDef,
1287	const MemoryLocation &CurrentLoc) {
1288	// If the dependency is within the same block or loop level (being careful
1289	// of irreducible loops), we know that AA will return a valid result for the
1290	// memory dependency. (Both at the function level, outside of any loop,
1291	// would also be valid but we currently disable that to limit compile time).
1292	if (Current->getParent() == KillingDef->getParent())
1293	return true;
1294	const Loop *CurrentLI = LI.getLoopFor(BB: Current->getParent());
1295	if (!ContainsIrreducibleLoops && CurrentLI &&
1296	CurrentLI == LI.getLoopFor(BB: KillingDef->getParent()))
1297	return true;
1298	// Otherwise check the memory location is invariant to any loops.
1299	return isGuaranteedLoopInvariant(Ptr: CurrentLoc.Ptr);
1300	}
1301
1302	/// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1303	/// loop. In particular, this guarantees that it only references a single
1304	/// MemoryLocation during execution of the containing function.
1305	bool isGuaranteedLoopInvariant(const Value *Ptr) {
1306	Ptr = Ptr->stripPointerCasts();
1307	if (auto *GEP = dyn_cast<GEPOperator>(Val: Ptr))
1308	if (GEP->hasAllConstantIndices())
1309	Ptr = GEP->getPointerOperand()->stripPointerCasts();
1310
1311	if (auto *I = dyn_cast<Instruction>(Val: Ptr)) {
1312	return I->getParent()->isEntryBlock() \|\|
1313	(!ContainsIrreducibleLoops && !LI.getLoopFor(BB: I->getParent()));
1314	}
1315	return true;
1316	}
1317
1318	// Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
1319	// with no read access between them or on any other path to a function exit
1320	// block if \p KillingLoc is not accessible after the function returns. If
1321	// there is no such MemoryDef, return std::nullopt. The returned value may not
1322	// (completely) overwrite \p KillingLoc. Currently we bail out when we
1323	// encounter an aliasing MemoryUse (read).
1324	std::optional<MemoryAccess *>
1325	getDomMemoryDef(MemoryDef KillingDef, MemoryAccess StartAccess,
1326	const MemoryLocation &KillingLoc, const Value *KillingUndObj,
1327	unsigned &ScanLimit, unsigned &WalkerStepLimit,
1328	bool IsMemTerm, unsigned &PartialLimit) {
1329	if (ScanLimit == `0` \|\| WalkerStepLimit == `0`) {
1330	LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1331	return std::nullopt;
1332	}
1333
1334	MemoryAccess *Current = StartAccess;
1335	Instruction *KillingI = KillingDef->getMemoryInst();
1336	LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
1337
1338	// Only optimize defining access of KillingDef when directly starting at its
1339	// defining access. The defining access also must only access KillingLoc. At
1340	// the moment we only support instructions with a single write location, so
1341	// it should be sufficient to disable optimizations for instructions that
1342	// also read from memory.
1343	bool CanOptimize = OptimizeMemorySSA &&
1344	KillingDef->getDefiningAccess() == StartAccess &&
1345	!KillingI->mayReadFromMemory();
1346
1347	// Find the next clobbering Mod access for DefLoc, starting at StartAccess.
1348	std::optional<MemoryLocation> CurrentLoc;
1349	for (;; Current = cast<MemoryDef>(Val: Current)->getDefiningAccess()) {
1350	LLVM_DEBUG({
1351	dbgs() << " visiting " << *Current;
1352	if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
1353	dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
1354	<< ")";
1355	dbgs() << "\n";
1356	});
1357
1358	// Reached TOP.
1359	if (MSSA.isLiveOnEntryDef(MA: Current)) {
1360	LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
1361	if (CanOptimize && Current != KillingDef->getDefiningAccess())
1362	// The first clobbering def is... none.
1363	KillingDef->setOptimized(Current);
1364	return std::nullopt;
1365	}
1366
1367	// Cost of a step. Accesses in the same block are more likely to be valid
1368	// candidates for elimination, hence consider them cheaper.
1369	unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
1370	? MemorySSASameBBStepCost
1371	: MemorySSAOtherBBStepCost;
1372	if (WalkerStepLimit <= StepCost) {
1373	LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
1374	return std::nullopt;
1375	}
1376	WalkerStepLimit -= StepCost;
1377
1378	// Return for MemoryPhis. They cannot be eliminated directly and the
1379	// caller is responsible for traversing them.
1380	if (isa<MemoryPhi>(Val: Current)) {
1381	LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
1382	return Current;
1383	}
1384
1385	// Below, check if CurrentDef is a valid candidate to be eliminated by
1386	// KillingDef. If it is not, check the next candidate.
1387	MemoryDef *CurrentDef = cast<MemoryDef>(Val: Current);
1388	Instruction *CurrentI = CurrentDef->getMemoryInst();
1389
1390	if (canSkipDef(D: CurrentDef, DefVisibleToCaller: !isInvisibleToCallerOnUnwind(V: KillingUndObj))) {
1391	CanOptimize = false;
1392	continue;
1393	}
1394
1395	// Before we try to remove anything, check for any extra throwing
1396	// instructions that block us from DSEing
1397	if (mayThrowBetween(KillingI, DeadI: CurrentI, KillingUndObj)) {
1398	LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
1399	return std::nullopt;
1400	}
1401
1402	// Check for anything that looks like it will be a barrier to further
1403	// removal
1404	if (isDSEBarrier(KillingUndObj, DeadI: CurrentI)) {
1405	LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
1406	return std::nullopt;
1407	}
1408
1409	// If Current is known to be on path that reads DefLoc or is a read
1410	// clobber, bail out, as the path is not profitable. We skip this check
1411	// for intrinsic calls, because the code knows how to handle memcpy
1412	// intrinsics.
1413	if (!isa<IntrinsicInst>(Val: CurrentI) && isReadClobber(DefLoc: KillingLoc, UseInst: CurrentI))
1414	return std::nullopt;
1415
1416	// Quick check if there are direct uses that are read-clobbers.
1417	if (any_of(Range: Current->uses(), P: [this, &KillingLoc, StartAccess](Use &U) {
1418	if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(Val: U.getUser()))
1419	return !MSSA.dominates(A: StartAccess, B: UseOrDef) &&
1420	isReadClobber(DefLoc: KillingLoc, UseInst: UseOrDef->getMemoryInst());
1421	return false;
1422	})) {
1423	LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
1424	return std::nullopt;
1425	}
1426
1427	// If Current does not have an analyzable write location or is not
1428	// removable, skip it.
1429	CurrentLoc = getLocForWrite(I: CurrentI);
1430	if (!CurrentLoc \|\| !isRemovable(I: CurrentI)) {
1431	CanOptimize = false;
1432	continue;
1433	}
1434
1435	// AliasAnalysis does not account for loops. Limit elimination to
1436	// candidates for which we can guarantee they always store to the same
1437	// memory location and not located in different loops.
1438	if (!isGuaranteedLoopIndependent(Current: CurrentI, KillingDef: KillingI, CurrentLoc: *CurrentLoc)) {
1439	LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
1440	CanOptimize = false;
1441	continue;
1442	}
1443
1444	if (IsMemTerm) {
1445	// If the killing def is a memory terminator (e.g. lifetime.end), check
1446	// the next candidate if the current Current does not write the same
1447	// underlying object as the terminator.
1448	if (!isMemTerminator(Loc: *CurrentLoc, AccessI: CurrentI, MaybeTerm: KillingI)) {
1449	CanOptimize = false;
1450	continue;
1451	}
1452	} else {
1453	int64_t KillingOffset = `0`;
1454	int64_t DeadOffset = `0`;
1455	auto OR = isOverwrite(KillingI, DeadI: CurrentI, KillingLoc, DeadLoc: *CurrentLoc,
1456	KillingOff&: KillingOffset, DeadOff&: DeadOffset);
1457	if (CanOptimize) {
1458	// CurrentDef is the earliest write clobber of KillingDef. Use it as
1459	// optimized access. Do not optimize if CurrentDef is already the
1460	// defining access of KillingDef.
1461	if (CurrentDef != KillingDef->getDefiningAccess() &&
1462	(OR == OW_Complete \|\| OR == OW_MaybePartial))
1463	KillingDef->setOptimized(CurrentDef);
1464
1465	// Once a may-aliasing def is encountered do not set an optimized
1466	// access.
1467	if (OR != OW_None)
1468	CanOptimize = false;
1469	}
1470
1471	// If Current does not write to the same object as KillingDef, check
1472	// the next candidate.
1473	if (OR == OW_Unknown \|\| OR == OW_None)
1474	continue;
1475	else if (OR == OW_MaybePartial) {
1476	// If KillingDef only partially overwrites Current, check the next
1477	// candidate if the partial step limit is exceeded. This aggressively
1478	// limits the number of candidates for partial store elimination,
1479	// which are less likely to be removable in the end.
1480	if (PartialLimit <= `1`) {
1481	WalkerStepLimit -= `1`;
1482	LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
1483	continue;
1484	}
1485	PartialLimit -= `1`;
1486	}
1487	}
1488	break;
1489	};
1490
1491	// Accesses to objects accessible after the function returns can only be
1492	// eliminated if the access is dead along all paths to the exit. Collect
1493	// the blocks with killing (=completely overwriting MemoryDefs) and check if
1494	// they cover all paths from MaybeDeadAccess to any function exit.
1495	SmallPtrSet<Instruction *, `16`> KillingDefs;
1496	KillingDefs.insert(Ptr: KillingDef->getMemoryInst());
1497	MemoryAccess *MaybeDeadAccess = Current;
1498	MemoryLocation MaybeDeadLoc = *CurrentLoc;
1499	Instruction *MaybeDeadI = cast<MemoryDef>(Val: MaybeDeadAccess)->getMemoryInst();
1500	LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
1501	<< *MaybeDeadI << ")\n");
1502
1503	SmallSetVector<MemoryAccess *, `32`> WorkList;
1504	auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
1505	for (Use &U : Acc->uses())
1506	WorkList.insert(X: cast<MemoryAccess>(Val: U.getUser()));
1507	};
1508	PushMemUses(MaybeDeadAccess);
1509
1510	// Check if DeadDef may be read.
1511	for (unsigned I = `0`; I < WorkList.size(); I++) {
1512	MemoryAccess *UseAccess = WorkList [I];
1513
1514	LLVM_DEBUG(dbgs() << " " << *UseAccess);
1515	// Bail out if the number of accesses to check exceeds the scan limit.
1516	if (ScanLimit < (WorkList.size() - I)) {
1517	LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
1518	return std::nullopt;
1519	}
1520	--ScanLimit;
1521	NumDomMemDefChecks ++;
1522
1523	if (isa<MemoryPhi>(Val: UseAccess)) {
1524	if (any_of(Range&: KillingDefs, P: [this, UseAccess](Instruction *KI) {
1525	return DT.properlyDominates(A: KI->getParent(),
1526	B: UseAccess->getBlock());
1527	})) {
1528	LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
1529	continue;
1530	}
1531	LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
1532	PushMemUses(UseAccess);
1533	continue;
1534	}
1535
1536	Instruction *UseInst = cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst();
1537	LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
1538
1539	if (any_of(Range&: KillingDefs, P: [this, UseInst](Instruction *KI) {
1540	return DT.dominates(Def: KI, User: UseInst);
1541	})) {
1542	LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
1543	continue;
1544	}
1545
1546	// A memory terminator kills all preceeding MemoryDefs and all succeeding
1547	// MemoryAccesses. We do not have to check it's users.
1548	if (isMemTerminator(Loc: MaybeDeadLoc, AccessI: MaybeDeadI, MaybeTerm: UseInst)) {
1549	LLVM_DEBUG(
1550	dbgs()
1551	<< " ... skipping, memterminator invalidates following accesses\n");
1552	continue;
1553	}
1554
1555	if (isNoopIntrinsic(I: cast<MemoryUseOrDef>(Val: UseAccess)->getMemoryInst())) {
1556	LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
1557	PushMemUses(UseAccess);
1558	continue;
1559	}
1560
1561	if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj)) {
1562	LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
1563	return std::nullopt;
1564	}
1565
1566	// Uses which may read the original MemoryDef mean we cannot eliminate the
1567	// original MD. Stop walk.
1568	if (isReadClobber(DefLoc: MaybeDeadLoc, UseInst)) {
1569	LLVM_DEBUG(dbgs() << " ... found read clobber\n");
1570	return std::nullopt;
1571	}
1572
1573	// If this worklist walks back to the original memory access (and the
1574	// pointer is not guarenteed loop invariant) then we cannot assume that a
1575	// store kills itself.
1576	if (MaybeDeadAccess == UseAccess &&
1577	!isGuaranteedLoopInvariant(Ptr: MaybeDeadLoc.Ptr)) {
1578	LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
1579	return std::nullopt;
1580	}
1581	// Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
1582	// if it reads the memory location.
1583	// TODO: It would probably be better to check for self-reads before
1584	// calling the function.
1585	if (KillingDef == UseAccess \|\| MaybeDeadAccess == UseAccess) {
1586	LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
1587	continue;
1588	}
1589
1590	// Check all uses for MemoryDefs, except for defs completely overwriting
1591	// the original location. Otherwise we have to check uses of all
1592	// MemoryDefs we discover, including non-aliasing ones. Otherwise we might
1593	// miss cases like the following
1594	// 1 = Def(LoE) ; <----- DeadDef stores [0,1]
1595	// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
1596	// Use(2) ; MayAlias 2 and* 1, loads [0, 3].*
1597	// (The Use points to the first* Def it may alias)*
1598	// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
1599	// stores [0,1]
1600	if (MemoryDef *UseDef = dyn_cast<MemoryDef>(Val: UseAccess)) {
1601	if (isCompleteOverwrite(DefLoc: MaybeDeadLoc, DefInst: MaybeDeadI, UseInst)) {
1602	BasicBlock *MaybeKillingBlock = UseInst->getParent();
1603	if (PostOrderNumbers.find(Val: MaybeKillingBlock)->second <
1604	PostOrderNumbers.find(Val: MaybeDeadAccess->getBlock())->second) {
1605	if (!isInvisibleToCallerAfterRet(V: KillingUndObj)) {
1606	LLVM_DEBUG(dbgs()
1607	<< " ... found killing def " << *UseInst << "\n");
1608	KillingDefs.insert(Ptr: UseInst);
1609	}
1610	} else {
1611	LLVM_DEBUG(dbgs()
1612	<< " ... found preceeding def " << *UseInst << "\n");
1613	return std::nullopt;
1614	}
1615	} else
1616	PushMemUses(UseDef);
1617	}
1618	}
1619
1620	// For accesses to locations visible after the function returns, make sure
1621	// that the location is dead (=overwritten) along all paths from
1622	// MaybeDeadAccess to the exit.
1623	if (!isInvisibleToCallerAfterRet(V: KillingUndObj)) {
1624	SmallPtrSet<BasicBlock *, `16`> KillingBlocks;
1625	for (Instruction *KD : KillingDefs)
1626	KillingBlocks.insert(Ptr: KD->getParent());
1627	assert(!KillingBlocks.empty() &&
1628	"Expected at least a single killing block");
1629
1630	// Find the common post-dominator of all killing blocks.
1631	BasicBlock CommonPred = KillingBlocks.begin();
1632	for (BasicBlock *BB : llvm::drop_begin(RangeOrContainer&: KillingBlocks)) {
1633	if (!CommonPred)
1634	break;
1635	CommonPred = PDT.findNearestCommonDominator(A: CommonPred, B: BB);
1636	}
1637
1638	// If the common post-dominator does not post-dominate MaybeDeadAccess,
1639	// there is a path from MaybeDeadAccess to an exit not going through a
1640	// killing block.
1641	if (!PDT.dominates(A: CommonPred, B: MaybeDeadAccess->getBlock())) {
1642	if (!AnyUnreachableExit)
1643	return std::nullopt;
1644
1645	// Fall back to CFG scan starting at all non-unreachable roots if not
1646	// all paths to the exit go through CommonPred.
1647	CommonPred = nullptr;
1648	}
1649
1650	// If CommonPred itself is in the set of killing blocks, we're done.
1651	if (KillingBlocks.count(Ptr: CommonPred))
1652	return {MaybeDeadAccess};
1653
1654	SetVector<BasicBlock *> WorkList;
1655	// If CommonPred is null, there are multiple exits from the function.
1656	// They all have to be added to the worklist.
1657	if (CommonPred)
1658	WorkList.insert(X: CommonPred);
1659	else
1660	for (BasicBlock *R : PDT.roots()) {
1661	if (!isa<UnreachableInst>(Val: R->getTerminator()))
1662	WorkList.insert(X: R);
1663	}
1664
1665	NumCFGTries ++;
1666	// Check if all paths starting from an exit node go through one of the
1667	// killing blocks before reaching MaybeDeadAccess.
1668	for (unsigned I = `0`; I < WorkList.size(); I++) {
1669	NumCFGChecks ++;
1670	BasicBlock *Current = WorkList [I];
1671	if (KillingBlocks.count(Ptr: Current))
1672	continue;
1673	if (Current == MaybeDeadAccess->getBlock())
1674	return std::nullopt;
1675
1676	// MaybeDeadAccess is reachable from the entry, so we don't have to
1677	// explore unreachable blocks further.
1678	if (!DT.isReachableFromEntry(A: Current))
1679	continue;
1680
1681	for (BasicBlock *Pred : predecessors(BB: Current))
1682	WorkList.insert(X: Pred);
1683
1684	if (WorkList.size() >= MemorySSAPathCheckLimit)
1685	return std::nullopt;
1686	}
1687	NumCFGSuccess ++;
1688	}
1689
1690	// No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
1691	// potentially dead.
1692	return {MaybeDeadAccess};
1693	}
1694
1695	// Delete dead memory defs
1696	void deleteDeadInstruction(Instruction *SI) {
1697	MemorySSAUpdater Updater(&MSSA);
1698	SmallVector<Instruction *, `32`> NowDeadInsts;
1699	NowDeadInsts.push_back(Elt: SI);
1700	--NumFastOther;
1701
1702	while (!NowDeadInsts.empty()) {
1703	Instruction *DeadInst = NowDeadInsts.pop_back_val();
1704	++NumFastOther;
1705
1706	// Try to preserve debug information attached to the dead instruction.
1707	salvageDebugInfo(I&: *DeadInst);
1708	salvageKnowledge(I: DeadInst);
1709
1710	// Remove the Instruction from MSSA.
1711	if (MemoryAccess *MA = MSSA.getMemoryAccess(I: DeadInst)) {
1712	if (MemoryDef *MD = dyn_cast<MemoryDef>(Val: MA)) {
1713	SkipStores.insert(Ptr: MD);
1714	if (auto *SI = dyn_cast<StoreInst>(Val: MD->getMemoryInst())) {
1715	if (SI->getValueOperand()->getType()->isPointerTy()) {
1716	const Value *UO = getUnderlyingObject(V: SI->getValueOperand());
1717	if (CapturedBeforeReturn.erase(Val: UO))
1718	ShouldIterateEndOfFunctionDSE = true;
1719	InvisibleToCallerAfterRet.erase(Val: UO);
1720	}
1721	}
1722	}
1723
1724	Updater.removeMemoryAccess(MA);
1725	}
1726
1727	auto I = IOLs.find(Key: DeadInst->getParent());
1728	if (I != IOLs.end())
1729	I->second.erase(Val: DeadInst);
1730	// Remove its operands
1731	for (Use &O : DeadInst->operands())
1732	if (Instruction *OpI = dyn_cast<Instruction>(Val&: O)) {
1733	O = nullptr;
1734	if (isInstructionTriviallyDead(I: OpI, TLI: &TLI))
1735	NowDeadInsts.push_back(Elt: OpI);
1736	}
1737
1738	EI.removeInstruction(I: DeadInst);
1739	DeadInst->eraseFromParent();
1740	}
1741	}
1742
1743	// Check for any extra throws between \p KillingI and \p DeadI that block
1744	// DSE. This only checks extra maythrows (those that aren't MemoryDef's).
1745	// MemoryDef that may throw are handled during the walk from one def to the
1746	// next.
1747	bool mayThrowBetween(Instruction KillingI, Instruction DeadI,
1748	const Value *KillingUndObj) {
1749	// First see if we can ignore it by using the fact that KillingI is an
1750	// alloca/alloca like object that is not visible to the caller during
1751	// execution of the function.
1752	if (KillingUndObj && isInvisibleToCallerOnUnwind(V: KillingUndObj))
1753	return false;
1754
1755	if (KillingI->getParent() == DeadI->getParent())
1756	return ThrowingBlocks.count(Ptr: KillingI->getParent());
1757	return !ThrowingBlocks.empty();
1758	}
1759
1760	// Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
1761	// instructions act as barriers:
1762	// A memory instruction that may throw and \p KillingI accesses a non-stack*
1763	// object.
1764	// Atomic stores stronger that monotonic.*
1765	bool isDSEBarrier(const Value KillingUndObj, Instruction DeadI) {
1766	// If DeadI may throw it acts as a barrier, unless we are to an
1767	// alloca/alloca like object that does not escape.
1768	if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(V: KillingUndObj))
1769	return true;
1770
1771	// If DeadI is an atomic load/store stronger than monotonic, do not try to
1772	// eliminate/reorder it.
1773	if (DeadI->isAtomic()) {
1774	if (auto *LI = dyn_cast<LoadInst>(Val: DeadI))
1775	return isStrongerThanMonotonic(AO: LI->getOrdering());
1776	if (auto *SI = dyn_cast<StoreInst>(Val: DeadI))
1777	return isStrongerThanMonotonic(AO: SI->getOrdering());
1778	if (auto *ARMW = dyn_cast<AtomicRMWInst>(Val: DeadI))
1779	return isStrongerThanMonotonic(AO: ARMW->getOrdering());
1780	if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(Val: DeadI))
1781	return isStrongerThanMonotonic(AO: CmpXchg->getSuccessOrdering()) \|\|
1782	isStrongerThanMonotonic(AO: CmpXchg->getFailureOrdering());
1783	llvm_unreachable("other instructions should be skipped in MemorySSA");
1784	}
1785	return false;
1786	}
1787
1788	/// Eliminate writes to objects that are not visible in the caller and are not
1789	/// accessed before returning from the function.
1790	bool eliminateDeadWritesAtEndOfFunction() {
1791	bool MadeChange = false;
1792	LLVM_DEBUG(
1793	dbgs()
1794	<< "Trying to eliminate MemoryDefs at the end of the function\n");
1795	do {
1796	ShouldIterateEndOfFunctionDSE = false;
1797	for (MemoryDef *Def : llvm::reverse(C&: MemDefs)) {
1798	if (SkipStores.contains(Ptr: Def))
1799	continue;
1800
1801	Instruction *DefI = Def->getMemoryInst();
1802	auto DefLoc = getLocForWrite(I: DefI);
1803	if (!DefLoc \|\| !isRemovable(I: DefI))
1804	continue;
1805
1806	// NOTE: Currently eliminating writes at the end of a function is
1807	// limited to MemoryDefs with a single underlying object, to save
1808	// compile-time. In practice it appears the case with multiple
1809	// underlying objects is very uncommon. If it turns out to be important,
1810	// we can use getUnderlyingObjects here instead.
1811	const Value *UO = getUnderlyingObject(V: DefLoc ->Ptr);
1812	if (!isInvisibleToCallerAfterRet(V: UO))
1813	continue;
1814
1815	if (isWriteAtEndOfFunction(Def)) {
1816	// See through pointer-to-pointer bitcasts
1817	LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
1818	"of the function\n");
1819	deleteDeadInstruction(SI: DefI);
1820	++NumFastStores;
1821	MadeChange = true;
1822	}
1823	}
1824	} while (ShouldIterateEndOfFunctionDSE);
1825	return MadeChange;
1826	}
1827
1828	/// If we have a zero initializing memset following a call to malloc,
1829	/// try folding it into a call to calloc.
1830	bool tryFoldIntoCalloc(MemoryDef Def, const* Value *DefUO) {
1831	Instruction *DefI = Def->getMemoryInst();
1832	MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
1833	if (!MemSet)
1834	// TODO: Could handle zero store to small allocation as well.
1835	return false;
1836	Constant *StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
1837	if (!StoredConstant \|\| !StoredConstant->isNullValue())
1838	return false;
1839
1840	if (!isRemovable(I: DefI))
1841	// The memset might be volatile..
1842	return false;
1843
1844	if (F.hasFnAttribute(Attribute::SanitizeMemory) \|\|
1845	F.hasFnAttribute(Attribute::SanitizeAddress) \|\|
1846	F.hasFnAttribute(Attribute::SanitizeHWAddress) \|\|
1847	F.getName() == "calloc")
1848	return false;
1849	auto Malloc = const_cast<CallInst >(dyn_cast<CallInst>(Val: DefUO));
1850	if (!Malloc)
1851	return false;
1852	auto *InnerCallee = Malloc->getCalledFunction();
1853	if (!InnerCallee)
1854	return false;
1855	LibFunc Func;
1856	if (!TLI.getLibFunc(FDecl: *InnerCallee, F&: Func) \|\| !TLI.has(F: Func) \|\|
1857	Func != LibFunc_malloc)
1858	return false;
1859	// Gracefully handle malloc with unexpected memory attributes.
1860	auto *MallocDef = dyn_cast_or_null<MemoryDef>(Val: MSSA.getMemoryAccess(I: Malloc));
1861	if (!MallocDef)
1862	return false;
1863
1864	auto shouldCreateCalloc = [](CallInst Malloc, CallInst Memset) {
1865	// Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
1866	// of malloc block
1867	auto *MallocBB = Malloc->getParent(),
1868	*MemsetBB = Memset->getParent();
1869	if (MallocBB == MemsetBB)
1870	return true;
1871	auto *Ptr = Memset->getArgOperand(i: `0`);
1872	auto *TI = MallocBB->getTerminator();
1873	ICmpInst::Predicate Pred;
1874	BasicBlock TrueBB, FalseBB;
1875	if (!match(V: TI, P: m_Br(C: m_ICmp(Pred, L: m_Specific(V: Ptr), R: m_Zero()), T&: TrueBB,
1876	F&: FalseBB)))
1877	return false;
1878	if (Pred != ICmpInst::ICMP_EQ \|\| MemsetBB != FalseBB)
1879	return false;
1880	return true;
1881	};
1882
1883	if (Malloc->getOperand(i_nocapture: `0`) != MemSet->getLength())
1884	return false;
1885	if (!shouldCreateCalloc(Malloc, MemSet) \|\|
1886	!DT.dominates(Def: Malloc, User: MemSet) \|\|
1887	!memoryIsNotModifiedBetween(FirstI: Malloc, SecondI: MemSet, AA&: BatchAA, DL, DT: &DT))
1888	return false;
1889	IRBuilder<> IRB(Malloc);
1890	Type *SizeTTy = Malloc->getArgOperand(i: `0`)->getType();
1891	auto *Calloc = emitCalloc(Num: ConstantInt::get(Ty: SizeTTy, V: `1`),
1892	Size: Malloc->getArgOperand(i: `0`), B&: IRB, TLI);
1893	if (!Calloc)
1894	return false;
1895	MemorySSAUpdater Updater(&MSSA);
1896	auto *NewAccess =
1897	Updater.createMemoryAccessAfter(I: cast<Instruction>(Val: Calloc), Definition: nullptr,
1898	InsertPt: MallocDef);
1899	auto *NewAccessMD = cast<MemoryDef>(Val: NewAccess);
1900	Updater.insertDef(Def: NewAccessMD, /RenameUses=/true);
1901	Updater.removeMemoryAccess(I: Malloc);
1902	Malloc->replaceAllUsesWith(V: Calloc);
1903	Malloc->eraseFromParent();
1904	return true;
1905	}
1906
1907	// Check if there is a dominating condition, that implies that the value
1908	// being stored in a ptr is already present in the ptr.
1909	bool dominatingConditionImpliesValue(MemoryDef *Def) {
1910	auto *StoreI = cast<StoreInst>(Val: Def->getMemoryInst());
1911	BasicBlock *StoreBB = StoreI->getParent();
1912	Value *StorePtr = StoreI->getPointerOperand();
1913	Value *StoreVal = StoreI->getValueOperand();
1914
1915	DomTreeNode *IDom = DT.getNode(BB: StoreBB)->getIDom();
1916	if (!IDom)
1917	return false;
1918
1919	auto *BI = dyn_cast<BranchInst>(Val: IDom->getBlock()->getTerminator());
1920	if (!BI \|\| !BI->isConditional())
1921	return false;
1922
1923	// In case both blocks are the same, it is not possible to determine
1924	// if optimization is possible. (We would not want to optimize a store
1925	// in the FalseBB if condition is true and vice versa.)
1926	if (BI->getSuccessor(i: `0`) == BI->getSuccessor(i: `1`))
1927	return false;
1928
1929	Instruction *ICmpL;
1930	ICmpInst::Predicate Pred;
1931	if (!match(V: BI->getCondition(),
1932	P: m_c_ICmp(Pred,
1933	L: m_CombineAnd(L: m_Load(Op: m_Specific(V: StorePtr)),
1934	R: m_Instruction(I&: ICmpL)),
1935	R: m_Specific(V: StoreVal))) \|\|
1936	!ICmpInst::isEquality(P: Pred))
1937	return false;
1938
1939	// In case the else blocks also branches to the if block or the other way
1940	// around it is not possible to determine if the optimization is possible.
1941	if (Pred == ICmpInst::ICMP_EQ &&
1942	!DT.dominates(BBE: BasicBlockEdge (BI->getParent(), BI->getSuccessor(i: `0`)),
1943	BB: StoreBB))
1944	return false;
1945
1946	if (Pred == ICmpInst::ICMP_NE &&
1947	!DT.dominates(BBE: BasicBlockEdge (BI->getParent(), BI->getSuccessor(i: `1`)),
1948	BB: StoreBB))
1949	return false;
1950
1951	MemoryAccess *LoadAcc = MSSA.getMemoryAccess(I: ICmpL);
1952	MemoryAccess *ClobAcc =
1953	MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
1954
1955	return MSSA.dominates(A: ClobAcc, B: LoadAcc);
1956	}
1957
1958	/// \returns true if \p Def is a no-op store, either because it
1959	/// directly stores back a loaded value or stores zero to a calloced object.
1960	bool storeIsNoop(MemoryDef Def, const* Value *DefUO) {
1961	Instruction *DefI = Def->getMemoryInst();
1962	StoreInst *Store = dyn_cast<StoreInst>(Val: DefI);
1963	MemSetInst *MemSet = dyn_cast<MemSetInst>(Val: DefI);
1964	Constant StoredConstant = nullptr*;
1965	if (Store)
1966	StoredConstant = dyn_cast<Constant>(Val: Store->getOperand(i_nocapture: `0`));
1967	else if (MemSet)
1968	StoredConstant = dyn_cast<Constant>(Val: MemSet->getValue());
1969	else
1970	return false;
1971
1972	if (!isRemovable(I: DefI))
1973	return false;
1974
1975	if (StoredConstant) {
1976	Constant *InitC =
1977	getInitialValueOfAllocation(V: DefUO, TLI: &TLI, Ty: StoredConstant->getType());
1978	// If the clobbering access is LiveOnEntry, no instructions between them
1979	// can modify the memory location.
1980	if (InitC && InitC == StoredConstant)
1981	return MSSA.isLiveOnEntryDef(
1982	MA: MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA));
1983	}
1984
1985	if (!Store)
1986	return false;
1987
1988	if (dominatingConditionImpliesValue(Def))
1989	return true;
1990
1991	if (auto *LoadI = dyn_cast<LoadInst>(Val: Store->getOperand(i_nocapture: `0`))) {
1992	if (LoadI->getPointerOperand() == Store->getOperand(i_nocapture: `1`)) {
1993	// Get the defining access for the load.
1994	auto *LoadAccess = MSSA.getMemoryAccess(I: LoadI)->getDefiningAccess();
1995	// Fast path: the defining accesses are the same.
1996	if (LoadAccess == Def->getDefiningAccess())
1997	return true;
1998
1999	// Look through phi accesses. Recursively scan all phi accesses by
2000	// adding them to a worklist. Bail when we run into a memory def that
2001	// does not match LoadAccess.
2002	SetVector<MemoryAccess *> ToCheck;
2003	MemoryAccess *Current =
2004	MSSA.getWalker()->getClobberingMemoryAccess(Def, AA&: BatchAA);
2005	// We don't want to bail when we run into the store memory def. But,
2006	// the phi access may point to it. So, pretend like we've already
2007	// checked it.
2008	ToCheck.insert(X: Def);
2009	ToCheck.insert(X: Current);
2010	// Start at current (1) to simulate already having checked Def.
2011	for (unsigned I = `1`; I < ToCheck.size(); ++I) {
2012	Current = ToCheck [I];
2013	if (auto PhiAccess = dyn_cast<MemoryPhi>(Val: Current)) {
2014	// Check all the operands.
2015	for (auto &Use : PhiAccess->incoming_values())
2016	ToCheck.insert(X: cast<MemoryAccess>(Val: &Use));
2017	continue;
2018	}
2019
2020	// If we found a memory def, bail. This happens when we have an
2021	// unrelated write in between an otherwise noop store.
2022	assert(isa<MemoryDef>(Current) &&
2023	"Only MemoryDefs should reach here.");
2024	// TODO: Skip no alias MemoryDefs that have no aliasing reads.
2025	// We are searching for the definition of the store's destination.
2026	// So, if that is the same definition as the load, then this is a
2027	// noop. Otherwise, fail.
2028	if (LoadAccess != Current)
2029	return false;
2030	}
2031	return true;
2032	}
2033	}
2034
2035	return false;
2036	}
2037
2038	bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
2039	bool Changed = false;
2040	for (auto OI : IOL) {
2041	Instruction *DeadI = OI.first;
2042	MemoryLocation Loc = *getLocForWrite(I: DeadI);
2043	assert(isRemovable(DeadI) && "Expect only removable instruction");
2044
2045	const Value *Ptr = Loc.Ptr->stripPointerCasts();
2046	int64_t DeadStart = `0`;
2047	uint64_t DeadSize = Loc.Size.getValue();
2048	GetPointerBaseWithConstantOffset(Ptr, Offset&: DeadStart, DL);
2049	OverlapIntervalsTy &IntervalMap = OI.second;
2050	Changed \|= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
2051	if (IntervalMap.empty())
2052	continue;
2053	Changed \|= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
2054	}
2055	return Changed;
2056	}
2057
2058	/// Eliminates writes to locations where the value that is being written
2059	/// is already stored at the same location.
2060	bool eliminateRedundantStoresOfExistingValues() {
2061	bool MadeChange = false;
2062	LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
2063	"already existing value\n");
2064	for (auto *Def : MemDefs) {
2065	if (SkipStores.contains(Ptr: Def) \|\| MSSA.isLiveOnEntryDef(MA: Def))
2066	continue;
2067
2068	Instruction *DefInst = Def->getMemoryInst();
2069	auto MaybeDefLoc = getLocForWrite(I: DefInst);
2070	if (!MaybeDefLoc \|\| !isRemovable(I: DefInst))
2071	continue;
2072
2073	MemoryDef *UpperDef;
2074	// To conserve compile-time, we avoid walking to the next clobbering def.
2075	// Instead, we just try to get the optimized access, if it exists. DSE
2076	// will try to optimize defs during the earlier traversal.
2077	if (Def->isOptimized())
2078	UpperDef = dyn_cast<MemoryDef>(Val: Def->getOptimized());
2079	else
2080	UpperDef = dyn_cast<MemoryDef>(Val: Def->getDefiningAccess());
2081	if (!UpperDef \|\| MSSA.isLiveOnEntryDef(MA: UpperDef))
2082	continue;
2083
2084	Instruction *UpperInst = UpperDef->getMemoryInst();
2085	auto IsRedundantStore = [&]() {
2086	if (DefInst->isIdenticalTo(I: UpperInst))
2087	return true;
2088	if (auto *MemSetI = dyn_cast<MemSetInst>(Val: UpperInst)) {
2089	if (auto *SI = dyn_cast<StoreInst>(Val: DefInst)) {
2090	// MemSetInst must have a write location.
2091	MemoryLocation UpperLoc = *getLocForWrite(I: UpperInst);
2092	int64_t InstWriteOffset = `0`;
2093	int64_t DepWriteOffset = `0`;
2094	auto OR = isOverwrite(KillingI: UpperInst, DeadI: DefInst, KillingLoc: UpperLoc, DeadLoc: *MaybeDefLoc,
2095	KillingOff&: InstWriteOffset, DeadOff&: DepWriteOffset);
2096	Value *StoredByte = isBytewiseValue(V: SI->getValueOperand(), DL);
2097	return StoredByte && StoredByte == MemSetI->getOperand(i_nocapture: `1`) &&
2098	OR == OW_Complete;
2099	}
2100	}
2101	return false;
2102	};
2103
2104	if (!IsRedundantStore() \|\| isReadClobber(DefLoc: *MaybeDefLoc, UseInst: DefInst))
2105	continue;
2106	LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
2107	<< `'\n'`);
2108	deleteDeadInstruction(SI: DefInst);
2109	NumRedundantStores ++;
2110	MadeChange = true;
2111	}
2112	return MadeChange;
2113	}
2114	};
2115
2116	static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2117	DominatorTree &DT, PostDominatorTree &PDT,
2118	const TargetLibraryInfo &TLI,
2119	const LoopInfo &LI) {
2120	bool MadeChange = false;
2121
2122	DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
2123	// For each store:
2124	for (unsigned I = `0`; I < State.MemDefs.size(); I++) {
2125	MemoryDef *KillingDef = State.MemDefs [I];
2126	if (State.SkipStores.count(Ptr: KillingDef))
2127	continue;
2128	Instruction *KillingI = KillingDef->getMemoryInst();
2129
2130	std::optional<MemoryLocation> MaybeKillingLoc;
2131	if (State.isMemTerminatorInst(I: KillingI)) {
2132	if (auto KillingLoc = State.getLocForTerminator(I: KillingI))
2133	MaybeKillingLoc = KillingLoc ->first;
2134	} else {
2135	MaybeKillingLoc = State.getLocForWrite(I: KillingI);
2136	}
2137
2138	if (!MaybeKillingLoc) {
2139	LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
2140	<< *KillingI << "\n");
2141	continue;
2142	}
2143	MemoryLocation KillingLoc = *MaybeKillingLoc;
2144	assert(KillingLoc.Ptr && "KillingLoc should not be null");
2145	const Value *KillingUndObj = getUnderlyingObject(V: KillingLoc.Ptr);
2146	LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
2147	<< KillingDef << " (" << KillingI << ")\n");
2148
2149	unsigned ScanLimit = MemorySSAScanLimit;
2150	unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
2151	unsigned PartialLimit = MemorySSAPartialStoreLimit;
2152	// Worklist of MemoryAccesses that may be killed by KillingDef.
2153	SmallSetVector<MemoryAccess *, `8`> ToCheck;
2154	ToCheck.insert(X: KillingDef->getDefiningAccess());
2155
2156	bool Shortend = false;
2157	bool IsMemTerm = State.isMemTerminatorInst(I: KillingI);
2158	// Check if MemoryAccesses in the worklist are killed by KillingDef.
2159	for (unsigned I = `0`; I < ToCheck.size(); I++) {
2160	MemoryAccess *Current = ToCheck [I];
2161	if (State.SkipStores.count(Ptr: Current))
2162	continue;
2163
2164	std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
2165	KillingDef, StartAccess: Current, KillingLoc, KillingUndObj, ScanLimit,
2166	WalkerStepLimit, IsMemTerm, PartialLimit);
2167
2168	if (!MaybeDeadAccess) {
2169	LLVM_DEBUG(dbgs() << " finished walk\n");
2170	continue;
2171	}
2172
2173	MemoryAccess DeadAccess = MaybeDeadAccess;
2174	LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
2175	if (isa<MemoryPhi>(Val: DeadAccess)) {
2176	LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
2177	for (Value *V : cast<MemoryPhi>(Val: DeadAccess)->incoming_values()) {
2178	MemoryAccess *IncomingAccess = cast<MemoryAccess>(Val: V);
2179	BasicBlock *IncomingBlock = IncomingAccess->getBlock();
2180	BasicBlock *PhiBlock = DeadAccess->getBlock();
2181
2182	// We only consider incoming MemoryAccesses that come before the
2183	// MemoryPhi. Otherwise we could discover candidates that do not
2184	// strictly dominate our starting def.
2185	if (State.PostOrderNumbers [IncomingBlock] >
2186	State.PostOrderNumbers [PhiBlock])
2187	ToCheck.insert(X: IncomingAccess);
2188	}
2189	continue;
2190	}
2191	auto *DeadDefAccess = cast<MemoryDef>(Val: DeadAccess);
2192	Instruction *DeadI = DeadDefAccess->getMemoryInst();
2193	LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
2194	ToCheck.insert(X: DeadDefAccess->getDefiningAccess());
2195	NumGetDomMemoryDefPassed ++;
2196
2197	if (!DebugCounter::shouldExecute(CounterName: MemorySSACounter))
2198	continue;
2199
2200	MemoryLocation DeadLoc = *State.getLocForWrite(I: DeadI);
2201
2202	if (IsMemTerm) {
2203	const Value *DeadUndObj = getUnderlyingObject(V: DeadLoc.Ptr);
2204	if (KillingUndObj != DeadUndObj)
2205	continue;
2206	LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2207	<< "\n KILLER: " << *KillingI << `'\n'`);
2208	State.deleteDeadInstruction(SI: DeadI);
2209	++NumFastStores;
2210	MadeChange = true;
2211	} else {
2212	// Check if DeadI overwrites KillingI.
2213	int64_t KillingOffset = `0`;
2214	int64_t DeadOffset = `0`;
2215	OverwriteResult OR = State.isOverwrite(
2216	KillingI, DeadI, KillingLoc, DeadLoc, KillingOff&: KillingOffset, DeadOff&: DeadOffset);
2217	if (OR == OW_MaybePartial) {
2218	auto Iter = State.IOLs.insert(
2219	KV: std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
2220	x: DeadI->getParent(), y: InstOverlapIntervalsTy ()));
2221	auto &IOL = Iter.first->second;
2222	OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOff: KillingOffset,
2223	DeadOff: DeadOffset, DeadI, IOL);
2224	}
2225
2226	if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
2227	auto *DeadSI = dyn_cast<StoreInst>(Val: DeadI);
2228	auto *KillingSI = dyn_cast<StoreInst>(Val: KillingI);
2229	// We are re-using tryToMergePartialOverlappingStores, which requires
2230	// DeadSI to dominate KillingSI.
2231	// TODO: implement tryToMergeParialOverlappingStores using MemorySSA.
2232	if (DeadSI && KillingSI && DT.dominates(Def: DeadSI, User: KillingSI)) {
2233	if (Constant *Merged = tryToMergePartialOverlappingStores(
2234	KillingI: KillingSI, DeadI: DeadSI, KillingOffset, DeadOffset, DL: State.DL,
2235	AA&: State.BatchAA, DT: &DT)) {
2236
2237	// Update stored value of earlier store to merged constant.
2238	DeadSI->setOperand(i_nocapture: `0`, Val_nocapture: Merged);
2239	++NumModifiedStores;
2240	MadeChange = true;
2241
2242	Shortend = true;
2243	// Remove killing store and remove any outstanding overlap
2244	// intervals for the updated store.
2245	State.deleteDeadInstruction(SI: KillingSI);
2246	auto I = State.IOLs.find(Key: DeadSI->getParent());
2247	if (I != State.IOLs.end())
2248	I->second.erase(Val: DeadSI);
2249	break;
2250	}
2251	}
2252	}
2253
2254	if (OR == OW_Complete) {
2255	LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
2256	<< "\n KILLER: " << *KillingI << `'\n'`);
2257	State.deleteDeadInstruction(SI: DeadI);
2258	++NumFastStores;
2259	MadeChange = true;
2260	}
2261	}
2262	}
2263
2264	// Check if the store is a no-op.
2265	if (!Shortend && State.storeIsNoop(Def: KillingDef, DefUO: KillingUndObj)) {
2266	LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
2267	<< `'\n'`);
2268	State.deleteDeadInstruction(SI: KillingI);
2269	NumRedundantStores ++;
2270	MadeChange = true;
2271	continue;
2272	}
2273
2274	// Can we form a calloc from a memset/malloc pair?
2275	if (!Shortend && State.tryFoldIntoCalloc(Def: KillingDef, DefUO: KillingUndObj)) {
2276	LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
2277	<< " DEAD: " << *KillingI << `'\n'`);
2278	State.deleteDeadInstruction(SI: KillingI);
2279	MadeChange = true;
2280	continue;
2281	}
2282	}
2283
2284	if (EnablePartialOverwriteTracking)
2285	for (auto &KV : State.IOLs)
2286	MadeChange \|= State.removePartiallyOverlappedStores(IOL&: KV.second);
2287
2288	MadeChange \|= State.eliminateRedundantStoresOfExistingValues();
2289	MadeChange \|= State.eliminateDeadWritesAtEndOfFunction();
2290	return MadeChange;
2291	}
2292	} // end anonymous namespace
2293
2294	//===----------------------------------------------------------------------===//
2295	// DSE Pass
2296	//===----------------------------------------------------------------------===//
2297	PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
2298	AliasAnalysis &AA = AM.getResult<AAManager>(IR&: F);
2299	const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(IR&: F);
2300	DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(IR&: F);
2301	MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(IR&: F).getMSSA();
2302	PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(IR&: F);
2303	LoopInfo &LI = AM.getResult<LoopAnalysis>(IR&: F);
2304
2305	bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
2306
2307	#ifdef LLVM_ENABLE_STATS
2308	if (AreStatisticsEnabled())
2309	for (auto &I : instructions(F))
2310	NumRemainingStores += isa<StoreInst>(Val: &I);
2311	#endif
2312
2313	if (!Changed)
2314	return PreservedAnalyses::all();
2315
2316	PreservedAnalyses PA;
2317	PA.preserveSet<CFGAnalyses>();
2318	PA.preserve<MemorySSAAnalysis>();
2319	PA.preserve<LoopAnalysis>();
2320	return PA;
2321	}
2322

source code of llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp