LoopAccessAnalysis.cpp source code [llvm/lib/Analysis/LoopAccessAnalysis.cpp]

1	//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// The implementation for the loop memory dependence that was originally
10	// developed for the loop vectorizer.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "llvm/Analysis/LoopAccessAnalysis.h"
15	#include "llvm/ADT/APInt.h"
16	#include "llvm/ADT/DenseMap.h"
17	#include "llvm/ADT/EquivalenceClasses.h"
18	#include "llvm/ADT/PointerIntPair.h"
19	#include "llvm/ADT/STLExtras.h"
20	#include "llvm/ADT/SetVector.h"
21	#include "llvm/ADT/SmallPtrSet.h"
22	#include "llvm/ADT/SmallSet.h"
23	#include "llvm/ADT/SmallVector.h"
24	#include "llvm/Analysis/AliasAnalysis.h"
25	#include "llvm/Analysis/AliasSetTracker.h"
26	#include "llvm/Analysis/LoopAnalysisManager.h"
27	#include "llvm/Analysis/LoopInfo.h"
28	#include "llvm/Analysis/LoopIterator.h"
29	#include "llvm/Analysis/MemoryLocation.h"
30	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
31	#include "llvm/Analysis/ScalarEvolution.h"
32	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
33	#include "llvm/Analysis/TargetLibraryInfo.h"
34	#include "llvm/Analysis/ValueTracking.h"
35	#include "llvm/Analysis/VectorUtils.h"
36	#include "llvm/IR/BasicBlock.h"
37	#include "llvm/IR/Constants.h"
38	#include "llvm/IR/DataLayout.h"
39	#include "llvm/IR/DebugLoc.h"
40	#include "llvm/IR/DerivedTypes.h"
41	#include "llvm/IR/DiagnosticInfo.h"
42	#include "llvm/IR/Dominators.h"
43	#include "llvm/IR/Function.h"
44	#include "llvm/IR/GetElementPtrTypeIterator.h"
45	#include "llvm/IR/InstrTypes.h"
46	#include "llvm/IR/Instruction.h"
47	#include "llvm/IR/Instructions.h"
48	#include "llvm/IR/Operator.h"
49	#include "llvm/IR/PassManager.h"
50	#include "llvm/IR/PatternMatch.h"
51	#include "llvm/IR/Type.h"
52	#include "llvm/IR/Value.h"
53	#include "llvm/IR/ValueHandle.h"
54	#include "llvm/Support/Casting.h"
55	#include "llvm/Support/CommandLine.h"
56	#include "llvm/Support/Debug.h"
57	#include "llvm/Support/ErrorHandling.h"
58	#include "llvm/Support/raw_ostream.h"
59	#include <algorithm>
60	#include <cassert>
61	#include <cstdint>
62	#include <iterator>
63	#include <utility>
64	#include <variant>
65	#include <vector>
66
67	using namespace llvm;
68	using namespace llvm::PatternMatch;
69
70	#define DEBUG_TYPE "loop-accesses"
71
72	static cl::opt<unsigned, true>
73	VectorizationFactor("force-vector-width", cl::Hidden,
74	cl::desc ("Sets the SIMD width. Zero is autoselect."),
75	cl::location(L&: VectorizerParams::VectorizationFactor));
76	unsigned VectorizerParams::VectorizationFactor;
77
78	static cl::opt<unsigned, true>
79	VectorizationInterleave("force-vector-interleave", cl::Hidden,
80	cl::desc ("Sets the vectorization interleave count. "
81	"Zero is autoselect."),
82	cl::location(
83	L&: VectorizerParams::VectorizationInterleave));
84	unsigned VectorizerParams::VectorizationInterleave;
85
86	static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
87	"runtime-memory-check-threshold", cl::Hidden,
88	cl::desc ("When performing memory disambiguation checks at runtime do not "
89	"generate more than this number of comparisons (default = 8)."),
90	cl::location(L&: VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(Val: `8`));
91	unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
92
93	/// The maximum iterations used to merge memory checks
94	static cl::opt<unsigned> MemoryCheckMergeThreshold(
95	"memory-check-merge-threshold", cl::Hidden,
96	cl::desc ("Maximum number of comparisons done when trying to merge "
97	"runtime memory checks. (default = 100)"),
98	cl::init(Val: `100`));
99
100	/// Maximum SIMD width.
101	const unsigned VectorizerParams::MaxVectorWidth = `64`;
102
103	/// We collect dependences up to this threshold.
104	static cl::opt<unsigned>
105	MaxDependences("max-dependences", cl::Hidden,
106	cl::desc ("Maximum number of dependences collected by "
107	"loop-access analysis (default = 100)"),
108	cl::init(Val: `100`));
109
110	/// This enables versioning on the strides of symbolically striding memory
111	/// accesses in code like the following.
112	/// for (i = 0; i < N; ++i)
113	/// A[i Stride1] += B[i * Stride2] ...*
114	///
115	/// Will be roughly translated to
116	/// if (Stride1 == 1 && Stride2 == 1) {
117	/// for (i = 0; i < N; i+=4)
118	/// A[i:i+3] += ...
119	/// } else
120	/// ...
121	static cl::opt<bool> EnableMemAccessVersioning(
122	"enable-mem-access-versioning", cl::init(Val: true), cl::Hidden,
123	cl::desc ("Enable symbolic stride memory access versioning"));
124
125	/// Enable store-to-load forwarding conflict detection. This option can
126	/// be disabled for correctness testing.
127	static cl::opt<bool> EnableForwardingConflictDetection(
128	"store-to-load-forwarding-conflict-detection", cl::Hidden,
129	cl::desc ("Enable conflict detection in loop-access analysis"),
130	cl::init(Val: true));
131
132	static cl::opt<unsigned> MaxForkedSCEVDepth(
133	"max-forked-scev-depth", cl::Hidden,
134	cl::desc ("Maximum recursion depth when finding forked SCEVs (default = 5)"),
135	cl::init(Val: `5`));
136
137	static cl::opt<bool> SpeculateUnitStride(
138	"laa-speculate-unit-stride", cl::Hidden,
139	cl::desc ("Speculate that non-constant strides are unit in LAA"),
140	cl::init(Val: true));
141
142	static cl::opt<bool, true> HoistRuntimeChecks(
143	"hoist-runtime-checks", cl::Hidden,
144	cl::desc (
145	"Hoist inner loop runtime memory checks to outer loop if possible"),
146	cl::location(L&: VectorizerParams::HoistRuntimeChecks), cl::init(Val: true));
147	bool VectorizerParams::HoistRuntimeChecks;
148
149	bool VectorizerParams::isInterleaveForced() {
150	return ::VectorizationInterleave.getNumOccurrences() > `0`;
151	}
152
153	const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
154	const DenseMap<Value , const* SCEV *> &PtrToStride,
155	Value *Ptr) {
156	const SCEV *OrigSCEV = PSE.getSCEV(V: Ptr);
157
158	// If there is an entry in the map return the SCEV of the pointer with the
159	// symbolic stride replaced by one.
160	DenseMap<Value , const* SCEV *>::const_iterator SI = PtrToStride.find(Val: Ptr);
161	if (SI == PtrToStride.end())
162	// For a non-symbolic stride, just return the original expression.
163	return OrigSCEV;
164
165	const SCEV *StrideSCEV = SI ->second;
166	// Note: This assert is both overly strong and overly weak. The actual
167	// invariant here is that StrideSCEV should be loop invariant. The only
168	// such invariant strides we happen to speculate right now are unknowns
169	// and thus this is a reasonable proxy of the actual invariant.
170	assert(isa<SCEVUnknown>(StrideSCEV) && "shouldn't be in map");
171
172	ScalarEvolution *SE = PSE.getSE();
173	const auto *CT = SE->getOne(Ty: StrideSCEV->getType());
174	PSE.addPredicate(Pred: *SE->getEqualPredicate(LHS: StrideSCEV, RHS: CT));
175	auto *Expr = PSE.getSCEV(V: Ptr);
176
177	LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
178	<< " by: " << *Expr << "\n");
179	return Expr;
180	}
181
182	RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
183	unsigned Index, RuntimePointerChecking &RtCheck)
184	: High(RtCheck.Pointers [Index].End), Low(RtCheck.Pointers [Index].Start),
185	AddressSpace(RtCheck.Pointers [Index]
186	.PointerValue ->getType()
187	->getPointerAddressSpace()),
188	NeedsFreeze(RtCheck.Pointers [Index].NeedsFreeze) {
189	Members.push_back(Elt: Index);
190	}
191
192	/// Calculate Start and End points of memory access.
193	/// Let's assume A is the first access and B is a memory access on N-th loop
194	/// iteration. Then B is calculated as:
195	/// B = A + StepN .*
196	/// Step value may be positive or negative.
197	/// N is a calculated back-edge taken count:
198	/// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
199	/// Start and End points are calculated in the following way:
200	/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
201	/// where SizeOfElt is the size of single memory access in bytes.
202	///
203	/// There is no conflict when the intervals are disjoint:
204	/// NoConflict = (P2.Start >= P1.End) \|\| (P1.Start >= P2.End)
205	void RuntimePointerChecking::insert(Loop Lp, Value Ptr, const SCEV *PtrExpr,
206	Type AccessTy, bool* WritePtr,
207	unsigned DepSetId, unsigned ASId,
208	PredicatedScalarEvolution &PSE,
209	bool NeedsFreeze) {
210	ScalarEvolution *SE = PSE.getSE();
211
212	const SCEV *ScStart;
213	const SCEV *ScEnd;
214
215	if (SE->isLoopInvariant(S: PtrExpr, L: Lp)) {
216	ScStart = ScEnd = PtrExpr;
217	} else {
218	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: PtrExpr);
219	assert(AR && "Invalid addrec expression");
220	const SCEV *Ex = PSE.getBackedgeTakenCount();
221
222	ScStart = AR->getStart();
223	ScEnd = AR->evaluateAtIteration(It: Ex, SE&: *SE);
224	const SCEV Step = AR->getStepRecurrence(SE&: SE);
225
226	// For expressions with negative step, the upper bound is ScStart and the
227	// lower bound is ScEnd.
228	if (const auto *CStep = dyn_cast<SCEVConstant>(Val: Step)) {
229	if (CStep->getValue()->isNegative())
230	std::swap(a&: ScStart, b&: ScEnd);
231	} else {
232	// Fallback case: the step is not constant, but we can still
233	// get the upper and lower bounds of the interval by using min/max
234	// expressions.
235	ScStart = SE->getUMinExpr(LHS: ScStart, RHS: ScEnd);
236	ScEnd = SE->getUMaxExpr(LHS: AR->getStart(), RHS: ScEnd);
237	}
238	}
239	assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
240	assert(SE->isLoopInvariant(ScEnd, Lp)&& "ScEnd needs to be invariant");
241
242	// Add the size of the pointed element to ScEnd.
243	auto &DL = Lp->getHeader()->getModule()->getDataLayout();
244	Type *IdxTy = DL.getIndexType(PtrTy: Ptr->getType());
245	const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IntTy: IdxTy, StoreTy: AccessTy);
246	ScEnd = SE->getAddExpr(LHS: ScEnd, RHS: EltSizeSCEV);
247
248	Pointers.emplace_back(Args&: Ptr, Args&: ScStart, Args&: ScEnd, Args&: WritePtr, Args&: DepSetId, Args&: ASId, Args&: PtrExpr,
249	Args&: NeedsFreeze);
250	}
251
252	void RuntimePointerChecking::tryToCreateDiffCheck(
253	const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
254	if (!CanUseDiffCheck)
255	return;
256
257	// If either group contains multiple different pointers, bail out.
258	// TODO: Support multiple pointers by using the minimum or maximum pointer,
259	// depending on src & sink.
260	if (CGI.Members.size() != `1` \|\| CGJ.Members.size() != `1`) {
261	CanUseDiffCheck = false;
262	return;
263	}
264
265	PointerInfo *Src = &Pointers [CGI.Members [`0`]];
266	PointerInfo *Sink = &Pointers [CGJ.Members [`0`]];
267
268	// If either pointer is read and written, multiple checks may be needed. Bail
269	// out.
270	if (!DC.getOrderForAccess(Ptr: Src->PointerValue, IsWrite: !Src->IsWritePtr).empty() \|\|
271	!DC.getOrderForAccess(Ptr: Sink->PointerValue, IsWrite: !Sink->IsWritePtr).empty()) {
272	CanUseDiffCheck = false;
273	return;
274	}
275
276	ArrayRef<unsigned> AccSrc =
277	DC.getOrderForAccess(Ptr: Src->PointerValue, IsWrite: Src->IsWritePtr);
278	ArrayRef<unsigned> AccSink =
279	DC.getOrderForAccess(Ptr: Sink->PointerValue, IsWrite: Sink->IsWritePtr);
280	// If either pointer is accessed multiple times, there may not be a clear
281	// src/sink relation. Bail out for now.
282	if (AccSrc.size() != `1` \|\| AccSink.size() != `1`) {
283	CanUseDiffCheck = false;
284	return;
285	}
286	// If the sink is accessed before src, swap src/sink.
287	if (AccSink [`0`] < AccSrc [`0`])
288	std::swap(a&: Src, b&: Sink);
289
290	auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Val: Src->Expr);
291	auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Val: Sink->Expr);
292	if (!SrcAR \|\| !SinkAR \|\| SrcAR->getLoop() != DC.getInnermostLoop() \|\|
293	SinkAR->getLoop() != DC.getInnermostLoop()) {
294	CanUseDiffCheck = false;
295	return;
296	}
297
298	SmallVector<Instruction *, `4`> SrcInsts =
299	DC.getInstructionsForAccess(Ptr: Src->PointerValue, isWrite: Src->IsWritePtr);
300	SmallVector<Instruction *, `4`> SinkInsts =
301	DC.getInstructionsForAccess(Ptr: Sink->PointerValue, isWrite: Sink->IsWritePtr);
302	Type *SrcTy = getLoadStoreType(I: SrcInsts [`0`]);
303	Type *DstTy = getLoadStoreType(I: SinkInsts [`0`]);
304	if (isa<ScalableVectorType>(Val: SrcTy) \|\| isa<ScalableVectorType>(Val: DstTy)) {
305	CanUseDiffCheck = false;
306	return;
307	}
308	const DataLayout &DL =
309	SinkAR->getLoop()->getHeader()->getModule()->getDataLayout();
310	unsigned AllocSize =
311	std::max(a: DL.getTypeAllocSize(Ty: SrcTy), b: DL.getTypeAllocSize(Ty: DstTy));
312
313	// Only matching constant steps matching the AllocSize are supported at the
314	// moment. This simplifies the difference computation. Can be extended in the
315	// future.
316	auto Step = dyn_cast<SCEVConstant>(Val: SinkAR->getStepRecurrence(SE&: SE));
317	if (!Step \|\| Step != SrcAR->getStepRecurrence(SE&: *SE) \|\|
318	Step->getAPInt().abs() != AllocSize) {
319	CanUseDiffCheck = false;
320	return;
321	}
322
323	IntegerType *IntTy =
324	IntegerType::get(C&: Src->PointerValue ->getContext(),
325	NumBits: DL.getPointerSizeInBits(AS: CGI.AddressSpace));
326
327	// When counting down, the dependence distance needs to be swapped.
328	if (Step->getValue()->isNegative())
329	std::swap(a&: SinkAR, b&: SrcAR);
330
331	const SCEV *SinkStartInt = SE->getPtrToIntExpr(Op: SinkAR->getStart(), Ty: IntTy);
332	const SCEV *SrcStartInt = SE->getPtrToIntExpr(Op: SrcAR->getStart(), Ty: IntTy);
333	if (isa<SCEVCouldNotCompute>(Val: SinkStartInt) \|\|
334	isa<SCEVCouldNotCompute>(Val: SrcStartInt)) {
335	CanUseDiffCheck = false;
336	return;
337	}
338
339	const Loop *InnerLoop = SrcAR->getLoop();
340	// If the start values for both Src and Sink also vary according to an outer
341	// loop, then it's probably better to avoid creating diff checks because
342	// they may not be hoisted. We should instead let llvm::addRuntimeChecks
343	// do the expanded full range overlap checks, which can be hoisted.
344	if (HoistRuntimeChecks && InnerLoop->getParentLoop() &&
345	isa<SCEVAddRecExpr>(Val: SinkStartInt) && isa<SCEVAddRecExpr>(Val: SrcStartInt)) {
346	auto *SrcStartAR = cast<SCEVAddRecExpr>(Val: SrcStartInt);
347	auto *SinkStartAR = cast<SCEVAddRecExpr>(Val: SinkStartInt);
348	const Loop *StartARLoop = SrcStartAR->getLoop();
349	if (StartARLoop == SinkStartAR->getLoop() &&
350	StartARLoop == InnerLoop->getParentLoop() &&
351	// If the diff check would already be loop invariant (due to the
352	// recurrences being the same), then we prefer to keep the diff checks
353	// because they are cheaper.
354	SrcStartAR->getStepRecurrence(SE&: *SE) !=
355	SinkStartAR->getStepRecurrence(SE&: *SE)) {
356	LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
357	"cannot be hoisted out of the outer loop\n");
358	CanUseDiffCheck = false;
359	return;
360	}
361	}
362
363	LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
364	<< "SrcStart: " << *SrcStartInt << `'\n'`
365	<< "SinkStartInt: " << *SinkStartInt << `'\n'`);
366	DiffChecks.emplace_back(Args&: SrcStartInt, Args&: SinkStartInt, Args&: AllocSize,
367	Args: Src->NeedsFreeze \|\| Sink->NeedsFreeze);
368	}
369
370	SmallVector<RuntimePointerCheck, `4`> RuntimePointerChecking::generateChecks() {
371	SmallVector<RuntimePointerCheck, `4`> Checks;
372
373	for (unsigned I = `0`; I < CheckingGroups.size(); ++I) {
374	for (unsigned J = I + `1`; J < CheckingGroups.size(); ++J) {
375	const RuntimeCheckingPtrGroup &CGI = CheckingGroups [I];
376	const RuntimeCheckingPtrGroup &CGJ = CheckingGroups [J];
377
378	if (needsChecking(M: CGI, N: CGJ)) {
379	tryToCreateDiffCheck(CGI, CGJ);
380	Checks.push_back(Elt: std::make_pair(x: &CGI, y: &CGJ));
381	}
382	}
383	}
384	return Checks;
385	}
386
387	void RuntimePointerChecking::generateChecks(
388	MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
389	assert(Checks.empty() && "Checks is not empty");
390	groupChecks(DepCands, UseDependencies);
391	Checks = generateChecks();
392	}
393
394	bool RuntimePointerChecking::needsChecking(
395	const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
396	for (unsigned I = `0`, EI = M.Members.size(); EI != I; ++I)
397	for (unsigned J = `0`, EJ = N.Members.size(); EJ != J; ++J)
398	if (needsChecking(I: M.Members [I], J: N.Members [J]))
399	return true;
400	return false;
401	}
402
403	/// Compare \p I and \p J and return the minimum.
404	/// Return nullptr in case we couldn't find an answer.
405	static const SCEV getMinFromExprs(const* SCEV I, const* SCEV *J,
406	ScalarEvolution *SE) {
407	const SCEV *Diff = SE->getMinusSCEV(LHS: J, RHS: I);
408	const SCEVConstant C = dyn_cast<const* SCEVConstant>(Val: Diff);
409
410	if (!C)
411	return nullptr;
412	if (C->getValue()->isNegative())
413	return J;
414	return I;
415	}
416
417	bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
418	RuntimePointerChecking &RtCheck) {
419	return addPointer(
420	Index, Start: RtCheck.Pointers [Index].Start, End: RtCheck.Pointers [Index].End,
421	AS: RtCheck.Pointers [Index].PointerValue ->getType()->getPointerAddressSpace(),
422	NeedsFreeze: RtCheck.Pointers [Index].NeedsFreeze, SE&: *RtCheck.SE);
423	}
424
425	bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
426	const SCEV End, unsigned* AS,
427	bool NeedsFreeze,
428	ScalarEvolution &SE) {
429	assert(AddressSpace == AS &&
430	"all pointers in a checking group must be in the same address space");
431
432	// Compare the starts and ends with the known minimum and maximum
433	// of this set. We need to know how we compare against the min/max
434	// of the set in order to be able to emit memchecks.
435	const SCEV *Min0 = getMinFromExprs(I: Start, J: Low, SE: &SE);
436	if (!Min0)
437	return false;
438
439	const SCEV *Min1 = getMinFromExprs(I: End, J: High, SE: &SE);
440	if (!Min1)
441	return false;
442
443	// Update the low bound expression if we've found a new min value.
444	if (Min0 == Start)
445	Low = Start;
446
447	// Update the high bound expression if we've found a new max value.
448	if (Min1 != End)
449	High = End;
450
451	Members.push_back(Elt: Index);
452	this->NeedsFreeze \|= NeedsFreeze;
453	return true;
454	}
455
456	void RuntimePointerChecking::groupChecks(
457	MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
458	// We build the groups from dependency candidates equivalence classes
459	// because:
460	// - We know that pointers in the same equivalence class share
461	// the same underlying object and therefore there is a chance
462	// that we can compare pointers
463	// - We wouldn't be able to merge two pointers for which we need
464	// to emit a memcheck. The classes in DepCands are already
465	// conveniently built such that no two pointers in the same
466	// class need checking against each other.
467
468	// We use the following (greedy) algorithm to construct the groups
469	// For every pointer in the equivalence class:
470	// For each existing group:
471	// - if the difference between this pointer and the min/max bounds
472	// of the group is a constant, then make the pointer part of the
473	// group and update the min/max bounds of that group as required.
474
475	CheckingGroups.clear();
476
477	// If we need to check two pointers to the same underlying object
478	// with a non-constant difference, we shouldn't perform any pointer
479	// grouping with those pointers. This is because we can easily get
480	// into cases where the resulting check would return false, even when
481	// the accesses are safe.
482	//
483	// The following example shows this:
484	// for (i = 0; i < 1000; ++i)
485	// a[5000 + i m] = a[i] + a[i + 9000]*
486	//
487	// Here grouping gives a check of (5000, 5000 + 1000 m) against*
488	// (0, 10000) which is always false. However, if m is 1, there is no
489	// dependence. Not grouping the checks for a[i] and a[i + 9000] allows
490	// us to perform an accurate check in this case.
491	//
492	// The above case requires that we have an UnknownDependence between
493	// accesses to the same underlying object. This cannot happen unless
494	// FoundNonConstantDistanceDependence is set, and therefore UseDependencies
495	// is also false. In this case we will use the fallback path and create
496	// separate checking groups for all pointers.
497
498	// If we don't have the dependency partitions, construct a new
499	// checking pointer group for each pointer. This is also required
500	// for correctness, because in this case we can have checking between
501	// pointers to the same underlying object.
502	if (!UseDependencies) {
503	for (unsigned I = `0`; I < Pointers.size(); ++I)
504	CheckingGroups.push_back(Elt: RuntimeCheckingPtrGroup (I, *this));
505	return;
506	}
507
508	unsigned TotalComparisons = `0`;
509
510	DenseMap<Value , SmallVector<unsigned*>> PositionMap;
511	for (unsigned Index = `0`; Index < Pointers.size(); ++Index) {
512	auto Iter = PositionMap.insert(KV: {Pointers [Index].PointerValue, {}});
513	Iter.first ->second.push_back(Elt: Index);
514	}
515
516	// We need to keep track of what pointers we've already seen so we
517	// don't process them twice.
518	SmallSet<unsigned, `2`> Seen;
519
520	// Go through all equivalence classes, get the "pointer check groups"
521	// and add them to the overall solution. We use the order in which accesses
522	// appear in 'Pointers' to enforce determinism.
523	for (unsigned I = `0`; I < Pointers.size(); ++I) {
524	// We've seen this pointer before, and therefore already processed
525	// its equivalence class.
526	if (Seen.count(V: I))
527	continue;
528
529	MemoryDepChecker::MemAccessInfo Access(Pointers [I].PointerValue,
530	Pointers [I].IsWritePtr);
531
532	SmallVector<RuntimeCheckingPtrGroup, `2`> Groups;
533	auto LeaderI = DepCands.findValue(V: DepCands.getLeaderValue(V: Access));
534
535	// Because DepCands is constructed by visiting accesses in the order in
536	// which they appear in alias sets (which is deterministic) and the
537	// iteration order within an equivalence class member is only dependent on
538	// the order in which unions and insertions are performed on the
539	// equivalence class, the iteration order is deterministic.
540	for (auto MI = DepCands.member_begin(I: LeaderI), ME = DepCands.member_end();
541	MI != ME; ++MI) {
542	auto PointerI = PositionMap.find(Val: MI ->getPointer());
543	assert(PointerI != PositionMap.end() &&
544	"pointer in equivalence class not found in PositionMap");
545	for (unsigned Pointer : PointerI ->second) {
546	bool Merged = false;
547	// Mark this pointer as seen.
548	Seen.insert(V: Pointer);
549
550	// Go through all the existing sets and see if we can find one
551	// which can include this pointer.
552	for (RuntimeCheckingPtrGroup &Group : Groups) {
553	// Don't perform more than a certain amount of comparisons.
554	// This should limit the cost of grouping the pointers to something
555	// reasonable. If we do end up hitting this threshold, the algorithm
556	// will create separate groups for all remaining pointers.
557	if (TotalComparisons > MemoryCheckMergeThreshold)
558	break;
559
560	TotalComparisons++;
561
562	if (Group.addPointer(Index: Pointer, RtCheck&: *this)) {
563	Merged = true;
564	break;
565	}
566	}
567
568	if (!Merged)
569	// We couldn't add this pointer to any existing set or the threshold
570	// for the number of comparisons has been reached. Create a new group
571	// to hold the current pointer.
572	Groups.push_back(Elt: RuntimeCheckingPtrGroup (Pointer, *this));
573	}
574	}
575
576	// We've computed the grouped checks for this partition.
577	// Save the results and continue with the next one.
578	llvm::copy(Range&: Groups, Out: std::back_inserter(x&: CheckingGroups));
579	}
580	}
581
582	bool RuntimePointerChecking::arePointersInSamePartition(
583	const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
584	unsigned PtrIdx2) {
585	return (PtrToPartition [PtrIdx1] != -`1` &&
586	PtrToPartition [PtrIdx1] == PtrToPartition [PtrIdx2]);
587	}
588
589	bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
590	const PointerInfo &PointerI = Pointers [I];
591	const PointerInfo &PointerJ = Pointers [J];
592
593	// No need to check if two readonly pointers intersect.
594	if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
595	return false;
596
597	// Only need to check pointers between two different dependency sets.
598	if (PointerI.DependencySetId == PointerJ.DependencySetId)
599	return false;
600
601	// Only need to check pointers in the same alias set.
602	if (PointerI.AliasSetId != PointerJ.AliasSetId)
603	return false;
604
605	return true;
606	}
607
608	void RuntimePointerChecking::printChecks(
609	raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
610	unsigned Depth) const {
611	unsigned N = `0`;
612	for (const auto &Check : Checks) {
613	const auto &First = Check.first->Members, &Second = Check.second->Members;
614
615	OS.indent(NumSpaces: Depth) << "Check " << N++ << ":\n";
616
617	OS.indent(NumSpaces: Depth + `2`) << "Comparing group (" << Check.first << "):\n";
618	for (unsigned K = `0`; K < First.size(); ++K)
619	OS.indent(NumSpaces: Depth + `2`) << *Pointers [First [K]].PointerValue << "\n";
620
621	OS.indent(NumSpaces: Depth + `2`) << "Against group (" << Check.second << "):\n";
622	for (unsigned K = `0`; K < Second.size(); ++K)
623	OS.indent(NumSpaces: Depth + `2`) << *Pointers [Second [K]].PointerValue << "\n";
624	}
625	}
626
627	void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
628
629	OS.indent(NumSpaces: Depth) << "Run-time memory checks:\n";
630	printChecks(OS, Checks, Depth);
631
632	OS.indent(NumSpaces: Depth) << "Grouped accesses:\n";
633	for (unsigned I = `0`; I < CheckingGroups.size(); ++I) {
634	const auto &CG = CheckingGroups [I];
635
636	OS.indent(NumSpaces: Depth + `2`) << "Group " << &CG << ":\n";
637	OS.indent(NumSpaces: Depth + `4`) << "(Low: " << CG.Low << " High: " << CG.High
638	<< ")\n";
639	for (unsigned J = `0`; J < CG.Members.size(); ++J) {
640	OS.indent(NumSpaces: Depth + `6`) << "Member: " << *Pointers [CG.Members [J]].Expr
641	<< "\n";
642	}
643	}
644	}
645
646	namespace {
647
648	/// Analyses memory accesses in a loop.
649	///
650	/// Checks whether run time pointer checks are needed and builds sets for data
651	/// dependence checking.
652	class AccessAnalysis {
653	public:
654	/// Read or write access location.
655	typedef PointerIntPair<Value , `1`, bool*> MemAccessInfo;
656	typedef SmallVector<MemAccessInfo, `8`> MemAccessInfoList;
657
658	AccessAnalysis(Loop TheLoop, AAResults AA, LoopInfo *LI,
659	MemoryDepChecker::DepCandidates &DA,
660	PredicatedScalarEvolution &PSE,
661	SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
662	: TheLoop(TheLoop), BAA (*AA), AST (BAA), LI(LI), DepCands(DA), PSE(PSE),
663	LoopAliasScopes(LoopAliasScopes) {
664	// We're analyzing dependences across loop iterations.
665	BAA.enableCrossIterationMode();
666	}
667
668	/// Register a load and whether it is only read from.
669	void addLoad(MemoryLocation &Loc, Type AccessTy, bool* IsReadOnly) {
670	Value Ptr = const_cast<Value >(Loc.Ptr);
671	AST.add(Loc: adjustLoc(Loc));
672	Accesses [MemAccessInfo (Ptr, false)].insert(X: AccessTy);
673	if (IsReadOnly)
674	ReadOnlyPtr.insert(Ptr);
675	}
676
677	/// Register a store.
678	void addStore(MemoryLocation &Loc, Type *AccessTy) {
679	Value Ptr = const_cast<Value >(Loc.Ptr);
680	AST.add(Loc: adjustLoc(Loc));
681	Accesses [MemAccessInfo (Ptr, true)].insert(X: AccessTy);
682	}
683
684	/// Check if we can emit a run-time no-alias check for \p Access.
685	///
686	/// Returns true if we can emit a run-time no alias check for \p Access.
687	/// If we can check this access, this also adds it to a dependence set and
688	/// adds a run-time to check for it to \p RtCheck. If \p Assume is true,
689	/// we will attempt to use additional run-time checks in order to get
690	/// the bounds of the pointer.
691	bool createCheckForAccess(RuntimePointerChecking &RtCheck,
692	MemAccessInfo Access, Type *AccessTy,
693	const DenseMap<Value , const* SCEV *> &Strides,
694	DenseMap<Value , unsigned*> &DepSetId,
695	Loop TheLoop, unsigned* &RunningDepId,
696	unsigned ASId, bool ShouldCheckStride, bool Assume);
697
698	/// Check whether we can check the pointers at runtime for
699	/// non-intersection.
700	///
701	/// Returns true if we need no check or if we do and we can generate them
702	/// (i.e. the pointers have computable bounds).
703	bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
704	Loop TheLoop, const* DenseMap<Value , const* SCEV *> &Strides,
705	Value &UncomputablePtr, bool* ShouldCheckWrap = false);
706
707	/// Goes over all memory accesses, checks whether a RT check is needed
708	/// and builds sets of dependent accesses.
709	void buildDependenceSets() {
710	processMemAccesses();
711	}
712
713	/// Initial processing of memory accesses determined that we need to
714	/// perform dependency checking.
715	///
716	/// Note that this can later be cleared if we retry memcheck analysis without
717	/// dependency checking (i.e. FoundNonConstantDistanceDependence).
718	bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
719
720	/// We decided that no dependence analysis would be used. Reset the state.
721	void resetDepChecks(MemoryDepChecker &DepChecker) {
722	CheckDeps.clear();
723	DepChecker.clearDependences();
724	}
725
726	MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
727
728	const DenseMap<Value , SmallVector<const* Value *, `16`>> &
729	getUnderlyingObjects() {
730	return UnderlyingObjects;
731	}
732
733	private:
734	typedef MapVector<MemAccessInfo, SmallSetVector<Type *, `1`>> PtrAccessMap;
735
736	/// Adjust the MemoryLocation so that it represents accesses to this
737	/// location across all iterations, rather than a single one.
738	MemoryLocation adjustLoc(MemoryLocation Loc) const {
739	// The accessed location varies within the loop, but remains within the
740	// underlying object.
741	Loc.Size = LocationSize::beforeOrAfterPointer();
742	Loc.AATags.Scope = adjustAliasScopeList(ScopeList: Loc.AATags.Scope);
743	Loc.AATags.NoAlias = adjustAliasScopeList(ScopeList: Loc.AATags.NoAlias);
744	return Loc;
745	}
746
747	/// Drop alias scopes that are only valid within a single loop iteration.
748	MDNode adjustAliasScopeList(MDNode ScopeList) const {
749	if (!ScopeList)
750	return nullptr;
751
752	// For the sake of simplicity, drop the whole scope list if any scope is
753	// iteration-local.
754	if (any_of(Range: ScopeList->operands(), P: [&](Metadata *Scope) {
755	return LoopAliasScopes.contains(Ptr: cast<MDNode>(Val: Scope));
756	}))
757	return nullptr;
758
759	return ScopeList;
760	}
761
762	/// Go over all memory access and check whether runtime pointer checks
763	/// are needed and build sets of dependency check candidates.
764	void processMemAccesses();
765
766	/// Map of all accesses. Values are the types used to access memory pointed to
767	/// by the pointer.
768	PtrAccessMap Accesses;
769
770	/// The loop being checked.
771	const Loop *TheLoop;
772
773	/// List of accesses that need a further dependence check.
774	MemAccessInfoList CheckDeps;
775
776	/// Set of pointers that are read only.
777	SmallPtrSet<Value*, `16`> ReadOnlyPtr;
778
779	/// Batched alias analysis results.
780	BatchAAResults BAA;
781
782	/// An alias set tracker to partition the access set by underlying object and
783	//intrinsic property (such as TBAA metadata).
784	AliasSetTracker AST;
785
786	LoopInfo *LI;
787
788	/// Sets of potentially dependent accesses - members of one set share an
789	/// underlying pointer. The set "CheckDeps" identfies which sets really need a
790	/// dependence check.
791	MemoryDepChecker::DepCandidates &DepCands;
792
793	/// Initial processing of memory accesses determined that we may need
794	/// to add memchecks. Perform the analysis to determine the necessary checks.
795	///
796	/// Note that, this is different from isDependencyCheckNeeded. When we retry
797	/// memcheck analysis without dependency checking
798	/// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
799	/// cleared while this remains set if we have potentially dependent accesses.
800	bool IsRTCheckAnalysisNeeded = false;
801
802	/// The SCEV predicate containing all the SCEV-related assumptions.
803	PredicatedScalarEvolution &PSE;
804
805	DenseMap<Value , SmallVector<const* Value *, `16`>> UnderlyingObjects;
806
807	/// Alias scopes that are declared inside the loop, and as such not valid
808	/// across iterations.
809	SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
810	};
811
812	} // end anonymous namespace
813
814	/// Check whether a pointer can participate in a runtime bounds check.
815	/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
816	/// by adding run-time checks (overflow checks) if necessary.
817	static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
818	const SCEV PtrScev, Loop L, bool Assume) {
819	// The bounds for loop-invariant pointer is trivial.
820	if (PSE.getSE()->isLoopInvariant(S: PtrScev, L))
821	return true;
822
823	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: PtrScev);
824
825	if (!AR && Assume)
826	AR = PSE.getAsAddRec(V: Ptr);
827
828	if (!AR)
829	return false;
830
831	return AR->isAffine();
832	}
833
834	/// Check whether a pointer address cannot wrap.
835	static bool isNoWrap(PredicatedScalarEvolution &PSE,
836	const DenseMap<Value , const* SCEV > &Strides, Value Ptr, Type *AccessTy,
837	Loop *L) {
838	const SCEV *PtrScev = PSE.getSCEV(V: Ptr);
839	if (PSE.getSE()->isLoopInvariant(S: PtrScev, L))
840	return true;
841
842	int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, Lp: L, StridesMap: Strides).value_or(u: `0`);
843	if (Stride == `1` \|\| PSE.hasNoOverflow(V: Ptr, Flags: SCEVWrapPredicate::IncrementNUSW))
844	return true;
845
846	return false;
847	}
848
849	static void visitPointers(Value StartPtr, const* Loop &InnermostLoop,
850	function_ref<void(Value *)> AddPointer) {
851	SmallPtrSet<Value *, `8`> Visited;
852	SmallVector<Value *> WorkList;
853	WorkList.push_back(Elt: StartPtr);
854
855	while (!WorkList.empty()) {
856	Value *Ptr = WorkList.pop_back_val();
857	if (!Visited.insert(Ptr).second)
858	continue;
859	auto *PN = dyn_cast<PHINode>(Val: Ptr);
860	// SCEV does not look through non-header PHIs inside the loop. Such phis
861	// can be analyzed by adding separate accesses for each incoming pointer
862	// value.
863	if (PN && InnermostLoop.contains(BB: PN->getParent()) &&
864	PN->getParent() != InnermostLoop.getHeader()) {
865	for (const Use &Inc : PN->incoming_values())
866	WorkList.push_back(Elt: Inc);
867	} else
868	AddPointer (Ptr);
869	}
870	}
871
872	// Walk back through the IR for a pointer, looking for a select like the
873	// following:
874	//
875	// %offset = select i1 %cmp, i64 %a, i64 %b
876	// %addr = getelementptr double, double %base, i64 %offset*
877	// %ld = load double, double %addr, align 8*
878	//
879	// We won't be able to form a single SCEVAddRecExpr from this since the
880	// address for each loop iteration depends on %cmp. We could potentially
881	// produce multiple valid SCEVAddRecExprs, though, and check all of them for
882	// memory safety/aliasing if needed.
883	//
884	// If we encounter some IR we don't yet handle, or something obviously fine
885	// like a constant, then we just add the SCEV for that term to the list passed
886	// in by the caller. If we have a node that may potentially yield a valid
887	// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
888	// ourselves before adding to the list.
889	static void findForkedSCEVs(
890	ScalarEvolution SE, const* Loop L, Value Ptr,
891	SmallVectorImpl<PointerIntPair<const SCEV , `1`, bool*>> &ScevList,
892	unsigned Depth) {
893	// If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
894	// we've exceeded our limit on recursion, just return whatever we have
895	// regardless of whether it can be used for a forked pointer or not, along
896	// with an indication of whether it might be a poison or undef value.
897	const SCEV *Scev = SE->getSCEV(V: Ptr);
898	if (isa<SCEVAddRecExpr>(Val: Scev) \|\| L->isLoopInvariant(V: Ptr) \|\|
899	!isa<Instruction>(Val: Ptr) \|\| Depth == `0`) {
900	ScevList.emplace_back(Args&: Scev, Args: !isGuaranteedNotToBeUndefOrPoison(V: Ptr));
901	return;
902	}
903
904	Depth--;
905
906	auto UndefPoisonCheck = [](PointerIntPair<const SCEV , `1`, bool*> S) {
907	return get<`1`>(Pair: S);
908	};
909
910	auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV L, const* SCEV *R) {
911	switch (Opcode) {
912	case Instruction::Add:
913	return SE->getAddExpr(LHS: L, RHS: R);
914	case Instruction::Sub:
915	return SE->getMinusSCEV(LHS: L, RHS: R);
916	default:
917	llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
918	}
919	};
920
921	Instruction *I = cast<Instruction>(Val: Ptr);
922	unsigned Opcode = I->getOpcode();
923	switch (Opcode) {
924	case Instruction::GetElementPtr: {
925	GetElementPtrInst *GEP = cast<GetElementPtrInst>(Val: I);
926	Type *SourceTy = GEP->getSourceElementType();
927	// We only handle base + single offset GEPs here for now.
928	// Not dealing with preexisting gathers yet, so no vectors.
929	if (I->getNumOperands() != `2` \|\| SourceTy->isVectorTy()) {
930	ScevList.emplace_back(Args&: Scev, Args: !isGuaranteedNotToBeUndefOrPoison(V: GEP));
931	break;
932	}
933	SmallVector<PointerIntPair<const SCEV , `1`, bool*>, `2`> BaseScevs;
934	SmallVector<PointerIntPair<const SCEV , `1`, bool*>, `2`> OffsetScevs;
935	findForkedSCEVs(SE, L, Ptr: I->getOperand(i: `0`), ScevList&: BaseScevs, Depth);
936	findForkedSCEVs(SE, L, Ptr: I->getOperand(i: `1`), ScevList&: OffsetScevs, Depth);
937
938	// See if we need to freeze our fork...
939	bool NeedsFreeze = any_of(Range&: BaseScevs, P: UndefPoisonCheck) \|\|
940	any_of(Range&: OffsetScevs, P: UndefPoisonCheck);
941
942	// Check that we only have a single fork, on either the base or the offset.
943	// Copy the SCEV across for the one without a fork in order to generate
944	// the full SCEV for both sides of the GEP.
945	if (OffsetScevs.size() == `2` && BaseScevs.size() == `1`)
946	BaseScevs.push_back(Elt: BaseScevs [`0`]);
947	else if (BaseScevs.size() == `2` && OffsetScevs.size() == `1`)
948	OffsetScevs.push_back(Elt: OffsetScevs [`0`]);
949	else {
950	ScevList.emplace_back(Args&: Scev, Args&: NeedsFreeze);
951	break;
952	}
953
954	// Find the pointer type we need to extend to.
955	Type *IntPtrTy = SE->getEffectiveSCEVType(
956	Ty: SE->getSCEV(V: GEP->getPointerOperand())->getType());
957
958	// Find the size of the type being pointed to. We only have a single
959	// index term (guarded above) so we don't need to index into arrays or
960	// structures, just get the size of the scalar value.
961	const SCEV *Size = SE->getSizeOfExpr(IntTy: IntPtrTy, AllocTy: SourceTy);
962
963	// Scale up the offsets by the size of the type, then add to the bases.
964	const SCEV *Scaled1 = SE->getMulExpr(
965	LHS: Size, RHS: SE->getTruncateOrSignExtend(V: get<`0`>(Pair: OffsetScevs [`0`]), Ty: IntPtrTy));
966	const SCEV *Scaled2 = SE->getMulExpr(
967	LHS: Size, RHS: SE->getTruncateOrSignExtend(V: get<`0`>(Pair: OffsetScevs [`1`]), Ty: IntPtrTy));
968	ScevList.emplace_back(Args: SE->getAddExpr(LHS: get<`0`>(Pair: BaseScevs [`0`]), RHS: Scaled1),
969	Args&: NeedsFreeze);
970	ScevList.emplace_back(Args: SE->getAddExpr(LHS: get<`0`>(Pair: BaseScevs [`1`]), RHS: Scaled2),
971	Args&: NeedsFreeze);
972	break;
973	}
974	case Instruction::Select: {
975	SmallVector<PointerIntPair<const SCEV , `1`, bool*>, `2`> ChildScevs;
976	// A select means we've found a forked pointer, but we currently only
977	// support a single select per pointer so if there's another behind this
978	// then we just bail out and return the generic SCEV.
979	findForkedSCEVs(SE, L, Ptr: I->getOperand(i: `1`), ScevList&: ChildScevs, Depth);
980	findForkedSCEVs(SE, L, Ptr: I->getOperand(i: `2`), ScevList&: ChildScevs, Depth);
981	if (ChildScevs.size() == `2`) {
982	ScevList.push_back(Elt: ChildScevs [`0`]);
983	ScevList.push_back(Elt: ChildScevs [`1`]);
984	} else
985	ScevList.emplace_back(Args&: Scev, Args: !isGuaranteedNotToBeUndefOrPoison(V: Ptr));
986	break;
987	}
988	case Instruction::PHI: {
989	SmallVector<PointerIntPair<const SCEV , `1`, bool*>, `2`> ChildScevs;
990	// A phi means we've found a forked pointer, but we currently only
991	// support a single phi per pointer so if there's another behind this
992	// then we just bail out and return the generic SCEV.
993	if (I->getNumOperands() == `2`) {
994	findForkedSCEVs(SE, L, Ptr: I->getOperand(i: `0`), ScevList&: ChildScevs, Depth);
995	findForkedSCEVs(SE, L, Ptr: I->getOperand(i: `1`), ScevList&: ChildScevs, Depth);
996	}
997	if (ChildScevs.size() == `2`) {
998	ScevList.push_back(Elt: ChildScevs [`0`]);
999	ScevList.push_back(Elt: ChildScevs [`1`]);
1000	} else
1001	ScevList.emplace_back(Args&: Scev, Args: !isGuaranteedNotToBeUndefOrPoison(V: Ptr));
1002	break;
1003	}
1004	case Instruction::Add:
1005	case Instruction::Sub: {
1006	SmallVector<PointerIntPair<const SCEV , `1`, bool*>> LScevs;
1007	SmallVector<PointerIntPair<const SCEV , `1`, bool*>> RScevs;
1008	findForkedSCEVs(SE, L, Ptr: I->getOperand(i: `0`), ScevList&: LScevs, Depth);
1009	findForkedSCEVs(SE, L, Ptr: I->getOperand(i: `1`), ScevList&: RScevs, Depth);
1010
1011	// See if we need to freeze our fork...
1012	bool NeedsFreeze =
1013	any_of(Range&: LScevs, P: UndefPoisonCheck) \|\| any_of(Range&: RScevs, P: UndefPoisonCheck);
1014
1015	// Check that we only have a single fork, on either the left or right side.
1016	// Copy the SCEV across for the one without a fork in order to generate
1017	// the full SCEV for both sides of the BinOp.
1018	if (LScevs.size() == `2` && RScevs.size() == `1`)
1019	RScevs.push_back(Elt: RScevs [`0`]);
1020	else if (RScevs.size() == `2` && LScevs.size() == `1`)
1021	LScevs.push_back(Elt: LScevs [`0`]);
1022	else {
1023	ScevList.emplace_back(Args&: Scev, Args&: NeedsFreeze);
1024	break;
1025	}
1026
1027	ScevList.emplace_back(
1028	Args: GetBinOpExpr (Opcode, get<`0`>(Pair: LScevs [`0`]), get<`0`>(Pair: RScevs [`0`])),
1029	Args&: NeedsFreeze);
1030	ScevList.emplace_back(
1031	Args: GetBinOpExpr (Opcode, get<`0`>(Pair: LScevs [`1`]), get<`0`>(Pair: RScevs [`1`])),
1032	Args&: NeedsFreeze);
1033	break;
1034	}
1035	default:
1036	// Just return the current SCEV if we haven't handled the instruction yet.
1037	LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
1038	ScevList.emplace_back(Args&: Scev, Args: !isGuaranteedNotToBeUndefOrPoison(V: Ptr));
1039	break;
1040	}
1041	}
1042
1043	static SmallVector<PointerIntPair<const SCEV , `1`, bool*>>
1044	findForkedPointer(PredicatedScalarEvolution &PSE,
1045	const DenseMap<Value , const* SCEV > &StridesMap, Value Ptr,
1046	const Loop *L) {
1047	ScalarEvolution *SE = PSE.getSE();
1048	assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
1049	SmallVector<PointerIntPair<const SCEV , `1`, bool*>> Scevs;
1050	findForkedSCEVs(SE, L, Ptr, ScevList&: Scevs, Depth: MaxForkedSCEVDepth);
1051
1052	// For now, we will only accept a forked pointer with two possible SCEVs
1053	// that are either SCEVAddRecExprs or loop invariant.
1054	if (Scevs.size() == `2` &&
1055	(isa<SCEVAddRecExpr>(Val: get<`0`>(Pair: Scevs [`0`])) \|\|
1056	SE->isLoopInvariant(S: get<`0`>(Pair: Scevs [`0`]), L)) &&
1057	(isa<SCEVAddRecExpr>(Val: get<`0`>(Pair: Scevs [`1`])) \|\|
1058	SE->isLoopInvariant(S: get<`0`>(Pair: Scevs [`1`]), L))) {
1059	LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n");
1060	LLVM_DEBUG(dbgs() << "\t(1) " << *get<`0`>(Scevs[`0`]) << "\n");
1061	LLVM_DEBUG(dbgs() << "\t(2) " << *get<`0`>(Scevs[`1`]) << "\n");
1062	return Scevs;
1063	}
1064
1065	return {{replaceSymbolicStrideSCEV(PSE, PtrToStride: StridesMap, Ptr), false}};
1066	}
1067
1068	bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
1069	MemAccessInfo Access, Type *AccessTy,
1070	const DenseMap<Value , const* SCEV *> &StridesMap,
1071	DenseMap<Value , unsigned*> &DepSetId,
1072	Loop TheLoop, unsigned* &RunningDepId,
1073	unsigned ASId, bool ShouldCheckWrap,
1074	bool Assume) {
1075	Value *Ptr = Access.getPointer();
1076
1077	SmallVector<PointerIntPair<const SCEV , `1`, bool*>> TranslatedPtrs =
1078	findForkedPointer(PSE, StridesMap, Ptr, L: TheLoop);
1079
1080	for (auto &P : TranslatedPtrs) {
1081	const SCEV *PtrExpr = get<`0`>(Pair: P);
1082	if (!hasComputableBounds(PSE, Ptr, PtrScev: PtrExpr, L: TheLoop, Assume))
1083	return false;
1084
1085	// When we run after a failing dependency check we have to make sure
1086	// we don't have wrapping pointers.
1087	if (ShouldCheckWrap) {
1088	// Skip wrap checking when translating pointers.
1089	if (TranslatedPtrs.size() > `1`)
1090	return false;
1091
1092	if (!isNoWrap(PSE, Strides: StridesMap, Ptr, AccessTy, L: TheLoop)) {
1093	auto *Expr = PSE.getSCEV(V: Ptr);
1094	if (!Assume \|\| !isa<SCEVAddRecExpr>(Val: Expr))
1095	return false;
1096	PSE.setNoOverflow(V: Ptr, Flags: SCEVWrapPredicate::IncrementNUSW);
1097	}
1098	}
1099	// If there's only one option for Ptr, look it up after bounds and wrap
1100	// checking, because assumptions might have been added to PSE.
1101	if (TranslatedPtrs.size() == `1`)
1102	TranslatedPtrs [`0`] = {replaceSymbolicStrideSCEV(PSE, PtrToStride: StridesMap, Ptr),
1103	false};
1104	}
1105
1106	for (auto [PtrExpr, NeedsFreeze] : TranslatedPtrs) {
1107	// The id of the dependence set.
1108	unsigned DepId;
1109
1110	if (isDependencyCheckNeeded()) {
1111	Value *Leader = DepCands.getLeaderValue(V: Access).getPointer();
1112	unsigned &LeaderId = DepSetId [Leader];
1113	if (!LeaderId)
1114	LeaderId = RunningDepId++;
1115	DepId = LeaderId;
1116	} else
1117	// Each access has its own dependence set.
1118	DepId = RunningDepId++;
1119
1120	bool IsWrite = Access.getInt();
1121	RtCheck.insert(Lp: TheLoop, Ptr, PtrExpr, AccessTy, WritePtr: IsWrite, DepSetId: DepId, ASId, PSE,
1122	NeedsFreeze);
1123	LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << `'\n'`);
1124	}
1125
1126	return true;
1127	}
1128
1129	bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
1130	ScalarEvolution SE, Loop TheLoop,
1131	const DenseMap<Value , const* SCEV *> &StridesMap,
1132	Value &UncomputablePtr, bool* ShouldCheckWrap) {
1133	// Find pointers with computable bounds. We are going to use this information
1134	// to place a runtime bound check.
1135	bool CanDoRT = true;
1136
1137	bool MayNeedRTCheck = false;
1138	if (!IsRTCheckAnalysisNeeded) return true;
1139
1140	bool IsDepCheckNeeded = isDependencyCheckNeeded();
1141
1142	// We assign a consecutive id to access from different alias sets.
1143	// Accesses between different groups doesn't need to be checked.
1144	unsigned ASId = `0`;
1145	for (auto &AS : AST) {
1146	int NumReadPtrChecks = `0`;
1147	int NumWritePtrChecks = `0`;
1148	bool CanDoAliasSetRT = true;
1149	++ASId;
1150	auto ASPointers = AS.getPointers();
1151
1152	// We assign consecutive id to access from different dependence sets.
1153	// Accesses within the same set don't need a runtime check.
1154	unsigned RunningDepId = `1`;
1155	DenseMap<Value , unsigned*> DepSetId;
1156
1157	SmallVector<std::pair<MemAccessInfo, Type *>, `4`> Retries;
1158
1159	// First, count how many write and read accesses are in the alias set. Also
1160	// collect MemAccessInfos for later.
1161	SmallVector<MemAccessInfo, `4`> AccessInfos;
1162	for (const Value *Ptr_ : ASPointers) {
1163	Value Ptr = const_cast<Value >(Ptr_);
1164	bool IsWrite = Accesses.count(Key: MemAccessInfo (Ptr, true));
1165	if (IsWrite)
1166	++NumWritePtrChecks;
1167	else
1168	++NumReadPtrChecks;
1169	AccessInfos.emplace_back(Args&: Ptr, Args&: IsWrite);
1170	}
1171
1172	// We do not need runtime checks for this alias set, if there are no writes
1173	// or a single write and no reads.
1174	if (NumWritePtrChecks == `0` \|\|
1175	(NumWritePtrChecks == `1` && NumReadPtrChecks == `0`)) {
1176	assert((ASPointers.size() <= `1` \|\|
1177	all_of(ASPointers,
1178	[this](const Value *Ptr) {
1179	MemAccessInfo AccessWrite(const_cast<Value *>(Ptr),
1180	true);
1181	return DepCands.findValue(AccessWrite) == DepCands.end();
1182	})) &&
1183	"Can only skip updating CanDoRT below, if all entries in AS "
1184	"are reads or there is at most 1 entry");
1185	continue;
1186	}
1187
1188	for (auto &Access : AccessInfos) {
1189	for (const auto &AccessTy : Accesses [Access]) {
1190	if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1191	DepSetId, TheLoop, RunningDepId, ASId,
1192	ShouldCheckWrap, Assume: false)) {
1193	LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
1194	<< *Access.getPointer() << `'\n'`);
1195	Retries.push_back(Elt: {Access, AccessTy});
1196	CanDoAliasSetRT = false;
1197	}
1198	}
1199	}
1200
1201	// Note that this function computes CanDoRT and MayNeedRTCheck
1202	// independently. For example CanDoRT=false, MayNeedRTCheck=false means that
1203	// we have a pointer for which we couldn't find the bounds but we don't
1204	// actually need to emit any checks so it does not matter.
1205	//
1206	// We need runtime checks for this alias set, if there are at least 2
1207	// dependence sets (in which case RunningDepId > 2) or if we need to re-try
1208	// any bound checks (because in that case the number of dependence sets is
1209	// incomplete).
1210	bool NeedsAliasSetRTCheck = RunningDepId > `2` \|\| !Retries.empty();
1211
1212	// We need to perform run-time alias checks, but some pointers had bounds
1213	// that couldn't be checked.
1214	if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
1215	// Reset the CanDoSetRt flag and retry all accesses that have failed.
1216	// We know that we need these checks, so we can now be more aggressive
1217	// and add further checks if required (overflow checks).
1218	CanDoAliasSetRT = true;
1219	for (auto Retry : Retries) {
1220	MemAccessInfo Access = Retry.first;
1221	Type *AccessTy = Retry.second;
1222	if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1223	DepSetId, TheLoop, RunningDepId, ASId,
1224	ShouldCheckWrap, /Assume=/true)) {
1225	CanDoAliasSetRT = false;
1226	UncomputablePtr = Access.getPointer();
1227	break;
1228	}
1229	}
1230	}
1231
1232	CanDoRT &= CanDoAliasSetRT;
1233	MayNeedRTCheck \|= NeedsAliasSetRTCheck;
1234	++ASId;
1235	}
1236
1237	// If the pointers that we would use for the bounds comparison have different
1238	// address spaces, assume the values aren't directly comparable, so we can't
1239	// use them for the runtime check. We also have to assume they could
1240	// overlap. In the future there should be metadata for whether address spaces
1241	// are disjoint.
1242	unsigned NumPointers = RtCheck.Pointers.size();
1243	for (unsigned i = `0`; i < NumPointers; ++i) {
1244	for (unsigned j = i + `1`; j < NumPointers; ++j) {
1245	// Only need to check pointers between two different dependency sets.
1246	if (RtCheck.Pointers [i].DependencySetId ==
1247	RtCheck.Pointers [j].DependencySetId)
1248	continue;
1249	// Only need to check pointers in the same alias set.
1250	if (RtCheck.Pointers [i].AliasSetId != RtCheck.Pointers [j].AliasSetId)
1251	continue;
1252
1253	Value *PtrI = RtCheck.Pointers [i].PointerValue;
1254	Value *PtrJ = RtCheck.Pointers [j].PointerValue;
1255
1256	unsigned ASi = PtrI->getType()->getPointerAddressSpace();
1257	unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
1258	if (ASi != ASj) {
1259	LLVM_DEBUG(
1260	dbgs() << "LAA: Runtime check would require comparison between"
1261	" different address spaces\n");
1262	return false;
1263	}
1264	}
1265	}
1266
1267	if (MayNeedRTCheck && CanDoRT)
1268	RtCheck.generateChecks(DepCands, UseDependencies: IsDepCheckNeeded);
1269
1270	LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
1271	<< " pointer comparisons.\n");
1272
1273	// If we can do run-time checks, but there are no checks, no runtime checks
1274	// are needed. This can happen when all pointers point to the same underlying
1275	// object for example.
1276	RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != `0` : MayNeedRTCheck;
1277
1278	bool CanDoRTIfNeeded = !RtCheck.Need \|\| CanDoRT;
1279	if (!CanDoRTIfNeeded)
1280	RtCheck.reset();
1281	return CanDoRTIfNeeded;
1282	}
1283
1284	void AccessAnalysis::processMemAccesses() {
1285	// We process the set twice: first we process read-write pointers, last we
1286	// process read-only pointers. This allows us to skip dependence tests for
1287	// read-only pointers.
1288
1289	LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
1290	LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
1291	LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
1292	LLVM_DEBUG({
1293	for (auto A : Accesses)
1294	dbgs() << "\t" << *A.first.getPointer() << " ("
1295	<< (A.first.getInt()
1296	? "write"
1297	: (ReadOnlyPtr.count(A.first.getPointer()) ? "read-only"
1298	: "read"))
1299	<< ")\n";
1300	});
1301
1302	// The AliasSetTracker has nicely partitioned our pointers by metadata
1303	// compatibility and potential for underlying-object overlap. As a result, we
1304	// only need to check for potential pointer dependencies within each alias
1305	// set.
1306	for (const auto &AS : AST) {
1307	// Note that both the alias-set tracker and the alias sets themselves used
1308	// ordered collections internally and so the iteration order here is
1309	// deterministic.
1310	auto ASPointers = AS.getPointers();
1311
1312	bool SetHasWrite = false;
1313
1314	// Map of pointers to last access encountered.
1315	typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
1316	UnderlyingObjToAccessMap ObjToLastAccess;
1317
1318	// Set of access to check after all writes have been processed.
1319	PtrAccessMap DeferredAccesses;
1320
1321	// Iterate over each alias set twice, once to process read/write pointers,
1322	// and then to process read-only pointers.
1323	for (int SetIteration = `0`; SetIteration < `2`; ++SetIteration) {
1324	bool UseDeferred = SetIteration > `0`;
1325	PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
1326
1327	for (const Value *Ptr_ : ASPointers) {
1328	Value Ptr = const_cast<Value >(Ptr_);
1329
1330	// For a single memory access in AliasSetTracker, Accesses may contain
1331	// both read and write, and they both need to be handled for CheckDeps.
1332	for (const auto &AC : S) {
1333	if (AC.first.getPointer() != Ptr)
1334	continue;
1335
1336	bool IsWrite = AC.first.getInt();
1337
1338	// If we're using the deferred access set, then it contains only
1339	// reads.
1340	bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
1341	if (UseDeferred && !IsReadOnlyPtr)
1342	continue;
1343	// Otherwise, the pointer must be in the PtrAccessSet, either as a
1344	// read or a write.
1345	assert(((IsReadOnlyPtr && UseDeferred) \|\| IsWrite \|\|
1346	S.count(MemAccessInfo (Ptr, false))) &&
1347	"Alias-set pointer not in the access set?");
1348
1349	MemAccessInfo Access(Ptr, IsWrite);
1350	DepCands.insert(Data: Access);
1351
1352	// Memorize read-only pointers for later processing and skip them in
1353	// the first round (they need to be checked after we have seen all
1354	// write pointers). Note: we also mark pointer that are not
1355	// consecutive as "read-only" pointers (so that we check
1356	// "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
1357	if (!UseDeferred && IsReadOnlyPtr) {
1358	// We only use the pointer keys, the types vector values don't
1359	// matter.
1360	DeferredAccesses.insert(KV: {Access, {}});
1361	continue;
1362	}
1363
1364	// If this is a write - check other reads and writes for conflicts. If
1365	// this is a read only check other writes for conflicts (but only if
1366	// there is no other write to the ptr - this is an optimization to
1367	// catch "a[i] = a[i] + " without having to do a dependence check).
1368	if ((IsWrite \|\| IsReadOnlyPtr) && SetHasWrite) {
1369	CheckDeps.push_back(Elt: Access);
1370	IsRTCheckAnalysisNeeded = true;
1371	}
1372
1373	if (IsWrite)
1374	SetHasWrite = true;
1375
1376	// Create sets of pointers connected by a shared alias set and
1377	// underlying object.
1378	typedef SmallVector<const Value *, `16`> ValueVector;
1379	ValueVector TempObjects;
1380
1381	UnderlyingObjects [Ptr] = {};
1382	SmallVector<const Value *, `16`> &UOs = UnderlyingObjects [Ptr];
1383	::getUnderlyingObjects(V: Ptr, Objects&: UOs, LI);
1384	LLVM_DEBUG(dbgs()
1385	<< "Underlying objects for pointer " << *Ptr << "\n");
1386	for (const Value *UnderlyingObj : UOs) {
1387	// nullptr never alias, don't join sets for pointer that have "null"
1388	// in their UnderlyingObjects list.
1389	if (isa<ConstantPointerNull>(Val: UnderlyingObj) &&
1390	!NullPointerIsDefined(
1391	F: TheLoop->getHeader()->getParent(),
1392	AS: UnderlyingObj->getType()->getPointerAddressSpace()))
1393	continue;
1394
1395	UnderlyingObjToAccessMap::iterator Prev =
1396	ObjToLastAccess.find(Val: UnderlyingObj);
1397	if (Prev != ObjToLastAccess.end())
1398	DepCands.unionSets(V1: Access, V2: Prev ->second);
1399
1400	ObjToLastAccess [UnderlyingObj] = Access;
1401	LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
1402	}
1403	}
1404	}
1405	}
1406	}
1407	}
1408
1409	/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
1410	/// i.e. monotonically increasing/decreasing.
1411	static bool isNoWrapAddRec(Value Ptr, const* SCEVAddRecExpr *AR,
1412	PredicatedScalarEvolution &PSE, const Loop *L) {
1413
1414	// FIXME: This should probably only return true for NUW.
1415	if (AR->getNoWrapFlags(Mask: SCEV::NoWrapMask))
1416	return true;
1417
1418	if (PSE.hasNoOverflow(V: Ptr, Flags: SCEVWrapPredicate::IncrementNUSW))
1419	return true;
1420
1421	// Scalar evolution does not propagate the non-wrapping flags to values that
1422	// are derived from a non-wrapping induction variable because non-wrapping
1423	// could be flow-sensitive.
1424	//
1425	// Look through the potentially overflowing instruction to try to prove
1426	// non-wrapping for the specific* value of Ptr.*
1427
1428	// The arithmetic implied by an inbounds GEP can't overflow.
1429	auto *GEP = dyn_cast<GetElementPtrInst>(Val: Ptr);
1430	if (!GEP \|\| !GEP->isInBounds())
1431	return false;
1432
1433	// Make sure there is only one non-const index and analyze that.
1434	Value NonConstIndex = nullptr*;
1435	for (Value *Index : GEP->indices())
1436	if (!isa<ConstantInt>(Val: Index)) {
1437	if (NonConstIndex)
1438	return false;
1439	NonConstIndex = Index;
1440	}
1441	if (!NonConstIndex)
1442	// The recurrence is on the pointer, ignore for now.
1443	return false;
1444
1445	// The index in GEP is signed. It is non-wrapping if it's derived from a NSW
1446	// AddRec using a NSW operation.
1447	if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Val: NonConstIndex))
1448	if (OBO->hasNoSignedWrap() &&
1449	// Assume constant for other the operand so that the AddRec can be
1450	// easily found.
1451	isa<ConstantInt>(Val: OBO->getOperand(i_nocapture: `1`))) {
1452	auto *OpScev = PSE.getSCEV(V: OBO->getOperand(i_nocapture: `0`));
1453
1454	if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(Val: OpScev))
1455	return OpAR->getLoop() == L && OpAR->getNoWrapFlags(Mask: SCEV::FlagNSW);
1456	}
1457
1458	return false;
1459	}
1460
1461	/// Check whether the access through \p Ptr has a constant stride.
1462	std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
1463	Type AccessTy, Value Ptr,
1464	const Loop *Lp,
1465	const DenseMap<Value , const* SCEV *> &StridesMap,
1466	bool Assume, bool ShouldCheckWrap) {
1467	Type *Ty = Ptr->getType();
1468	assert(Ty->isPointerTy() && "Unexpected non-ptr");
1469
1470	if (isa<ScalableVectorType>(Val: AccessTy)) {
1471	LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
1472	<< "\n");
1473	return std::nullopt;
1474	}
1475
1476	const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, PtrToStride: StridesMap, Ptr);
1477
1478	const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Val: PtrScev);
1479	if (Assume && !AR)
1480	AR = PSE.getAsAddRec(V: Ptr);
1481
1482	if (!AR) {
1483	LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1484	<< " SCEV: " << *PtrScev << "\n");
1485	return std::nullopt;
1486	}
1487
1488	// The access function must stride over the innermost loop.
1489	if (Lp != AR->getLoop()) {
1490	LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
1491	<< Ptr << " SCEV: " << AR << "\n");
1492	return std::nullopt;
1493	}
1494
1495	// Check the step is constant.
1496	const SCEV Step = AR->getStepRecurrence(SE&: PSE.getSE());
1497
1498	// Calculate the pointer stride and check if it is constant.
1499	const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: Step);
1500	if (!C) {
1501	LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
1502	<< " SCEV: " << *AR << "\n");
1503	return std::nullopt;
1504	}
1505
1506	auto &DL = Lp->getHeader()->getModule()->getDataLayout();
1507	TypeSize AllocSize = DL.getTypeAllocSize(Ty: AccessTy);
1508	int64_t Size = AllocSize.getFixedValue();
1509	const APInt &APStepVal = C->getAPInt();
1510
1511	// Huge step value - give up.
1512	if (APStepVal.getBitWidth() > `64`)
1513	return std::nullopt;
1514
1515	int64_t StepVal = APStepVal.getSExtValue();
1516
1517	// Strided access.
1518	int64_t Stride = StepVal / Size;
1519	int64_t Rem = StepVal % Size;
1520	if (Rem)
1521	return std::nullopt;
1522
1523	if (!ShouldCheckWrap)
1524	return Stride;
1525
1526	// The address calculation must not wrap. Otherwise, a dependence could be
1527	// inverted.
1528	if (isNoWrapAddRec(Ptr, AR, PSE, L: Lp))
1529	return Stride;
1530
1531	// An inbounds getelementptr that is a AddRec with a unit stride
1532	// cannot wrap per definition. If it did, the result would be poison
1533	// and any memory access dependent on it would be immediate UB
1534	// when executed.
1535	if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: Ptr);
1536	GEP && GEP->isInBounds() && (Stride == `1` \|\| Stride == -`1`))
1537	return Stride;
1538
1539	// If the null pointer is undefined, then a access sequence which would
1540	// otherwise access it can be assumed not to unsigned wrap. Note that this
1541	// assumes the object in memory is aligned to the natural alignment.
1542	unsigned AddrSpace = Ty->getPointerAddressSpace();
1543	if (!NullPointerIsDefined(F: Lp->getHeader()->getParent(), AS: AddrSpace) &&
1544	(Stride == `1` \|\| Stride == -`1`))
1545	return Stride;
1546
1547	if (Assume) {
1548	PSE.setNoOverflow(V: Ptr, Flags: SCEVWrapPredicate::IncrementNUSW);
1549	LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
1550	<< "LAA: Pointer: " << *Ptr << "\n"
1551	<< "LAA: SCEV: " << *AR << "\n"
1552	<< "LAA: Added an overflow assumption\n");
1553	return Stride;
1554	}
1555	LLVM_DEBUG(
1556	dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1557	<< Ptr << " SCEV: " << AR << "\n");
1558	return std::nullopt;
1559	}
1560
1561	std::optional<int> llvm::getPointersDiff(Type ElemTyA, Value PtrA,
1562	Type ElemTyB, Value PtrB,
1563	const DataLayout &DL,
1564	ScalarEvolution &SE, bool StrictCheck,
1565	bool CheckType) {
1566	assert(PtrA && PtrB && "Expected non-nullptr pointers.");
1567
1568	// Make sure that A and B are different pointers.
1569	if (PtrA == PtrB)
1570	return `0`;
1571
1572	// Make sure that the element types are the same if required.
1573	if (CheckType && ElemTyA != ElemTyB)
1574	return std::nullopt;
1575
1576	unsigned ASA = PtrA->getType()->getPointerAddressSpace();
1577	unsigned ASB = PtrB->getType()->getPointerAddressSpace();
1578
1579	// Check that the address spaces match.
1580	if (ASA != ASB)
1581	return std::nullopt;
1582	unsigned IdxWidth = DL.getIndexSizeInBits(AS: ASA);
1583
1584	APInt OffsetA(IdxWidth, `0`), OffsetB(IdxWidth, `0`);
1585	Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, Offset&: OffsetA);
1586	Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, Offset&: OffsetB);
1587
1588	int Val;
1589	if (PtrA1 == PtrB1) {
1590	// Retrieve the address space again as pointer stripping now tracks through
1591	// `addrspacecast`.
1592	ASA = cast<PointerType>(Val: PtrA1->getType())->getAddressSpace();
1593	ASB = cast<PointerType>(Val: PtrB1->getType())->getAddressSpace();
1594	// Check that the address spaces match and that the pointers are valid.
1595	if (ASA != ASB)
1596	return std::nullopt;
1597
1598	IdxWidth = DL.getIndexSizeInBits(AS: ASA);
1599	OffsetA = OffsetA.sextOrTrunc(width: IdxWidth);
1600	OffsetB = OffsetB.sextOrTrunc(width: IdxWidth);
1601
1602	OffsetB -= OffsetA;
1603	Val = OffsetB.getSExtValue();
1604	} else {
1605	// Otherwise compute the distance with SCEV between the base pointers.
1606	const SCEV *PtrSCEVA = SE.getSCEV(V: PtrA);
1607	const SCEV *PtrSCEVB = SE.getSCEV(V: PtrB);
1608	const auto *Diff =
1609	dyn_cast<SCEVConstant>(Val: SE.getMinusSCEV(LHS: PtrSCEVB, RHS: PtrSCEVA));
1610	if (!Diff)
1611	return std::nullopt;
1612	Val = Diff->getAPInt().getSExtValue();
1613	}
1614	int Size = DL.getTypeStoreSize(Ty: ElemTyA);
1615	int Dist = Val / Size;
1616
1617	// Ensure that the calculated distance matches the type-based one after all
1618	// the bitcasts removal in the provided pointers.
1619	if (!StrictCheck \|\| Dist * Size == Val)
1620	return Dist;
1621	return std::nullopt;
1622	}
1623
1624	bool llvm::sortPtrAccesses(ArrayRef<Value > VL, Type ElemTy,
1625	const DataLayout &DL, ScalarEvolution &SE,
1626	SmallVectorImpl<unsigned> &SortedIndices) {
1627	assert(llvm::all_of(
1628	VL, [](const Value V) { return* V->getType()->isPointerTy(); }) &&
1629	"Expected list of pointer operands.");
1630	// Walk over the pointers, and map each of them to an offset relative to
1631	// first pointer in the array.
1632	Value *Ptr0 = VL [`0`];
1633
1634	using DistOrdPair = std::pair<int64_t, int>;
1635	auto Compare = llvm::less_first ();
1636	std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
1637	Offsets.emplace(args: `0`, args: `0`);
1638	int Cnt = `1`;
1639	bool IsConsecutive = true;
1640	for (auto *Ptr : VL.drop_front()) {
1641	std::optional<int> Diff = getPointersDiff(ElemTyA: ElemTy, PtrA: Ptr0, ElemTyB: ElemTy, PtrB: Ptr, DL, SE,
1642	/StrictCheck=/true);
1643	if (!Diff)
1644	return false;
1645
1646	// Check if the pointer with the same offset is found.
1647	int64_t Offset = *Diff;
1648	auto Res = Offsets.emplace(args&: Offset, args&: Cnt);
1649	if (!Res.second)
1650	return false;
1651	// Consecutive order if the inserted element is the last one.
1652	IsConsecutive = IsConsecutive && std::next(x: Res.first) == Offsets.end();
1653	++Cnt;
1654	}
1655	SortedIndices.clear();
1656	if (!IsConsecutive) {
1657	// Fill SortedIndices array only if it is non-consecutive.
1658	SortedIndices.resize(N: VL.size());
1659	Cnt = `0`;
1660	for (const std::pair<int64_t, int> &Pair : Offsets) {
1661	SortedIndices [Cnt] = Pair.second;
1662	++Cnt;
1663	}
1664	}
1665	return true;
1666	}
1667
1668	/// Returns true if the memory operations \p A and \p B are consecutive.
1669	bool llvm::isConsecutiveAccess(Value A, Value B, const DataLayout &DL,
1670	ScalarEvolution &SE, bool CheckType) {
1671	Value *PtrA = getLoadStorePointerOperand(V: A);
1672	Value *PtrB = getLoadStorePointerOperand(V: B);
1673	if (!PtrA \|\| !PtrB)
1674	return false;
1675	Type *ElemTyA = getLoadStoreType(I: A);
1676	Type *ElemTyB = getLoadStoreType(I: B);
1677	std::optional<int> Diff =
1678	getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
1679	/StrictCheck=/true, CheckType);
1680	return Diff && *Diff == `1`;
1681	}
1682
1683	void MemoryDepChecker::addAccess(StoreInst *SI) {
1684	visitPointers(StartPtr: SI->getPointerOperand(), InnermostLoop: *InnermostLoop,
1685	AddPointer: [this, SI](Value *Ptr) {
1686	Accesses [MemAccessInfo (Ptr, true)].push_back(x: AccessIdx);
1687	InstMap.push_back(Elt: SI);
1688	++AccessIdx;
1689	});
1690	}
1691
1692	void MemoryDepChecker::addAccess(LoadInst *LI) {
1693	visitPointers(StartPtr: LI->getPointerOperand(), InnermostLoop: *InnermostLoop,
1694	AddPointer: [this, LI](Value *Ptr) {
1695	Accesses [MemAccessInfo (Ptr, false)].push_back(x: AccessIdx);
1696	InstMap.push_back(Elt: LI);
1697	++AccessIdx;
1698	});
1699	}
1700
1701	MemoryDepChecker::VectorizationSafetyStatus
1702	MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1703	switch (Type) {
1704	case NoDep:
1705	case Forward:
1706	case BackwardVectorizable:
1707	return VectorizationSafetyStatus::Safe;
1708
1709	case Unknown:
1710	return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
1711	case ForwardButPreventsForwarding:
1712	case Backward:
1713	case BackwardVectorizableButPreventsForwarding:
1714	case IndirectUnsafe:
1715	return VectorizationSafetyStatus::Unsafe;
1716	}
1717	llvm_unreachable("unexpected DepType!");
1718	}
1719
1720	bool MemoryDepChecker::Dependence::isBackward() const {
1721	switch (Type) {
1722	case NoDep:
1723	case Forward:
1724	case ForwardButPreventsForwarding:
1725	case Unknown:
1726	case IndirectUnsafe:
1727	return false;
1728
1729	case BackwardVectorizable:
1730	case Backward:
1731	case BackwardVectorizableButPreventsForwarding:
1732	return true;
1733	}
1734	llvm_unreachable("unexpected DepType!");
1735	}
1736
1737	bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1738	return isBackward() \|\| Type == Unknown;
1739	}
1740
1741	bool MemoryDepChecker::Dependence::isForward() const {
1742	switch (Type) {
1743	case Forward:
1744	case ForwardButPreventsForwarding:
1745	return true;
1746
1747	case NoDep:
1748	case Unknown:
1749	case BackwardVectorizable:
1750	case Backward:
1751	case BackwardVectorizableButPreventsForwarding:
1752	case IndirectUnsafe:
1753	return false;
1754	}
1755	llvm_unreachable("unexpected DepType!");
1756	}
1757
1758	bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1759	uint64_t TypeByteSize) {
1760	// If loads occur at a distance that is not a multiple of a feasible vector
1761	// factor store-load forwarding does not take place.
1762	// Positive dependences might cause troubles because vectorizing them might
1763	// prevent store-load forwarding making vectorized code run a lot slower.
1764	// a[i] = a[i-3] ^ a[i-8];
1765	// The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
1766	// hence on your typical architecture store-load forwarding does not take
1767	// place. Vectorizing in such cases does not make sense.
1768	// Store-load forwarding distance.
1769
1770	// After this many iterations store-to-load forwarding conflicts should not
1771	// cause any slowdowns.
1772	const uint64_t NumItersForStoreLoadThroughMemory = `8` * TypeByteSize;
1773	// Maximum vector factor.
1774	uint64_t MaxVFWithoutSLForwardIssues = std::min(
1775	a: VectorizerParams::MaxVectorWidth * TypeByteSize, b: MinDepDistBytes);
1776
1777	// Compute the smallest VF at which the store and load would be misaligned.
1778	for (uint64_t VF = `2` * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1779	VF *= `2`) {
1780	// If the number of vector iteration between the store and the load are
1781	// small we could incur conflicts.
1782	if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1783	MaxVFWithoutSLForwardIssues = (VF >> `1`);
1784	break;
1785	}
1786	}
1787
1788	if (MaxVFWithoutSLForwardIssues < `2` * TypeByteSize) {
1789	LLVM_DEBUG(
1790	dbgs() << "LAA: Distance " << Distance
1791	<< " that could cause a store-load forwarding conflict\n");
1792	return true;
1793	}
1794
1795	if (MaxVFWithoutSLForwardIssues < MinDepDistBytes &&
1796	MaxVFWithoutSLForwardIssues !=
1797	VectorizerParams::MaxVectorWidth * TypeByteSize)
1798	MinDepDistBytes = MaxVFWithoutSLForwardIssues;
1799	return false;
1800	}
1801
1802	void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1803	if (Status < S)
1804	Status = S;
1805	}
1806
1807	/// Given a dependence-distance \p Dist between two
1808	/// memory accesses, that have the same stride whose absolute value is given
1809	/// in \p Stride, and that have the same type size \p TypeByteSize,
1810	/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
1811	/// possible to prove statically that the dependence distance is larger
1812	/// than the range that the accesses will travel through the execution of
1813	/// the loop. If so, return true; false otherwise. This is useful for
1814	/// example in loops such as the following (PR31098):
1815	/// for (i = 0; i < D; ++i) {
1816	/// = out[i];
1817	/// out[i+D] =
1818	/// }
1819	static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1820	const SCEV &BackedgeTakenCount,
1821	const SCEV &Dist, uint64_t Stride,
1822	uint64_t TypeByteSize) {
1823
1824	// If we can prove that
1825	// () \|Dist\| > BackedgeTakenCount Step*
1826	// where Step is the absolute stride of the memory accesses in bytes,
1827	// then there is no dependence.
1828	//
1829	// Rationale:
1830	// We basically want to check if the absolute distance (\|Dist/Step\|)
1831	// is >= the loop iteration count (or > BackedgeTakenCount).
1832	// This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1833	// Section 4.2.1); Note, that for vectorization it is sufficient to prove
1834	// that the dependence distance is >= VF; This is checked elsewhere.
1835	// But in some cases we can prune dependence distances early, and
1836	// even before selecting the VF, and without a runtime test, by comparing
1837	// the distance against the loop iteration count. Since the vectorized code
1838	// will be executed only if LoopCount >= VF, proving distance >= LoopCount
1839	// also guarantees that distance >= VF.
1840	//
1841	const uint64_t ByteStride = Stride * TypeByteSize;
1842	const SCEV *Step = SE.getConstant(Ty: BackedgeTakenCount.getType(), V: ByteStride);
1843	const SCEV *Product = SE.getMulExpr(LHS: &BackedgeTakenCount, RHS: Step);
1844
1845	const SCEV *CastedDist = &Dist;
1846	const SCEV *CastedProduct = Product;
1847	uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Ty: Dist.getType());
1848	uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Ty: Product->getType());
1849
1850	// The dependence distance can be positive/negative, so we sign extend Dist;
1851	// The multiplication of the absolute stride in bytes and the
1852	// backedgeTakenCount is non-negative, so we zero extend Product.
1853	if (DistTypeSizeBits > ProductTypeSizeBits)
1854	CastedProduct = SE.getZeroExtendExpr(Op: Product, Ty: Dist.getType());
1855	else
1856	CastedDist = SE.getNoopOrSignExtend(V: &Dist, Ty: Product->getType());
1857
1858	// Is Dist - (BackedgeTakenCount Step) > 0 ?*
1859	// (If so, then we have proven () because \|Dist\| >= Dist)
1860	const SCEV *Minus = SE.getMinusSCEV(LHS: CastedDist, RHS: CastedProduct);
1861	if (SE.isKnownPositive(S: Minus))
1862	return true;
1863
1864	// Second try: Is -Dist - (BackedgeTakenCount Step) > 0 ?*
1865	// (If so, then we have proven () because \|Dist\| >= -1Dist)*
1866	const SCEV *NegDist = SE.getNegativeSCEV(V: CastedDist);
1867	Minus = SE.getMinusSCEV(LHS: NegDist, RHS: CastedProduct);
1868	if (SE.isKnownPositive(S: Minus))
1869	return true;
1870
1871	return false;
1872	}
1873
1874	/// Check the dependence for two accesses with the same stride \p Stride.
1875	/// \p Distance is the positive distance and \p TypeByteSize is type size in
1876	/// bytes.
1877	///
1878	/// \returns true if they are independent.
1879	static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1880	uint64_t TypeByteSize) {
1881	assert(Stride > `1` && "The stride must be greater than 1");
1882	assert(TypeByteSize > `0` && "The type size in byte must be non-zero");
1883	assert(Distance > `0` && "The distance must be non-zero");
1884
1885	// Skip if the distance is not multiple of type byte size.
1886	if (Distance % TypeByteSize)
1887	return false;
1888
1889	uint64_t ScaledDist = Distance / TypeByteSize;
1890
1891	// No dependence if the scaled distance is not multiple of the stride.
1892	// E.g.
1893	// for (i = 0; i < 1024 ; i += 4)
1894	// A[i+2] = A[i] + 1;
1895	//
1896	// Two accesses in memory (scaled distance is 2, stride is 4):
1897	// \| A[0] \| \| \| \| A[4] \| \| \| \|
1898	// \| \| \| A[2] \| \| \| \| A[6] \| \|
1899	//
1900	// E.g.
1901	// for (i = 0; i < 1024 ; i += 3)
1902	// A[i+4] = A[i] + 1;
1903	//
1904	// Two accesses in memory (scaled distance is 4, stride is 3):
1905	// \| A[0] \| \| \| A[3] \| \| \| A[6] \| \| \|
1906	// \| \| \| \| \| A[4] \| \| \| A[7] \| \|
1907	return ScaledDist % Stride;
1908	}
1909
1910	/// Returns true if any of the underlying objects has a loop varying address,
1911	/// i.e. may change in \p L.
1912	static bool
1913	isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
1914	ScalarEvolution &SE, const Loop *L) {
1915	return any_of(Range&: UnderlyingObjects, P: [&SE, L](const Value *UO) {
1916	return !SE.isLoopInvariant(S: SE.getSCEV(V: const_cast<Value *>(UO)), L);
1917	});
1918	}
1919
1920	namespace {
1921	struct DepDistanceStrideAndSizeInfo {
1922	const SCEV *Dist;
1923	uint64_t Stride;
1924	uint64_t TypeByteSize;
1925	bool AIsWrite;
1926	bool BIsWrite;
1927
1928	DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t Stride,
1929	uint64_t TypeByteSize, bool AIsWrite,
1930	bool BIsWrite)
1931	: Dist(Dist), Stride(Stride), TypeByteSize(TypeByteSize),
1932	AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
1933	};
1934	} // namespace
1935
1936	// Get the dependence distance, stride, type size and whether it is a write for
1937	// the dependence between A and B. Returns a DepType, if we can prove there's
1938	// no dependence or the analysis fails. Outlined to lambda to limit he scope
1939	// of various temporary variables, like A/BPtr, StrideA/BPtr and others.
1940	// Returns either the dependence result, if it could already be determined, or a
1941	// struct containing (Distance, Stride, TypeSize, AIsWrite, BIsWrite).
1942	static std::variant<MemoryDepChecker::Dependence::DepType,
1943	DepDistanceStrideAndSizeInfo>
1944	getDependenceDistanceStrideAndSize(
1945	const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
1946	const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
1947	const DenseMap<Value , const* SCEV *> &Strides,
1948	const DenseMap<Value , SmallVector<const* Value *, `16`>> &UnderlyingObjects,
1949	PredicatedScalarEvolution &PSE, const Loop *InnermostLoop) {
1950	auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1951	auto &SE = *PSE.getSE();
1952	auto [APtr, AIsWrite] = A;
1953	auto [BPtr, BIsWrite] = B;
1954
1955	// Two reads are independent.
1956	if (!AIsWrite && !BIsWrite)
1957	return MemoryDepChecker::Dependence::NoDep;
1958
1959	Type *ATy = getLoadStoreType(I: AInst);
1960	Type *BTy = getLoadStoreType(I: BInst);
1961
1962	// We cannot check pointers in different address spaces.
1963	if (APtr->getType()->getPointerAddressSpace() !=
1964	BPtr->getType()->getPointerAddressSpace())
1965	return MemoryDepChecker::Dependence::Unknown;
1966
1967	int64_t StrideAPtr =
1968	getPtrStride(PSE, AccessTy: ATy, Ptr: APtr, Lp: InnermostLoop, StridesMap: Strides, Assume: true).value_or(u: `0`);
1969	int64_t StrideBPtr =
1970	getPtrStride(PSE, AccessTy: BTy, Ptr: BPtr, Lp: InnermostLoop, StridesMap: Strides, Assume: true).value_or(u: `0`);
1971
1972	const SCEV *Src = PSE.getSCEV(V: APtr);
1973	const SCEV *Sink = PSE.getSCEV(V: BPtr);
1974
1975	// If the induction step is negative we have to invert source and sink of the
1976	// dependence when measuring the distance between them. We should not swap
1977	// AIsWrite with BIsWrite, as their uses expect them in program order.
1978	if (StrideAPtr < `0`) {
1979	std::swap(a&: Src, b&: Sink);
1980	std::swap(a&: AInst, b&: BInst);
1981	}
1982
1983	const SCEV *Dist = SE.getMinusSCEV(LHS: Sink, RHS: Src);
1984
1985	LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << Src << "Sink Scev: " << Sink
1986	<< "(Induction step: " << StrideAPtr << ")\n");
1987	LLVM_DEBUG(dbgs() << "LAA: Distance for " << AInst << " to " << BInst
1988	<< ": " << *Dist << "\n");
1989
1990	// Needs accesses where the addresses of the accessed underlying objects do
1991	// not change within the loop.
1992	if (isLoopVariantIndirectAddress(UnderlyingObjects: UnderlyingObjects.find(Val: APtr)->second, SE,
1993	L: InnermostLoop) \|\|
1994	isLoopVariantIndirectAddress(UnderlyingObjects: UnderlyingObjects.find(Val: BPtr)->second, SE,
1995	L: InnermostLoop))
1996	return MemoryDepChecker::Dependence::IndirectUnsafe;
1997
1998	// Need accesses with constant stride. We don't want to vectorize
1999	// "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
2000	// in the address space.
2001	if (!StrideAPtr \|\| !StrideBPtr \|\| StrideAPtr != StrideBPtr) {
2002	LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
2003	return MemoryDepChecker::Dependence::Unknown;
2004	}
2005
2006	uint64_t TypeByteSize = DL.getTypeAllocSize(Ty: ATy);
2007	bool HasSameSize =
2008	DL.getTypeStoreSizeInBits(Ty: ATy) == DL.getTypeStoreSizeInBits(Ty: BTy);
2009	if (!HasSameSize)
2010	TypeByteSize = `0`;
2011	uint64_t Stride = std::abs(i: StrideAPtr);
2012	return DepDistanceStrideAndSizeInfo (Dist, Stride, TypeByteSize, AIsWrite,
2013	BIsWrite);
2014	}
2015
2016	MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2017	const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
2018	unsigned BIdx, const DenseMap<Value , const* SCEV *> &Strides,
2019	const DenseMap<Value , SmallVector<const* Value *, `16`>>
2020	&UnderlyingObjects) {
2021	assert(AIdx < BIdx && "Must pass arguments in program order");
2022
2023	// Get the dependence distance, stride, type size and what access writes for
2024	// the dependence between A and B.
2025	auto Res = getDependenceDistanceStrideAndSize(
2026	A, AInst: InstMap [AIdx], B, BInst: InstMap [BIdx], Strides, UnderlyingObjects, PSE,
2027	InnermostLoop);
2028	if (std::holds_alternative<Dependence::DepType>(v: Res))
2029	return std::get<Dependence::DepType>(v&: Res);
2030
2031	const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] =
2032	std::get<DepDistanceStrideAndSizeInfo>(v&: Res);
2033	bool HasSameSize = TypeByteSize > `0`;
2034
2035	ScalarEvolution &SE = *PSE.getSE();
2036	auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
2037	// If the distance between the acecsses is larger than their absolute stride
2038	// multiplied by the backedge taken count, the accesses are independet, i.e.
2039	// they are far enough appart that accesses won't access the same location
2040	// across all loop ierations.
2041	if (!isa<SCEVCouldNotCompute>(Val: Dist) && HasSameSize &&
2042	isSafeDependenceDistance(DL, SE, BackedgeTakenCount: (PSE.getBackedgeTakenCount()), Dist: Dist,
2043	Stride, TypeByteSize))
2044	return Dependence::NoDep;
2045
2046	const SCEVConstant *C = dyn_cast<SCEVConstant>(Val: Dist);
2047	if (!C) {
2048	LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
2049	FoundNonConstantDistanceDependence = true;
2050	return Dependence::Unknown;
2051	}
2052
2053	const APInt &Val = C->getAPInt();
2054	int64_t Distance = Val.getSExtValue();
2055
2056	// If the distance between accesses and their strides are known constants,
2057	// check whether the accesses interlace each other.
2058	if (std::abs(i: Distance) > `0` && Stride > `1` && HasSameSize &&
2059	areStridedAccessesIndependent(Distance: std::abs(i: Distance), Stride, TypeByteSize)) {
2060	LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2061	return Dependence::NoDep;
2062	}
2063
2064	// Negative distances are not plausible dependencies.
2065	if (Val.isNegative()) {
2066	bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
2067	// Check if the first access writes to a location that is read in a later
2068	// iteration, where the distance between them is not a multiple of a vector
2069	// factor and relatively small.
2070	//
2071	// NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
2072	// couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
2073	// forward dependency will allow vectorization using any width.
2074	if (IsTrueDataDependence && EnableForwardingConflictDetection &&
2075	(!HasSameSize \|\| couldPreventStoreLoadForward(Distance: Val.abs().getZExtValue(),
2076	TypeByteSize))) {
2077	LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2078	return Dependence::ForwardButPreventsForwarding;
2079	}
2080
2081	LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
2082	return Dependence::Forward;
2083	}
2084
2085	// Write to the same location with the same size.
2086	if (Val == `0`) {
2087	if (HasSameSize)
2088	return Dependence::Forward;
2089	LLVM_DEBUG(
2090	dbgs() << "LAA: Zero dependence difference but different type sizes\n");
2091	return Dependence::Unknown;
2092	}
2093
2094	assert(Val.isStrictlyPositive() && "Expect a positive value");
2095
2096	if (!HasSameSize) {
2097	LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
2098	"different type sizes\n");
2099	return Dependence::Unknown;
2100	}
2101
2102	// Bail out early if passed-in parameters make vectorization not feasible.
2103	unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
2104	VectorizerParams::VectorizationFactor : `1`);
2105	unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
2106	VectorizerParams::VectorizationInterleave : `1`);
2107	// The minimum number of iterations for a vectorized/unrolled version.
2108	unsigned MinNumIter = std::max(a: ForcedFactor * ForcedUnroll, b: `2U`);
2109
2110	// It's not vectorizable if the distance is smaller than the minimum distance
2111	// needed for a vectroized/unrolled version. Vectorizing one iteration in
2112	// front needs TypeByteSize Stride. Vectorizing the last iteration needs*
2113	// TypeByteSize (No need to plus the last gap distance).
2114	//
2115	// E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2116	// foo(int A) {*
2117	// int B = (int )((char )A + 14);*
2118	// for (i = 0 ; i < 1024 ; i += 2)
2119	// B[i] = A[i] + 1;
2120	// }
2121	//
2122	// Two accesses in memory (stride is 2):
2123	// \| A[0] \| \| A[2] \| \| A[4] \| \| A[6] \| \|
2124	// \| B[0] \| \| B[2] \| \| B[4] \|
2125	//
2126	// Distance needs for vectorizing iterations except the last iteration:
2127	// 4 2 * (MinNumIter - 1). Distance needs for the last iteration: 4.*
2128	// So the minimum distance needed is: 4 2 * (MinNumIter - 1) + 4.*
2129	//
2130	// If MinNumIter is 2, it is vectorizable as the minimum distance needed is
2131	// 12, which is less than distance.
2132	//
2133	// If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
2134	// the minimum distance needed is 28, which is greater than distance. It is
2135	// not safe to do vectorization.
2136	uint64_t MinDistanceNeeded =
2137	TypeByteSize * Stride * (MinNumIter - `1`) + TypeByteSize;
2138	if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
2139	LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
2140	<< Distance << `'\n'`);
2141	return Dependence::Backward;
2142	}
2143
2144	// Unsafe if the minimum distance needed is greater than smallest dependence
2145	// distance distance.
2146	if (MinDistanceNeeded > MinDepDistBytes) {
2147	LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
2148	<< MinDistanceNeeded << " size in bytes\n");
2149	return Dependence::Backward;
2150	}
2151
2152	// Positive distance bigger than max vectorization factor.
2153	// FIXME: Should use max factor instead of max distance in bytes, which could
2154	// not handle different types.
2155	// E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2156	// void foo (int A, char B) {
2157	// for (unsigned i = 0; i < 1024; i++) {
2158	// A[i+2] = A[i] + 1;
2159	// B[i+2] = B[i] + 1;
2160	// }
2161	// }
2162	//
2163	// This case is currently unsafe according to the max safe distance. If we
2164	// analyze the two accesses on array B, the max safe dependence distance
2165	// is 2. Then we analyze the accesses on array A, the minimum distance needed
2166	// is 8, which is less than 2 and forbidden vectorization, But actually
2167	// both A and B could be vectorized by 2 iterations.
2168	MinDepDistBytes =
2169	std::min(a: static_cast<uint64_t>(Distance), b: MinDepDistBytes);
2170
2171	bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2172	uint64_t MinDepDistBytesOld = MinDepDistBytes;
2173	if (IsTrueDataDependence && EnableForwardingConflictDetection &&
2174	couldPreventStoreLoadForward(Distance, TypeByteSize)) {
2175	// Sanity check that we didn't update MinDepDistBytes when calling
2176	// couldPreventStoreLoadForward
2177	assert(MinDepDistBytes == MinDepDistBytesOld &&
2178	"An update to MinDepDistBytes requires an update to "
2179	"MaxSafeVectorWidthInBits");
2180	(void)MinDepDistBytesOld;
2181	return Dependence::BackwardVectorizableButPreventsForwarding;
2182	}
2183
2184	// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
2185	// since there is a backwards dependency.
2186	uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride);
2187	LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
2188	<< " with max VF = " << MaxVF << `'\n'`);
2189	uint64_t MaxVFInBits = MaxVF * TypeByteSize * `8`;
2190	MaxSafeVectorWidthInBits = std::min(a: MaxSafeVectorWidthInBits, b: MaxVFInBits);
2191	return Dependence::BackwardVectorizable;
2192	}
2193
2194	bool MemoryDepChecker::areDepsSafe(
2195	DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
2196	const DenseMap<Value , const* SCEV *> &Strides,
2197	const DenseMap<Value , SmallVector<const* Value *, `16`>>
2198	&UnderlyingObjects) {
2199
2200	MinDepDistBytes = -`1`;
2201	SmallPtrSet<MemAccessInfo, `8`> Visited;
2202	for (MemAccessInfo CurAccess : CheckDeps) {
2203	if (Visited.count(Ptr: CurAccess))
2204	continue;
2205
2206	// Get the relevant memory access set.
2207	EquivalenceClasses<MemAccessInfo>::iterator I =
2208	AccessSets.findValue(V: AccessSets.getLeaderValue(V: CurAccess));
2209
2210	// Check accesses within this set.
2211	EquivalenceClasses<MemAccessInfo>::member_iterator AI =
2212	AccessSets.member_begin(I);
2213	EquivalenceClasses<MemAccessInfo>::member_iterator AE =
2214	AccessSets.member_end();
2215
2216	// Check every access pair.
2217	while (AI != AE) {
2218	Visited.insert(Ptr: *AI);
2219	bool AIIsWrite = AI ->getInt();
2220	// Check loads only against next equivalent class, but stores also against
2221	// other stores in the same equivalence class - to the same address.
2222	EquivalenceClasses<MemAccessInfo>::member_iterator OI =
2223	(AIIsWrite ? AI : std::next(x: AI));
2224	while (OI != AE) {
2225	// Check every accessing instruction pair in program order.
2226	for (std::vector<unsigned>::iterator I1 = Accesses [*AI].begin(),
2227	I1E = Accesses [*AI].end(); I1 != I1E; ++I1)
2228	// Scan all accesses of another equivalence class, but only the next
2229	// accesses of the same equivalent class.
2230	for (std::vector<unsigned>::iterator
2231	I2 = (OI == AI ? std::next(x: I1) : Accesses [*OI].begin()),
2232	I2E = (OI == AI ? I1E : Accesses [*OI].end());
2233	I2 != I2E; ++I2) {
2234	auto A = std::make_pair(x: &AI, y&: I1);
2235	auto B = std::make_pair(x: &OI, y&: I2);
2236
2237	assert(I1 != I2);
2238	if (I1 > I2)
2239	std::swap(x&: A, y&: B);
2240
2241	Dependence::DepType Type =
2242	isDependent(A: A.first, AIdx: A.second, B: B.first, BIdx: B.second, Strides,
2243	UnderlyingObjects);
2244	mergeInStatus(S: Dependence::isSafeForVectorization(Type));
2245
2246	// Gather dependences unless we accumulated MaxDependences
2247	// dependences. In that case return as soon as we find the first
2248	// unsafe dependence. This puts a limit on this quadratic
2249	// algorithm.
2250	if (RecordDependences) {
2251	if (Type != Dependence::NoDep)
2252	Dependences.push_back(Elt: Dependence (A.second, B.second, Type));
2253
2254	if (Dependences.size() >= MaxDependences) {
2255	RecordDependences = false;
2256	Dependences.clear();
2257	LLVM_DEBUG(dbgs()
2258	<< "Too many dependences, stopped recording\n");
2259	}
2260	}
2261	if (!RecordDependences && !isSafeForVectorization())
2262	return false;
2263	}
2264	++OI;
2265	}
2266	AI ++;
2267	}
2268	}
2269
2270	LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
2271	return isSafeForVectorization();
2272	}
2273
2274	SmallVector<Instruction *, `4`>
2275	MemoryDepChecker::getInstructionsForAccess(Value Ptr, bool* isWrite) const {
2276	MemAccessInfo Access(Ptr, isWrite);
2277	auto &IndexVector = Accesses.find(Val: Access)->second;
2278
2279	SmallVector<Instruction *, `4`> Insts;
2280	transform(Range: IndexVector,
2281	d_first: std::back_inserter(x&: Insts),
2282	F: [&](unsigned Idx) { return this->InstMap [Idx]; });
2283	return Insts;
2284	}
2285
2286	const char *MemoryDepChecker::Dependence::DepName[] = {
2287	"NoDep",
2288	"Unknown",
2289	"IndirectUnsafe",
2290	"Forward",
2291	"ForwardButPreventsForwarding",
2292	"Backward",
2293	"BackwardVectorizable",
2294	"BackwardVectorizableButPreventsForwarding"};
2295
2296	void MemoryDepChecker::Dependence::print(
2297	raw_ostream &OS, unsigned Depth,
2298	const SmallVectorImpl<Instruction > &Instrs) const* {
2299	OS.indent(NumSpaces: Depth) << DepName[Type] << ":\n";
2300	OS.indent(NumSpaces: Depth + `2`) << *Instrs [Source] << " -> \n";
2301	OS.indent(NumSpaces: Depth + `2`) << *Instrs [Destination] << "\n";
2302	}
2303
2304	bool LoopAccessInfo::canAnalyzeLoop() {
2305	// We need to have a loop header.
2306	LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
2307	<< TheLoop->getHeader()->getParent()->getName() << ": "
2308	<< TheLoop->getHeader()->getName() << `'\n'`);
2309
2310	// We can only analyze innermost loops.
2311	if (!TheLoop->isInnermost()) {
2312	LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
2313	recordAnalysis(RemarkName: "NotInnerMostLoop") << "loop is not the innermost loop";
2314	return false;
2315	}
2316
2317	// We must have a single backedge.
2318	if (TheLoop->getNumBackEdges() != `1`) {
2319	LLVM_DEBUG(
2320	dbgs() << "LAA: loop control flow is not understood by analyzer\n");
2321	recordAnalysis(RemarkName: "CFGNotUnderstood")
2322	<< "loop control flow is not understood by analyzer";
2323	return false;
2324	}
2325
2326	// ScalarEvolution needs to be able to find the exit count.
2327	const SCEV *ExitCount = PSE ->getBackedgeTakenCount();
2328	if (isa<SCEVCouldNotCompute>(Val: ExitCount)) {
2329	recordAnalysis(RemarkName: "CantComputeNumberOfIterations")
2330	<< "could not determine number of loop iterations";
2331	LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
2332	return false;
2333	}
2334
2335	return true;
2336	}
2337
2338	void LoopAccessInfo::analyzeLoop(AAResults AA, LoopInfo LI,
2339	const TargetLibraryInfo *TLI,
2340	DominatorTree *DT) {
2341	// Holds the Load and Store instructions.
2342	SmallVector<LoadInst *, `16`> Loads;
2343	SmallVector<StoreInst *, `16`> Stores;
2344	SmallPtrSet<MDNode *, `8`> LoopAliasScopes;
2345
2346	// Holds all the different accesses in the loop.
2347	unsigned NumReads = `0`;
2348	unsigned NumReadWrites = `0`;
2349
2350	bool HasComplexMemInst = false;
2351
2352	// A runtime check is only legal to insert if there are no convergent calls.
2353	HasConvergentOp = false;
2354
2355	PtrRtChecking ->Pointers.clear();
2356	PtrRtChecking ->Need = false;
2357
2358	const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
2359
2360	const bool EnableMemAccessVersioningOfLoop =
2361	EnableMemAccessVersioning &&
2362	!TheLoop->getHeader()->getParent()->hasOptSize();
2363
2364	// Traverse blocks in fixed RPOT order, regardless of their storage in the
2365	// loop info, as it may be arbitrary.
2366	LoopBlocksRPO RPOT(TheLoop);
2367	RPOT.perform(LI);
2368	for (BasicBlock *BB : RPOT) {
2369	// Scan the BB and collect legal loads and stores. Also detect any
2370	// convergent instructions.
2371	for (Instruction &I : *BB) {
2372	if (auto *Call = dyn_cast<CallBase>(Val: &I)) {
2373	if (Call->isConvergent())
2374	HasConvergentOp = true;
2375	}
2376
2377	// With both a non-vectorizable memory instruction and a convergent
2378	// operation, found in this loop, no reason to continue the search.
2379	if (HasComplexMemInst && HasConvergentOp) {
2380	CanVecMem = false;
2381	return;
2382	}
2383
2384	// Avoid hitting recordAnalysis multiple times.
2385	if (HasComplexMemInst)
2386	continue;
2387
2388	// Record alias scopes defined inside the loop.
2389	if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(Val: &I))
2390	for (Metadata *Op : Decl->getScopeList()->operands())
2391	LoopAliasScopes.insert(Ptr: cast<MDNode>(Val: Op));
2392
2393	// Many math library functions read the rounding mode. We will only
2394	// vectorize a loop if it contains known function calls that don't set
2395	// the flag. Therefore, it is safe to ignore this read from memory.
2396	auto *Call = dyn_cast<CallInst>(Val: &I);
2397	if (Call && getVectorIntrinsicIDForCall(CI: Call, TLI))
2398	continue;
2399
2400	// If this is a load, save it. If this instruction can read from memory
2401	// but is not a load, then we quit. Notice that we don't handle function
2402	// calls that read or write.
2403	if (I.mayReadFromMemory()) {
2404	// If the function has an explicit vectorized counterpart, we can safely
2405	// assume that it can be vectorized.
2406	if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
2407	!VFDatabase::getMappings(CI: *Call).empty())
2408	continue;
2409
2410	auto *Ld = dyn_cast<LoadInst>(Val: &I);
2411	if (!Ld) {
2412	recordAnalysis(RemarkName: "CantVectorizeInstruction", Instr: Ld)
2413	<< "instruction cannot be vectorized";
2414	HasComplexMemInst = true;
2415	continue;
2416	}
2417	if (!Ld->isSimple() && !IsAnnotatedParallel) {
2418	recordAnalysis(RemarkName: "NonSimpleLoad", Instr: Ld)
2419	<< "read with atomic ordering or volatile read";
2420	LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
2421	HasComplexMemInst = true;
2422	continue;
2423	}
2424	NumLoads++;
2425	Loads.push_back(Elt: Ld);
2426	DepChecker ->addAccess(LI: Ld);
2427	if (EnableMemAccessVersioningOfLoop)
2428	collectStridedAccess(LoadOrStoreInst: Ld);
2429	continue;
2430	}
2431
2432	// Save 'store' instructions. Abort if other instructions write to memory.
2433	if (I.mayWriteToMemory()) {
2434	auto *St = dyn_cast<StoreInst>(Val: &I);
2435	if (!St) {
2436	recordAnalysis(RemarkName: "CantVectorizeInstruction", Instr: St)
2437	<< "instruction cannot be vectorized";
2438	HasComplexMemInst = true;
2439	continue;
2440	}
2441	if (!St->isSimple() && !IsAnnotatedParallel) {
2442	recordAnalysis(RemarkName: "NonSimpleStore", Instr: St)
2443	<< "write with atomic ordering or volatile write";
2444	LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
2445	HasComplexMemInst = true;
2446	continue;
2447	}
2448	NumStores++;
2449	Stores.push_back(Elt: St);
2450	DepChecker ->addAccess(SI: St);
2451	if (EnableMemAccessVersioningOfLoop)
2452	collectStridedAccess(LoadOrStoreInst: St);
2453	}
2454	} // Next instr.
2455	} // Next block.
2456
2457	if (HasComplexMemInst) {
2458	CanVecMem = false;
2459	return;
2460	}
2461
2462	// Now we have two lists that hold the loads and the stores.
2463	// Next, we find the pointers that they use.
2464
2465	// Check if we see any stores. If there are no stores, then we don't
2466	// care if the pointers are restrict.
2467	if (!Stores.size()) {
2468	LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
2469	CanVecMem = true;
2470	return;
2471	}
2472
2473	MemoryDepChecker::DepCandidates DependentAccesses;
2474	AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE,
2475	LoopAliasScopes);
2476
2477	// Holds the analyzed pointers. We don't want to call getUnderlyingObjects
2478	// multiple times on the same object. If the ptr is accessed twice, once
2479	// for read and once for write, it will only appear once (on the write
2480	// list). This is okay, since we are going to check for conflicts between
2481	// writes and between reads and writes, but not between reads and reads.
2482	SmallSet<std::pair<Value , Type >, `16`> Seen;
2483
2484	// Record uniform store addresses to identify if we have multiple stores
2485	// to the same address.
2486	SmallPtrSet<Value *, `16`> UniformStores;
2487
2488	for (StoreInst *ST : Stores) {
2489	Value *Ptr = ST->getPointerOperand();
2490
2491	if (isInvariant(V: Ptr)) {
2492	// Record store instructions to loop invariant addresses
2493	StoresToInvariantAddresses.push_back(Elt: ST);
2494	HasDependenceInvolvingLoopInvariantAddress \|=
2495	!UniformStores.insert(Ptr).second;
2496	}
2497
2498	// If we did not* see this pointer before, insert it to the read-write*
2499	// list. At this phase it is only a 'write' list.
2500	Type *AccessTy = getLoadStoreType(I: ST);
2501	if (Seen.insert(V: {Ptr, AccessTy}).second) {
2502	++NumReadWrites;
2503
2504	MemoryLocation Loc = MemoryLocation::get(SI: ST);
2505	// The TBAA metadata could have a control dependency on the predication
2506	// condition, so we cannot rely on it when determining whether or not we
2507	// need runtime pointer checks.
2508	if (blockNeedsPredication(BB: ST->getParent(), TheLoop, DT))
2509	Loc.AATags.TBAA = nullptr;
2510
2511	visitPointers(StartPtr: const_cast<Value >(Loc.Ptr), InnermostLoop: TheLoop,
2512	AddPointer: [&Accesses, AccessTy, Loc](Value *Ptr) {
2513	MemoryLocation NewLoc = Loc.getWithNewPtr(NewPtr: Ptr);
2514	Accesses.addStore(Loc&: NewLoc, AccessTy);
2515	});
2516	}
2517	}
2518
2519	if (IsAnnotatedParallel) {
2520	LLVM_DEBUG(
2521	dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
2522	<< "checks.\n");
2523	CanVecMem = true;
2524	return;
2525	}
2526
2527	for (LoadInst *LD : Loads) {
2528	Value *Ptr = LD->getPointerOperand();
2529	// If we did not* see this pointer before, insert it to the*
2530	// read list. If we did* see it before, then it is already in*
2531	// the read-write list. This allows us to vectorize expressions
2532	// such as A[i] += x; Because the address of A[i] is a read-write
2533	// pointer. This only works if the index of A[i] is consecutive.
2534	// If the address of i is unknown (for example A[B[i]]) then we may
2535	// read a few words, modify, and write a few words, and some of the
2536	// words may be written to the same address.
2537	bool IsReadOnlyPtr = false;
2538	Type *AccessTy = getLoadStoreType(I: LD);
2539	if (Seen.insert(V: {Ptr, AccessTy}).second \|\|
2540	!getPtrStride(PSE&: *PSE, AccessTy: LD->getType(), Ptr, Lp: TheLoop, StridesMap: SymbolicStrides).value_or(u: `0`)) {
2541	++NumReads;
2542	IsReadOnlyPtr = true;
2543	}
2544
2545	// See if there is an unsafe dependency between a load to a uniform address and
2546	// store to the same uniform address.
2547	if (UniformStores.count(Ptr)) {
2548	LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
2549	"load and uniform store to the same address!\n");
2550	HasDependenceInvolvingLoopInvariantAddress = true;
2551	}
2552
2553	MemoryLocation Loc = MemoryLocation::get(LI: LD);
2554	// The TBAA metadata could have a control dependency on the predication
2555	// condition, so we cannot rely on it when determining whether or not we
2556	// need runtime pointer checks.
2557	if (blockNeedsPredication(BB: LD->getParent(), TheLoop, DT))
2558	Loc.AATags.TBAA = nullptr;
2559
2560	visitPointers(StartPtr: const_cast<Value >(Loc.Ptr), InnermostLoop: TheLoop,
2561	AddPointer: [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2562	MemoryLocation NewLoc = Loc.getWithNewPtr(NewPtr: Ptr);
2563	Accesses.addLoad(Loc&: NewLoc, AccessTy, IsReadOnly: IsReadOnlyPtr);
2564	});
2565	}
2566
2567	// If we write (or read-write) to a single destination and there are no
2568	// other reads in this loop then is it safe to vectorize.
2569	if (NumReadWrites == `1` && NumReads == `0`) {
2570	LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2571	CanVecMem = true;
2572	return;
2573	}
2574
2575	// Build dependence sets and check whether we need a runtime pointer bounds
2576	// check.
2577	Accesses.buildDependenceSets();
2578
2579	// Find pointers with computable bounds. We are going to use this information
2580	// to place a runtime bound check.
2581	Value UncomputablePtr = nullptr*;
2582	bool CanDoRTIfNeeded =
2583	Accesses.canCheckPtrAtRT(RtCheck&: *PtrRtChecking, SE: PSE ->getSE(), TheLoop,
2584	StridesMap: SymbolicStrides, UncomputablePtr, ShouldCheckWrap: false);
2585	if (!CanDoRTIfNeeded) {
2586	auto *I = dyn_cast_or_null<Instruction>(Val: UncomputablePtr);
2587	recordAnalysis(RemarkName: "CantIdentifyArrayBounds", Instr: I)
2588	<< "cannot identify array bounds";
2589	LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2590	<< "the array bounds.\n");
2591	CanVecMem = false;
2592	return;
2593	}
2594
2595	LLVM_DEBUG(
2596	dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2597
2598	CanVecMem = true;
2599	if (Accesses.isDependencyCheckNeeded()) {
2600	LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2601	CanVecMem = DepChecker ->areDepsSafe(
2602	AccessSets&: DependentAccesses, CheckDeps&: Accesses.getDependenciesToCheck(), Strides: SymbolicStrides,
2603	UnderlyingObjects: Accesses.getUnderlyingObjects());
2604
2605	if (!CanVecMem && DepChecker ->shouldRetryWithRuntimeCheck()) {
2606	LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2607
2608	// Clear the dependency checks. We assume they are not needed.
2609	Accesses.resetDepChecks(DepChecker&: *DepChecker);
2610
2611	PtrRtChecking ->reset();
2612	PtrRtChecking ->Need = true;
2613
2614	auto *SE = PSE ->getSE();
2615	UncomputablePtr = nullptr;
2616	CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
2617	RtCheck&: PtrRtChecking, SE, TheLoop, StridesMap: SymbolicStrides, UncomputablePtr, ShouldCheckWrap: true*);
2618
2619	// Check that we found the bounds for the pointer.
2620	if (!CanDoRTIfNeeded) {
2621	auto *I = dyn_cast_or_null<Instruction>(Val: UncomputablePtr);
2622	recordAnalysis(RemarkName: "CantCheckMemDepsAtRunTime", Instr: I)
2623	<< "cannot check memory dependencies at runtime";
2624	LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2625	CanVecMem = false;
2626	return;
2627	}
2628
2629	CanVecMem = true;
2630	}
2631	}
2632
2633	if (HasConvergentOp) {
2634	recordAnalysis(RemarkName: "CantInsertRuntimeCheckWithConvergent")
2635	<< "cannot add control dependency to convergent operation";
2636	LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2637	"would be needed with a convergent operation\n");
2638	CanVecMem = false;
2639	return;
2640	}
2641
2642	if (CanVecMem)
2643	LLVM_DEBUG(
2644	dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2645	<< (PtrRtChecking ->Need ? "" : " don't")
2646	<< " need runtime memory checks.\n");
2647	else
2648	emitUnsafeDependenceRemark();
2649	}
2650
2651	void LoopAccessInfo::emitUnsafeDependenceRemark() {
2652	auto Deps = getDepChecker().getDependences();
2653	if (!Deps)
2654	return;
2655	auto Found = llvm::find_if(Range: Deps, P: [](const* MemoryDepChecker::Dependence &D) {
2656	return MemoryDepChecker::Dependence::isSafeForVectorization(Type: D.Type) !=
2657	MemoryDepChecker::VectorizationSafetyStatus::Safe;
2658	});
2659	if (Found == Deps->end())
2660	return;
2661	MemoryDepChecker::Dependence Dep = *Found;
2662
2663	LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2664
2665	// Emit remark for first unsafe dependence
2666	bool HasForcedDistribution = false;
2667	std::optional<const MDOperand *> Value =
2668	findStringMetadataForLoop(TheLoop, Name: "llvm.loop.distribute.enable");
2669	if (Value) {
2670	const MDOperand Op = Value;
2671	assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
2672	HasForcedDistribution = mdconst::extract<ConstantInt>(MD: *Op)->getZExtValue();
2673	}
2674
2675	const std::string Info =
2676	HasForcedDistribution
2677	? "unsafe dependent memory operations in loop."
2678	: "unsafe dependent memory operations in loop. Use "
2679	"#pragma clang loop distribute(enable) to allow loop distribution "
2680	"to attempt to isolate the offending operations into a separate "
2681	"loop";
2682	OptimizationRemarkAnalysis &R =
2683	recordAnalysis(RemarkName: "UnsafeDep", Instr: Dep.getDestination(LAI: *this)) << Info;
2684
2685	switch (Dep.Type) {
2686	case MemoryDepChecker::Dependence::NoDep:
2687	case MemoryDepChecker::Dependence::Forward:
2688	case MemoryDepChecker::Dependence::BackwardVectorizable:
2689	llvm_unreachable("Unexpected dependence");
2690	case MemoryDepChecker::Dependence::Backward:
2691	R << "\nBackward loop carried data dependence.";
2692	break;
2693	case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
2694	R << "\nForward loop carried data dependence that prevents "
2695	"store-to-load forwarding.";
2696	break;
2697	case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
2698	R << "\nBackward loop carried data dependence that prevents "
2699	"store-to-load forwarding.";
2700	break;
2701	case MemoryDepChecker::Dependence::IndirectUnsafe:
2702	R << "\nUnsafe indirect dependence.";
2703	break;
2704	case MemoryDepChecker::Dependence::Unknown:
2705	R << "\nUnknown data dependence.";
2706	break;
2707	}
2708
2709	if (Instruction I = Dep.getSource(LAI: this)) {
2710	DebugLoc SourceLoc = I->getDebugLoc();
2711	if (auto *DD = dyn_cast_or_null<Instruction>(Val: getPointerOperand(V: I)))
2712	SourceLoc = DD->getDebugLoc();
2713	if (SourceLoc)
2714	R << " Memory location is the same as accessed at "
2715	<< ore::NV ("Location", SourceLoc);
2716	}
2717	}
2718
2719	bool LoopAccessInfo::blockNeedsPredication(BasicBlock BB, Loop TheLoop,
2720	DominatorTree *DT) {
2721	assert(TheLoop->contains(BB) && "Unknown block used");
2722
2723	// Blocks that do not dominate the latch need predication.
2724	BasicBlock* Latch = TheLoop->getLoopLatch();
2725	return !DT->dominates(A: BB, B: Latch);
2726	}
2727
2728	OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2729	Instruction *I) {
2730	assert(!Report && "Multiple reports generated");
2731
2732	Value *CodeRegion = TheLoop->getHeader();
2733	DebugLoc DL = TheLoop->getStartLoc();
2734
2735	if (I) {
2736	CodeRegion = I->getParent();
2737	// If there is no debug location attached to the instruction, revert back to
2738	// using the loop's.
2739	if (I->getDebugLoc())
2740	DL = I->getDebugLoc();
2741	}
2742
2743	Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, args&: RemarkName, args&: DL,
2744	args&: CodeRegion);
2745	return *Report;
2746	}
2747
2748	bool LoopAccessInfo::isInvariant(Value V) const* {
2749	auto *SE = PSE ->getSE();
2750	// TODO: Is this really what we want? Even without FP SCEV, we may want some
2751	// trivially loop-invariant FP values to be considered invariant.
2752	if (!SE->isSCEVable(Ty: V->getType()))
2753	return false;
2754	const SCEV *S = SE->getSCEV(V);
2755	return SE->isLoopInvariant(S, L: TheLoop);
2756	}
2757
2758	/// Find the operand of the GEP that should be checked for consecutive
2759	/// stores. This ignores trailing indices that have no effect on the final
2760	/// pointer.
2761	static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
2762	const DataLayout &DL = Gep->getModule()->getDataLayout();
2763	unsigned LastOperand = Gep->getNumOperands() - `1`;
2764	TypeSize GEPAllocSize = DL.getTypeAllocSize(Ty: Gep->getResultElementType());
2765
2766	// Walk backwards and try to peel off zeros.
2767	while (LastOperand > `1` && match(V: Gep->getOperand(i_nocapture: LastOperand), P: m_Zero())) {
2768	// Find the type we're currently indexing into.
2769	gep_type_iterator GEPTI = gep_type_begin(GEP: Gep);
2770	std::advance(i&: GEPTI, n: LastOperand - `2`);
2771
2772	// If it's a type with the same allocation size as the result of the GEP we
2773	// can peel off the zero index.
2774	TypeSize ElemSize = GEPTI.isStruct()
2775	? DL.getTypeAllocSize(Ty: GEPTI.getIndexedType())
2776	: GEPTI.getSequentialElementStride(DL);
2777	if (ElemSize != GEPAllocSize)
2778	break;
2779	--LastOperand;
2780	}
2781
2782	return LastOperand;
2783	}
2784
2785	/// If the argument is a GEP, then returns the operand identified by
2786	/// getGEPInductionOperand. However, if there is some other non-loop-invariant
2787	/// operand, it returns that instead.
2788	static Value stripGetElementPtr(Value Ptr, ScalarEvolution SE, Loop Lp) {
2789	GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: Ptr);
2790	if (!GEP)
2791	return Ptr;
2792
2793	unsigned InductionOperand = getGEPInductionOperand(Gep: GEP);
2794
2795	// Check that all of the gep indices are uniform except for our induction
2796	// operand.
2797	for (unsigned i = `0`, e = GEP->getNumOperands(); i != e; ++i)
2798	if (i != InductionOperand &&
2799	!SE->isLoopInvariant(S: SE->getSCEV(V: GEP->getOperand(i_nocapture: i)), L: Lp))
2800	return Ptr;
2801	return GEP->getOperand(i_nocapture: InductionOperand);
2802	}
2803
2804	/// If a value has only one user that is a CastInst, return it.
2805	static Value getUniqueCastUse(Value Ptr, Loop Lp, Type Ty) {
2806	Value UniqueCast = nullptr*;
2807	for (User *U : Ptr->users()) {
2808	CastInst *CI = dyn_cast<CastInst>(Val: U);
2809	if (CI && CI->getType() == Ty) {
2810	if (!UniqueCast)
2811	UniqueCast = CI;
2812	else
2813	return nullptr;
2814	}
2815	}
2816	return UniqueCast;
2817	}
2818
2819	/// Get the stride of a pointer access in a loop. Looks for symbolic
2820	/// strides "a[istride]". Returns the symbolic stride, or null otherwise.*
2821	static const SCEV getStrideFromPointer(Value Ptr, ScalarEvolution SE, Loop Lp) {
2822	auto *PtrTy = dyn_cast<PointerType>(Val: Ptr->getType());
2823	if (!PtrTy \|\| PtrTy->isAggregateType())
2824	return nullptr;
2825
2826	// Try to remove a gep instruction to make the pointer (actually index at this
2827	// point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
2828	// pointer, otherwise, we are analyzing the index.
2829	Value *OrigPtr = Ptr;
2830
2831	// The size of the pointer access.
2832	int64_t PtrAccessSize = `1`;
2833
2834	Ptr = stripGetElementPtr(Ptr, SE, Lp);
2835	const SCEV *V = SE->getSCEV(V: Ptr);
2836
2837	if (Ptr != OrigPtr)
2838	// Strip off casts.
2839	while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(Val: V))
2840	V = C->getOperand();
2841
2842	const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(Val: V);
2843	if (!S)
2844	return nullptr;
2845
2846	// If the pointer is invariant then there is no stride and it makes no
2847	// sense to add it here.
2848	if (Lp != S->getLoop())
2849	return nullptr;
2850
2851	V = S->getStepRecurrence(SE&: *SE);
2852	if (!V)
2853	return nullptr;
2854
2855	// Strip off the size of access multiplication if we are still analyzing the
2856	// pointer.
2857	if (OrigPtr == Ptr) {
2858	if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Val: V)) {
2859	if (M->getOperand(i: `0`)->getSCEVType() != scConstant)
2860	return nullptr;
2861
2862	const APInt &APStepVal = cast<SCEVConstant>(Val: M->getOperand(i: `0`))->getAPInt();
2863
2864	// Huge step value - give up.
2865	if (APStepVal.getBitWidth() > `64`)
2866	return nullptr;
2867
2868	int64_t StepVal = APStepVal.getSExtValue();
2869	if (PtrAccessSize != StepVal)
2870	return nullptr;
2871	V = M->getOperand(i: `1`);
2872	}
2873	}
2874
2875	// Note that the restriction after this loop invariant check are only
2876	// profitability restrictions.
2877	if (!SE->isLoopInvariant(S: V, L: Lp))
2878	return nullptr;
2879
2880	// Look for the loop invariant symbolic value.
2881	const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Val: V);
2882	if (!U) {
2883	const auto *C = dyn_cast<SCEVIntegralCastExpr>(Val: V);
2884	if (!C)
2885	return nullptr;
2886	U = dyn_cast<SCEVUnknown>(Val: C->getOperand());
2887	if (!U)
2888	return nullptr;
2889
2890	// Match legacy behavior - this is not needed for correctness
2891	if (!getUniqueCastUse(Ptr: U->getValue(), Lp, Ty: V->getType()))
2892	return nullptr;
2893	}
2894
2895	return V;
2896	}
2897
2898	void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2899	Value *Ptr = getLoadStorePointerOperand(V: MemAccess);
2900	if (!Ptr)
2901	return;
2902
2903	// Note: getStrideFromPointer is a profitability* heuristic. We*
2904	// could broaden the scope of values returned here - to anything
2905	// which happens to be loop invariant and contributes to the
2906	// computation of an interesting IV - but we chose not to as we
2907	// don't have a cost model here, and broadening the scope exposes
2908	// far too many unprofitable cases.
2909	const SCEV *StrideExpr = getStrideFromPointer(Ptr, SE: PSE ->getSE(), Lp: TheLoop);
2910	if (!StrideExpr)
2911	return;
2912
2913	LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2914	"versioning:");
2915	LLVM_DEBUG(dbgs() << " Ptr: " << Ptr << " Stride: " << StrideExpr << "\n");
2916
2917	if (!SpeculateUnitStride) {
2918	LLVM_DEBUG(dbgs() << " Chose not to due to -laa-speculate-unit-stride\n");
2919	return;
2920	}
2921
2922	// Avoid adding the "Stride == 1" predicate when we know that
2923	// Stride >= Trip-Count. Such a predicate will effectively optimize a single
2924	// or zero iteration loop, as Trip-Count <= Stride == 1.
2925	//
2926	// TODO: We are currently not making a very informed decision on when it is
2927	// beneficial to apply stride versioning. It might make more sense that the
2928	// users of this analysis (such as the vectorizer) will trigger it, based on
2929	// their specific cost considerations; For example, in cases where stride
2930	// versioning does not help resolving memory accesses/dependences, the
2931	// vectorizer should evaluate the cost of the runtime test, and the benefit
2932	// of various possible stride specializations, considering the alternatives
2933	// of using gather/scatters (if available).
2934
2935	const SCEV *BETakenCount = PSE ->getBackedgeTakenCount();
2936
2937	// Match the types so we can compare the stride and the BETakenCount.
2938	// The Stride can be positive/negative, so we sign extend Stride;
2939	// The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
2940	const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2941	uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(Ty: StrideExpr->getType());
2942	uint64_t BETypeSizeBits = DL.getTypeSizeInBits(Ty: BETakenCount->getType());
2943	const SCEV *CastedStride = StrideExpr;
2944	const SCEV *CastedBECount = BETakenCount;
2945	ScalarEvolution *SE = PSE ->getSE();
2946	if (BETypeSizeBits >= StrideTypeSizeBits)
2947	CastedStride = SE->getNoopOrSignExtend(V: StrideExpr, Ty: BETakenCount->getType());
2948	else
2949	CastedBECount = SE->getZeroExtendExpr(Op: BETakenCount, Ty: StrideExpr->getType());
2950	const SCEV *StrideMinusBETaken = SE->getMinusSCEV(LHS: CastedStride, RHS: CastedBECount);
2951	// Since TripCount == BackEdgeTakenCount + 1, checking:
2952	// "Stride >= TripCount" is equivalent to checking:
2953	// Stride - BETakenCount > 0
2954	if (SE->isKnownPositive(S: StrideMinusBETaken)) {
2955	LLVM_DEBUG(
2956	dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2957	"Stride==1 predicate will imply that the loop executes "
2958	"at most once.\n");
2959	return;
2960	}
2961	LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
2962
2963	// Strip back off the integer cast, and check that our result is a
2964	// SCEVUnknown as we expect.
2965	const SCEV *StrideBase = StrideExpr;
2966	if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(Val: StrideBase))
2967	StrideBase = C->getOperand();
2968	SymbolicStrides [Ptr] = cast<SCEVUnknown>(Val: StrideBase);
2969	}
2970
2971	LoopAccessInfo::LoopAccessInfo(Loop L, ScalarEvolution SE,
2972	const TargetLibraryInfo TLI, AAResults AA,
2973	DominatorTree DT, LoopInfo LI)
2974	: PSE(std::make_unique<PredicatedScalarEvolution>(args&: SE, args&: L)),
2975	PtrRtChecking (nullptr),
2976	DepChecker(std::make_unique<MemoryDepChecker>(args&: *PSE, args&: L)), TheLoop(L) {
2977	PtrRtChecking = std::make_unique<RuntimePointerChecking>(args&: *DepChecker, args&: SE);
2978	if (canAnalyzeLoop()) {
2979	analyzeLoop(AA, LI, TLI, DT);
2980	}
2981	}
2982
2983	void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2984	if (CanVecMem) {
2985	OS.indent(NumSpaces: Depth) << "Memory dependences are safe";
2986	const MemoryDepChecker &DC = getDepChecker();
2987	if (!DC.isSafeForAnyVectorWidth())
2988	OS << " with a maximum safe vector width of "
2989	<< DC.getMaxSafeVectorWidthInBits() << " bits";
2990	if (PtrRtChecking ->Need)
2991	OS << " with run-time checks";
2992	OS << "\n";
2993	}
2994
2995	if (HasConvergentOp)
2996	OS.indent(NumSpaces: Depth) << "Has convergent operation in loop\n";
2997
2998	if (Report)
2999	OS.indent(NumSpaces: Depth) << "Report: " << Report ->getMsg() << "\n";
3000
3001	if (auto *Dependences = DepChecker ->getDependences()) {
3002	OS.indent(NumSpaces: Depth) << "Dependences:\n";
3003	for (const auto &Dep : *Dependences) {
3004	Dep.print(OS, Depth: Depth + `2`, Instrs: DepChecker ->getMemoryInstructions());
3005	OS << "\n";
3006	}
3007	} else
3008	OS.indent(NumSpaces: Depth) << "Too many dependences, not recorded\n";
3009
3010	// List the pair of accesses need run-time checks to prove independence.
3011	PtrRtChecking ->print(OS, Depth);
3012	OS << "\n";
3013
3014	OS.indent(NumSpaces: Depth) << "Non vectorizable stores to invariant address were "
3015	<< (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
3016	<< "found in loop.\n";
3017
3018	OS.indent(NumSpaces: Depth) << "SCEV assumptions:\n";
3019	PSE ->getPredicate().print(OS, Depth);
3020
3021	OS << "\n";
3022
3023	OS.indent(NumSpaces: Depth) << "Expressions re-written:\n";
3024	PSE ->print(OS, Depth);
3025	}
3026
3027	const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
3028	auto I = LoopAccessInfoMap.insert(KV: {&L, nullptr});
3029
3030	if (I.second)
3031	I.first ->second =
3032	std::make_unique<LoopAccessInfo>(args: &L, args: &SE, args&: TLI, args: &AA, args: &DT, args: &LI);
3033
3034	return *I.first ->second;
3035	}
3036
3037	bool LoopAccessInfoManager::invalidate(
3038	Function &F, const PreservedAnalyses &PA,
3039	FunctionAnalysisManager::Invalidator &Inv) {
3040	// Check whether our analysis is preserved.
3041	auto PAC = PA.getChecker<LoopAccessAnalysis>();
3042	if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3043	// If not, give up now.
3044	return true;
3045
3046	// Check whether the analyses we depend on became invalid for any reason.
3047	// Skip checking TargetLibraryAnalysis as it is immutable and can't become
3048	// invalid.
3049	return Inv.invalidate<AAManager>(IR&: F, PA) \|\|
3050	Inv.invalidate<ScalarEvolutionAnalysis>(IR&: F, PA) \|\|
3051	Inv.invalidate<LoopAnalysis>(IR&: F, PA) \|\|
3052	Inv.invalidate<DominatorTreeAnalysis>(IR&: F, PA);
3053	}
3054
3055	LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
3056	FunctionAnalysisManager &FAM) {
3057	auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(IR&: F);
3058	auto &AA = FAM.getResult<AAManager>(IR&: F);
3059	auto &DT = FAM.getResult<DominatorTreeAnalysis>(IR&: F);
3060	auto &LI = FAM.getResult<LoopAnalysis>(IR&: F);
3061	auto &TLI = FAM.getResult<TargetLibraryAnalysis>(IR&: F);
3062	return LoopAccessInfoManager (SE, AA, DT, LI, &TLI);
3063	}
3064
3065	AnalysisKey LoopAccessAnalysis::Key;
3066

source code of llvm/lib/Analysis/LoopAccessAnalysis.cpp