ScopBuilder.cpp source code [polly/lib/Analysis/ScopBuilder.cpp]

1	//===- ScopBuilder.cpp ----------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Create a polyhedral description for a static control flow region.
10	//
11	// The pass creates a polyhedral description of the Scops detected by the SCoP
12	// detection derived from their LLVM-IR code.
13	//
14	//===----------------------------------------------------------------------===//
15
16	#include "polly/ScopBuilder.h"
17	#include "polly/Options.h"
18	#include "polly/ScopDetection.h"
19	#include "polly/ScopInfo.h"
20	#include "polly/Support/GICHelper.h"
21	#include "polly/Support/ISLTools.h"
22	#include "polly/Support/SCEVValidator.h"
23	#include "polly/Support/ScopHelper.h"
24	#include "polly/Support/VirtualInstruction.h"
25	#include "llvm/ADT/ArrayRef.h"
26	#include "llvm/ADT/EquivalenceClasses.h"
27	#include "llvm/ADT/PostOrderIterator.h"
28	#include "llvm/ADT/Sequence.h"
29	#include "llvm/ADT/SmallSet.h"
30	#include "llvm/ADT/Statistic.h"
31	#include "llvm/Analysis/AliasAnalysis.h"
32	#include "llvm/Analysis/AssumptionCache.h"
33	#include "llvm/Analysis/Delinearization.h"
34	#include "llvm/Analysis/Loads.h"
35	#include "llvm/Analysis/LoopInfo.h"
36	#include "llvm/Analysis/OptimizationRemarkEmitter.h"
37	#include "llvm/Analysis/RegionInfo.h"
38	#include "llvm/Analysis/RegionIterator.h"
39	#include "llvm/Analysis/ScalarEvolution.h"
40	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41	#include "llvm/IR/BasicBlock.h"
42	#include "llvm/IR/DataLayout.h"
43	#include "llvm/IR/DebugLoc.h"
44	#include "llvm/IR/DerivedTypes.h"
45	#include "llvm/IR/Dominators.h"
46	#include "llvm/IR/Function.h"
47	#include "llvm/IR/InstrTypes.h"
48	#include "llvm/IR/Instruction.h"
49	#include "llvm/IR/Instructions.h"
50	#include "llvm/IR/Type.h"
51	#include "llvm/IR/Use.h"
52	#include "llvm/IR/Value.h"
53	#include "llvm/Support/CommandLine.h"
54	#include "llvm/Support/Compiler.h"
55	#include "llvm/Support/Debug.h"
56	#include "llvm/Support/ErrorHandling.h"
57	#include "llvm/Support/raw_ostream.h"
58	#include <cassert>
59
60	using namespace llvm;
61	using namespace polly;
62
63	#include "polly/Support/PollyDebug.h"
64	#define DEBUG_TYPE "polly-scops"
65
66	STATISTIC(ScopFound, "Number of valid Scops");
67	STATISTIC(RichScopFound, "Number of Scops containing a loop");
68	STATISTIC(InfeasibleScops,
69	"Number of SCoPs with statically infeasible context.");
70
71	bool polly::ModelReadOnlyScalars;
72
73	// The maximal number of dimensions we allow during invariant load construction.
74	// More complex access ranges will result in very high compile time and are also
75	// unlikely to result in good code. This value is very high and should only
76	// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
77	static unsigned const MaxDimensionsInAccessRange = `9`;
78
79	static cl::opt<bool, true> XModelReadOnlyScalars(
80	"polly-analyze-read-only-scalars",
81	cl::desc ("Model read-only scalar values in the scop description"),
82	cl::location(L&: ModelReadOnlyScalars), cl::Hidden, cl::init(Val: true),
83	cl::cat (PollyCategory));
84
85	static cl::opt<int>
86	OptComputeOut("polly-analysis-computeout",
87	cl::desc ("Bound the scop analysis by a maximal amount of "
88	"computational steps (0 means no bound)"),
89	cl::Hidden, cl::init(Val: `800000`), cl::cat (PollyCategory));
90
91	static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
92	"polly-allow-dereference-of-all-function-parameters",
93	cl::desc (
94	"Treat all parameters to functions that are pointers as dereferencible."
95	" This is useful for invariant load hoisting, since we can generate"
96	" less runtime checks. This is only valid if all pointers to functions"
97	" are always initialized, so that Polly can choose to hoist"
98	" their loads. "),
99	cl::Hidden, cl::init(Val: false), cl::cat (PollyCategory));
100
101	static cl::opt<bool>
102	PollyIgnoreInbounds("polly-ignore-inbounds",
103	cl::desc ("Do not take inbounds assumptions at all"),
104	cl::Hidden, cl::init(Val: false), cl::cat (PollyCategory));
105
106	static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
107	"polly-rtc-max-arrays-per-group",
108	cl::desc ("The maximal number of arrays to compare in each alias group."),
109	cl::Hidden, cl::init(Val: `20`), cl::cat (PollyCategory));
110
111	static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
112	"polly-rtc-max-array-disjuncts",
113	cl::desc ("The maximal number of disjunts allowed in memory accesses to "
114	"to build RTCs."),
115	cl::Hidden, cl::init(Val: `8`), cl::cat (PollyCategory));
116
117	static cl::opt<unsigned> RunTimeChecksMaxParameters(
118	"polly-rtc-max-parameters",
119	cl::desc ("The maximal number of parameters allowed in RTCs."), cl::Hidden,
120	cl::init(Val: `8`), cl::cat (PollyCategory));
121
122	static cl::opt<bool> UnprofitableScalarAccs(
123	"polly-unprofitable-scalar-accs",
124	cl::desc ("Count statements with scalar accesses as not optimizable"),
125	cl::Hidden, cl::init(Val: false), cl::cat (PollyCategory));
126
127	static cl::opt<std::string> UserContextStr(
128	"polly-context", cl::value_desc ("isl parameter set"),
129	cl::desc ("Provide additional constraints on the context parameters"),
130	cl::init(Val: ""), cl::cat (PollyCategory));
131
132	static cl::opt<bool> DetectReductions("polly-detect-reductions",
133	cl::desc ("Detect and exploit reductions"),
134	cl::Hidden, cl::init(Val: true),
135	cl::cat (PollyCategory));
136
137	// Multiplicative reductions can be disabled separately as these kind of
138	// operations can overflow easily. Additive reductions and bit operations
139	// are in contrast pretty stable.
140	static cl::opt<bool> DisableMultiplicativeReductions(
141	"polly-disable-multiplicative-reductions",
142	cl::desc ("Disable multiplicative reductions"), cl::Hidden,
143	cl::cat (PollyCategory));
144
145	enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
146
147	static cl::opt<GranularityChoice> StmtGranularity(
148	"polly-stmt-granularity",
149	cl::desc (
150	"Algorithm to use for splitting basic blocks into multiple statements"),
151	cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
152	"One statement per basic block"),
153	clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
154	"Scalar independence heuristic"),
155	clEnumValN(GranularityChoice::Stores, "store",
156	"Store-level granularity")),
157	cl::init(Val: GranularityChoice::ScalarIndependence), cl::cat (PollyCategory));
158
159	/// Helper to treat non-affine regions and basic blocks the same.
160	///
161	///{
162
163	/// Return the block that is the representing block for @p RN.
164	static inline BasicBlock getRegionNodeBasicBlock(RegionNode RN) {
165	return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
166	: RN->getNodeAs<BasicBlock>();
167	}
168
169	/// Return the @p idx'th block that is executed after @p RN.
170	static inline BasicBlock *
171	getRegionNodeSuccessor(RegionNode RN, Instruction TI, unsigned idx) {
172	if (RN->isSubRegion()) {
173	assert(idx == `0`);
174	return RN->getNodeAs<Region>()->getExit();
175	}
176	return TI->getSuccessor(Idx: idx);
177	}
178
179	static bool containsErrorBlock(RegionNode RN, const* Region &R,
180	ScopDetection *SD) {
181	if (!RN->isSubRegion())
182	return SD->isErrorBlock(BB&: *RN->getNodeAs<BasicBlock>(), R);
183	for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
184	if (SD->isErrorBlock(BB&: *BB, R))
185	return true;
186	return false;
187	}
188
189	///}
190
191	/// Create a map to map from a given iteration to a subsequent iteration.
192	///
193	/// This map maps from SetSpace -> SetSpace where the dimensions @p Dim
194	/// is incremented by one and all other dimensions are equal, e.g.,
195	/// [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
196	///
197	/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
198	static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
199	isl::space MapSpace = SetSpace.map_from_set();
200	isl::map NextIterationMap = isl::map::universe(space: MapSpace);
201	for (unsigned u : rangeIslSize(Begin: `0`, End: NextIterationMap.domain_tuple_dim()))
202	if (u != Dim)
203	NextIterationMap =
204	NextIterationMap.equate(type1: isl::dim::in, pos1: u, type2: isl::dim::out, pos2: u);
205	isl::constraint C =
206	isl::constraint::alloc_equality(ls: isl::local_space (MapSpace));
207	C = C.set_constant_si(`1`);
208	C = C.set_coefficient_si(type: isl::dim::in, pos: Dim, v: `1`);
209	C = C.set_coefficient_si(type: isl::dim::out, pos: Dim, v: -`1`);
210	NextIterationMap = NextIterationMap.add_constraint(constraint: C);
211	return NextIterationMap;
212	}
213
214	/// Add @p BSet to set @p BoundedParts if @p BSet is bounded.
215	static isl::set collectBoundedParts(isl::set S) {
216	isl::set BoundedParts = isl::set::empty(space: S.get_space());
217	for (isl::basic_set BSet : S.get_basic_set_list())
218	if (BSet.is_bounded())
219	BoundedParts = BoundedParts.unite(set2: isl::set (BSet));
220	return BoundedParts;
221	}
222
223	/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
224	///
225	/// @returns A separation of @p S into first an unbounded then a bounded subset,
226	/// both with regards to the dimension @p Dim.
227	static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
228	unsigned Dim) {
229	for (unsigned u : rangeIslSize(Begin: `0`, End: S.tuple_dim()))
230	S = S.lower_bound_si(type: isl::dim::set, pos: u, value: `0`);
231
232	unsigned NumDimsS = unsignedFromIslSize(Size: S.tuple_dim());
233	isl::set OnlyDimS = S;
234
235	// Remove dimensions that are greater than Dim as they are not interesting.
236	assert(NumDimsS >= Dim + `1`);
237	OnlyDimS = OnlyDimS.project_out(type: isl::dim::set, first: Dim + `1`, n: NumDimsS - Dim - `1`);
238
239	// Create artificial parametric upper bounds for dimensions smaller than Dim
240	// as we are not interested in them.
241	OnlyDimS = OnlyDimS.insert_dims(type: isl::dim::param, pos: `0`, n: Dim);
242
243	for (unsigned u = `0`; u < Dim; u++) {
244	isl::constraint C = isl::constraint::alloc_inequality(
245	ls: isl::local_space (OnlyDimS.get_space()));
246	C = C.set_coefficient_si(type: isl::dim::param, pos: u, v: `1`);
247	C = C.set_coefficient_si(type: isl::dim::set, pos: u, v: -`1`);
248	OnlyDimS = OnlyDimS.add_constraint(constraint: C);
249	}
250
251	// Collect all bounded parts of OnlyDimS.
252	isl::set BoundedParts = collectBoundedParts(S: OnlyDimS);
253
254	// Create the dimensions greater than Dim again.
255	BoundedParts =
256	BoundedParts.insert_dims(type: isl::dim::set, pos: Dim + `1`, n: NumDimsS - Dim - `1`);
257
258	// Remove the artificial upper bound parameters again.
259	BoundedParts = BoundedParts.remove_dims(type: isl::dim::param, first: `0`, n: Dim);
260
261	isl::set UnboundedParts = S.subtract(set2: BoundedParts);
262	return std::make_pair(x&: UnboundedParts, y&: BoundedParts);
263	}
264
265	/// Create the conditions under which @p L @p Pred @p R is true.
266	static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
267	isl::pw_aff R) {
268	switch (Pred) {
269	case ICmpInst::ICMP_EQ:
270	return L.eq_set(pwaff2: R);
271	case ICmpInst::ICMP_NE:
272	return L.ne_set(pwaff2: R);
273	case ICmpInst::ICMP_SLT:
274	return L.lt_set(pwaff2: R);
275	case ICmpInst::ICMP_SLE:
276	return L.le_set(pwaff2: R);
277	case ICmpInst::ICMP_SGT:
278	return L.gt_set(pwaff2: R);
279	case ICmpInst::ICMP_SGE:
280	return L.ge_set(pwaff2: R);
281	case ICmpInst::ICMP_ULT:
282	return L.lt_set(pwaff2: R);
283	case ICmpInst::ICMP_UGT:
284	return L.gt_set(pwaff2: R);
285	case ICmpInst::ICMP_ULE:
286	return L.le_set(pwaff2: R);
287	case ICmpInst::ICMP_UGE:
288	return L.ge_set(pwaff2: R);
289	default:
290	llvm_unreachable("Non integer predicate not supported");
291	}
292	}
293
294	isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
295	Loop *NewL) {
296	// If the loops are the same there is nothing to do.
297	if (NewL == OldL)
298	return Dom;
299
300	int OldDepth = scop ->getRelativeLoopDepth(L: OldL);
301	int NewDepth = scop ->getRelativeLoopDepth(L: NewL);
302	// If both loops are non-affine loops there is nothing to do.
303	if (OldDepth == -`1` && NewDepth == -`1`)
304	return Dom;
305
306	// Distinguish three cases:
307	// 1) The depth is the same but the loops are not.
308	// => One loop was left one was entered.
309	// 2) The depth increased from OldL to NewL.
310	// => One loop was entered, none was left.
311	// 3) The depth decreased from OldL to NewL.
312	// => Loops were left were difference of the depths defines how many.
313	if (OldDepth == NewDepth) {
314	assert(OldL->getParentLoop() == NewL->getParentLoop());
315	Dom = Dom.project_out(type: isl::dim::set, first: NewDepth, n: `1`);
316	Dom = Dom.add_dims(type: isl::dim::set, n: `1`);
317	} else if (OldDepth < NewDepth) {
318	assert(OldDepth + `1` == NewDepth);
319	auto &R = scop ->getRegion();
320	(void)R;
321	assert(NewL->getParentLoop() == OldL \|\|
322	((!OldL \|\| !R.contains(OldL)) && R.contains(NewL)));
323	Dom = Dom.add_dims(type: isl::dim::set, n: `1`);
324	} else {
325	assert(OldDepth > NewDepth);
326	unsigned Diff = OldDepth - NewDepth;
327	unsigned NumDim = unsignedFromIslSize(Size: Dom.tuple_dim());
328	assert(NumDim >= Diff);
329	Dom = Dom.project_out(type: isl::dim::set, first: NumDim - Diff, n: Diff);
330	}
331
332	return Dom;
333	}
334
335	/// Compute the isl representation for the SCEV @p E in this BB.
336	///
337	/// @param BB The BB for which isl representation is to be
338	/// computed.
339	/// @param InvalidDomainMap A map of BB to their invalid domains.
340	/// @param E The SCEV that should be translated.
341	/// @param NonNegative Flag to indicate the @p E has to be non-negative.
342	///
343	/// Note that this function will also adjust the invalid context accordingly.
344
345	__isl_give isl_pw_aff *
346	ScopBuilder::getPwAff(BasicBlock *BB,
347	DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
348	const SCEV E, bool* NonNegative) {
349	PWACtx PWAC = scop ->getPwAff(E, BB, NonNegative, RecordedAssumptions: &RecordedAssumptions);
350	InvalidDomainMap [BB] = InvalidDomainMap [BB].unite(set2: PWAC.second);
351	return PWAC.first.release();
352	}
353
354	/// Build condition sets for unsigned ICmpInst(s).
355	/// Special handling is required for unsigned operands to ensure that if
356	/// MSB (aka the Sign bit) is set for an operands in an unsigned ICmpInst
357	/// it should wrap around.
358	///
359	/// @param IsStrictUpperBound holds information on the predicate relation
360	/// between TestVal and UpperBound, i.e,
361	/// TestVal < UpperBound OR TestVal <= UpperBound
362	__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
363	BasicBlock BB, Value Condition, __isl_keep isl_set *Domain,
364	const SCEV SCEV_TestVal, const* SCEV *SCEV_UpperBound,
365	DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
366	bool IsStrictUpperBound) {
367	// Do not take NonNeg assumption on TestVal
368	// as it might have MSB (Sign bit) set.
369	isl_pw_aff TestVal = getPwAff(BB, InvalidDomainMap, E: SCEV_TestVal, NonNegative: false*);
370	// Take NonNeg assumption on UpperBound.
371	isl_pw_aff *UpperBound =
372	getPwAff(BB, InvalidDomainMap, E: SCEV_UpperBound, NonNegative: true);
373
374	// 0 <= TestVal
375	isl_set *First =
376	isl_pw_aff_le_set(pwaff1: isl_pw_aff_zero_on_domain(ls: isl_local_space_from_space(
377	space: isl_pw_aff_get_domain_space(pwaff: TestVal))),
378	pwaff2: isl_pw_aff_copy(pwaff: TestVal));
379
380	isl_set *Second;
381	if (IsStrictUpperBound)
382	// TestVal < UpperBound
383	Second = isl_pw_aff_lt_set(pwaff1: TestVal, pwaff2: UpperBound);
384	else
385	// TestVal <= UpperBound
386	Second = isl_pw_aff_le_set(pwaff1: TestVal, pwaff2: UpperBound);
387
388	isl_set *ConsequenceCondSet = isl_set_intersect(set1: First, set2: Second);
389	return ConsequenceCondSet;
390	}
391
392	bool ScopBuilder::buildConditionSets(
393	BasicBlock BB, SwitchInst SI, Loop L, __isl_keep isl_set Domain,
394	DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
395	SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
396	Value *Condition = getConditionFromTerminator(TI: SI);
397	assert(Condition && "No condition for switch");
398
399	isl_pw_aff LHS, RHS;
400	LHS = getPwAff(BB, InvalidDomainMap, E: SE.getSCEVAtScope(V: Condition, L));
401
402	unsigned NumSuccessors = SI->getNumSuccessors();
403	ConditionSets.resize(N: NumSuccessors);
404	for (auto &Case : SI->cases()) {
405	unsigned Idx = Case.getSuccessorIndex();
406	ConstantInt *CaseValue = Case.getCaseValue();
407
408	RHS = getPwAff(BB, InvalidDomainMap, E: SE.getSCEV(V: CaseValue));
409	isl_set *CaseConditionSet =
410	buildConditionSet(Pred: ICmpInst::ICMP_EQ, L: isl::manage_copy(ptr: LHS),
411	R: isl::manage(ptr: RHS))
412	.release();
413	ConditionSets [Idx] = isl_set_coalesce(
414	set: isl_set_intersect(set1: CaseConditionSet, set2: isl_set_copy(set: Domain)));
415	}
416
417	assert(ConditionSets[`0`] == nullptr && "Default condition set was set");
418	isl_set *ConditionSetUnion = isl_set_copy(set: ConditionSets [`1`]);
419	for (unsigned u = `2`; u < NumSuccessors; u++)
420	ConditionSetUnion =
421	isl_set_union(set1: ConditionSetUnion, set2: isl_set_copy(set: ConditionSets [u]));
422	ConditionSets [`0`] = isl_set_subtract(set1: isl_set_copy(set: Domain), set2: ConditionSetUnion);
423
424	isl_pw_aff_free(pwaff: LHS);
425
426	return true;
427	}
428
429	bool ScopBuilder::buildConditionSets(
430	BasicBlock BB, Value Condition, Instruction TI, Loop L,
431	__isl_keep isl_set *Domain,
432	DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
433	SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
434	isl_set ConsequenceCondSet = nullptr*;
435
436	if (auto Load = dyn_cast<LoadInst>(Val: Condition)) {
437	const SCEV *LHSSCEV = SE.getSCEVAtScope(V: Load, L);
438	const SCEV *RHSSCEV = SE.getZero(Ty: LHSSCEV->getType());
439	bool NonNeg = false;
440	isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, E: LHSSCEV, NonNegative: NonNeg);
441	isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, E: RHSSCEV, NonNegative: NonNeg);
442	ConsequenceCondSet = buildConditionSet(Pred: ICmpInst::ICMP_SLE, L: isl::manage(ptr: LHS),
443	R: isl::manage(ptr: RHS))
444	.release();
445	} else if (auto *PHI = dyn_cast<PHINode>(Val: Condition)) {
446	auto *Unique = dyn_cast<ConstantInt>(
447	Val: getUniqueNonErrorValue(PHI, R: &scop ->getRegion(), SD: &SD));
448	assert(Unique &&
449	"A PHINode condition should only be accepted by ScopDetection if "
450	"getUniqueNonErrorValue returns non-NULL");
451
452	if (Unique->isZero())
453	ConsequenceCondSet = isl_set_empty(space: isl_set_get_space(set: Domain));
454	else
455	ConsequenceCondSet = isl_set_universe(space: isl_set_get_space(set: Domain));
456	} else if (auto *CCond = dyn_cast<ConstantInt>(Val: Condition)) {
457	if (CCond->isZero())
458	ConsequenceCondSet = isl_set_empty(space: isl_set_get_space(set: Domain));
459	else
460	ConsequenceCondSet = isl_set_universe(space: isl_set_get_space(set: Domain));
461	} else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val: Condition)) {
462	auto Opcode = BinOp->getOpcode();
463	assert(Opcode == Instruction::And \|\| Opcode == Instruction::Or);
464
465	bool Valid = buildConditionSets(BB, Condition: BinOp->getOperand(i_nocapture: `0`), TI, L, Domain,
466	InvalidDomainMap, ConditionSets) &&
467	buildConditionSets(BB, Condition: BinOp->getOperand(i_nocapture: `1`), TI, L, Domain,
468	InvalidDomainMap, ConditionSets);
469	if (!Valid) {
470	while (!ConditionSets.empty())
471	isl_set_free(set: ConditionSets.pop_back_val());
472	return false;
473	}
474
475	isl_set_free(set: ConditionSets.pop_back_val());
476	isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
477	isl_set_free(set: ConditionSets.pop_back_val());
478	isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
479
480	if (Opcode == Instruction::And)
481	ConsequenceCondSet = isl_set_intersect(set1: ConsCondPart0, set2: ConsCondPart1);
482	else
483	ConsequenceCondSet = isl_set_union(set1: ConsCondPart0, set2: ConsCondPart1);
484	} else {
485	auto *ICond = dyn_cast<ICmpInst>(Val: Condition);
486	assert(ICond &&
487	"Condition of exiting branch was neither constant nor ICmp!");
488
489	Region &R = scop ->getRegion();
490
491	isl_pw_aff LHS, RHS;
492	// For unsigned comparisons we assumed the signed bit of neither operand
493	// to be set. The comparison is equal to a signed comparison under this
494	// assumption.
495	bool NonNeg = ICond->isUnsigned();
496	const SCEV *LeftOperand = SE.getSCEVAtScope(V: ICond->getOperand(i_nocapture: `0`), L),
497	*RightOperand = SE.getSCEVAtScope(V: ICond->getOperand(i_nocapture: `1`), L);
498
499	LeftOperand = tryForwardThroughPHI(Expr: LeftOperand, R, SE, SD: &SD);
500	RightOperand = tryForwardThroughPHI(Expr: RightOperand, R, SE, SD: &SD);
501
502	switch (ICond->getPredicate()) {
503	case ICmpInst::ICMP_ULT:
504	ConsequenceCondSet =
505	buildUnsignedConditionSets(BB, Condition, Domain, SCEV_TestVal: LeftOperand,
506	SCEV_UpperBound: RightOperand, InvalidDomainMap, IsStrictUpperBound: true);
507	break;
508	case ICmpInst::ICMP_ULE:
509	ConsequenceCondSet =
510	buildUnsignedConditionSets(BB, Condition, Domain, SCEV_TestVal: LeftOperand,
511	SCEV_UpperBound: RightOperand, InvalidDomainMap, IsStrictUpperBound: false);
512	break;
513	case ICmpInst::ICMP_UGT:
514	ConsequenceCondSet =
515	buildUnsignedConditionSets(BB, Condition, Domain, SCEV_TestVal: RightOperand,
516	SCEV_UpperBound: LeftOperand, InvalidDomainMap, IsStrictUpperBound: true);
517	break;
518	case ICmpInst::ICMP_UGE:
519	ConsequenceCondSet =
520	buildUnsignedConditionSets(BB, Condition, Domain, SCEV_TestVal: RightOperand,
521	SCEV_UpperBound: LeftOperand, InvalidDomainMap, IsStrictUpperBound: false);
522	break;
523	default:
524	LHS = getPwAff(BB, InvalidDomainMap, E: LeftOperand, NonNegative: NonNeg);
525	RHS = getPwAff(BB, InvalidDomainMap, E: RightOperand, NonNegative: NonNeg);
526	ConsequenceCondSet = buildConditionSet(Pred: ICond->getPredicate(),
527	L: isl::manage(ptr: LHS), R: isl::manage(ptr: RHS))
528	.release();
529	break;
530	}
531	}
532
533	// If no terminator was given we are only looking for parameter constraints
534	// under which @p Condition is true/false.
535	if (!TI)
536	ConsequenceCondSet = isl_set_params(set: ConsequenceCondSet);
537	assert(ConsequenceCondSet);
538	ConsequenceCondSet = isl_set_coalesce(
539	set: isl_set_intersect(set1: ConsequenceCondSet, set2: isl_set_copy(set: Domain)));
540
541	isl_set AlternativeCondSet = nullptr*;
542	bool TooComplex =
543	isl_set_n_basic_set(set: ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;
544
545	if (!TooComplex) {
546	AlternativeCondSet = isl_set_subtract(set1: isl_set_copy(set: Domain),
547	set2: isl_set_copy(set: ConsequenceCondSet));
548	TooComplex =
549	isl_set_n_basic_set(set: AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
550	}
551
552	if (TooComplex) {
553	scop ->invalidate(Kind: COMPLEXITY, Loc: TI ? TI->getDebugLoc() : DebugLoc(),
554	BB: TI ? TI->getParent() : nullptr / BasicBlock /);
555	isl_set_free(set: AlternativeCondSet);
556	isl_set_free(set: ConsequenceCondSet);
557	return false;
558	}
559
560	ConditionSets.push_back(Elt: ConsequenceCondSet);
561	ConditionSets.push_back(Elt: isl_set_coalesce(set: AlternativeCondSet));
562
563	return true;
564	}
565
566	bool ScopBuilder::buildConditionSets(
567	BasicBlock BB, Instruction TI, Loop L, __isl_keep isl_set Domain,
568	DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
569	SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
570	if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI))
571	return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
572	ConditionSets);
573
574	assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
575
576	if (TI->getNumSuccessors() == `1`) {
577	ConditionSets.push_back(Elt: isl_set_copy(set: Domain));
578	return true;
579	}
580
581	Value *Condition = getConditionFromTerminator(TI);
582	assert(Condition && "No condition for Terminator");
583
584	return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
585	ConditionSets);
586	}
587
588	bool ScopBuilder::propagateDomainConstraints(
589	Region R, DenseMap<BasicBlock , isl::set> &InvalidDomainMap) {
590	// Iterate over the region R and propagate the domain constrains from the
591	// predecessors to the current node. In contrast to the
592	// buildDomainsWithBranchConstraints function, this one will pull the domain
593	// information from the predecessors instead of pushing it to the successors.
594	// Additionally, we assume the domains to be already present in the domain
595	// map here. However, we iterate again in reverse post order so we know all
596	// predecessors have been visited before a block or non-affine subregion is
597	// visited.
598
599	ReversePostOrderTraversal<Region *> RTraversal(R);
600	for (auto *RN : RTraversal) {
601	// Recurse for affine subregions but go on for basic blocks and non-affine
602	// subregions.
603	if (RN->isSubRegion()) {
604	Region *SubRegion = RN->getNodeAs<Region>();
605	if (!scop ->isNonAffineSubRegion(R: SubRegion)) {
606	if (!propagateDomainConstraints(R: SubRegion, InvalidDomainMap))
607	return false;
608	continue;
609	}
610	}
611
612	BasicBlock *BB = getRegionNodeBasicBlock(RN);
613	isl::set &Domain = scop ->getOrInitEmptyDomain(BB);
614	assert(!Domain.is_null());
615
616	// Under the union of all predecessor conditions we can reach this block.
617	isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
618	Domain = Domain.intersect(set2: PredDom).coalesce();
619	Domain = Domain.align_params(model: scop ->getParamSpace());
620
621	Loop *BBLoop = getRegionNodeLoop(RN, LI);
622	if (BBLoop && BBLoop->getHeader() == BB && scop ->contains(L: BBLoop))
623	if (!addLoopBoundsToHeaderDomain(L: BBLoop, InvalidDomainMap))
624	return false;
625	}
626
627	return true;
628	}
629
630	void ScopBuilder::propagateDomainConstraintsToRegionExit(
631	BasicBlock BB, Loop BBLoop,
632	SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
633	DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
634	// Check if the block @p BB is the entry of a region. If so we propagate it's
635	// domain to the exit block of the region. Otherwise we are done.
636	auto *RI = scop ->getRegion().getRegionInfo();
637	auto BBReg = RI ? RI->getRegionFor(BB) : nullptr*;
638	auto ExitBB = BBReg ? BBReg->getExit() : nullptr*;
639	if (!BBReg \|\| BBReg->getEntry() != BB \|\| !scop ->contains(BB: ExitBB))
640	return;
641
642	// Do not propagate the domain if there is a loop backedge inside the region
643	// that would prevent the exit block from being executed.
644	auto *L = BBLoop;
645	while (L && scop ->contains(L)) {
646	SmallVector<BasicBlock *, `4`> LatchBBs;
647	BBLoop->getLoopLatches(LoopLatches&: LatchBBs);
648	for (auto *LatchBB : LatchBBs)
649	if (BB != LatchBB && BBReg->contains(BB: LatchBB))
650	return;
651	L = L->getParentLoop();
652	}
653
654	isl::set Domain = scop ->getOrInitEmptyDomain(BB);
655	assert(!Domain.is_null() && "Cannot propagate a nullptr");
656
657	Loop *ExitBBLoop = getFirstNonBoxedLoopFor(BB: ExitBB, LI, BoxedLoops: scop ->getBoxedLoops());
658
659	// Since the dimensions of @p BB and @p ExitBB might be different we have to
660	// adjust the domain before we can propagate it.
661	isl::set AdjustedDomain = adjustDomainDimensions(Dom: Domain, OldL: BBLoop, NewL: ExitBBLoop);
662	isl::set &ExitDomain = scop ->getOrInitEmptyDomain(BB: ExitBB);
663
664	// If the exit domain is not yet created we set it otherwise we "add" the
665	// current domain.
666	ExitDomain =
667	!ExitDomain.is_null() ? AdjustedDomain.unite(set2: ExitDomain) : AdjustedDomain;
668
669	// Initialize the invalid domain.
670	InvalidDomainMap [ExitBB] = ExitDomain.empty(space: ExitDomain.get_space());
671
672	FinishedExitBlocks.insert(Ptr: ExitBB);
673	}
674
675	isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
676	isl::set Domain) {
677	// If @p BB is the ScopEntry we are done
678	if (scop ->getRegion().getEntry() == BB)
679	return isl::set::universe(space: Domain.get_space());
680
681	// The region info of this function.
682	auto &RI = *scop ->getRegion().getRegionInfo();
683
684	Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, BoxedLoops: scop ->getBoxedLoops());
685
686	// A domain to collect all predecessor domains, thus all conditions under
687	// which the block is executed. To this end we start with the empty domain.
688	isl::set PredDom = isl::set::empty(space: Domain.get_space());
689
690	// Set of regions of which the entry block domain has been propagated to BB.
691	// all predecessors inside any of the regions can be skipped.
692	SmallSet<Region *, `8`> PropagatedRegions;
693
694	for (auto *PredBB : predecessors(BB)) {
695	// Skip backedges.
696	if (DT.dominates(A: BB, B: PredBB))
697	continue;
698
699	// If the predecessor is in a region we used for propagation we can skip it.
700	auto PredBBInRegion = [PredBB](Region PR) { return* PR->contains(BB: PredBB); };
701	if (llvm::any_of(Range&: PropagatedRegions, P: PredBBInRegion)) {
702	continue;
703	}
704
705	// Check if there is a valid region we can use for propagation, thus look
706	// for a region that contains the predecessor and has @p BB as exit block.
707	// FIXME: This was an side-effect-free (and possibly infinite) loop when
708	// committed and seems not to be needed.
709	auto *PredR = RI.getRegionFor(BB: PredBB);
710	while (PredR->getExit() != BB && !PredR->contains(BB))
711	PredR = PredR->getParent();
712
713	// If a valid region for propagation was found use the entry of that region
714	// for propagation, otherwise the PredBB directly.
715	if (PredR->getExit() == BB) {
716	PredBB = PredR->getEntry();
717	PropagatedRegions.insert(Ptr: PredR);
718	}
719
720	isl::set PredBBDom = scop ->getDomainConditions(BB: PredBB);
721	Loop *PredBBLoop =
722	getFirstNonBoxedLoopFor(BB: PredBB, LI, BoxedLoops: scop ->getBoxedLoops());
723	PredBBDom = adjustDomainDimensions(Dom: PredBBDom, OldL: PredBBLoop, NewL: BBLoop);
724	PredDom = PredDom.unite(set2: PredBBDom);
725	}
726
727	return PredDom;
728	}
729
730	bool ScopBuilder::addLoopBoundsToHeaderDomain(
731	Loop L, DenseMap<BasicBlock , isl::set> &InvalidDomainMap) {
732	int LoopDepth = scop ->getRelativeLoopDepth(L);
733	assert(LoopDepth >= `0` && "Loop in region should have at least depth one");
734
735	BasicBlock *HeaderBB = L->getHeader();
736	assert(scop ->isDomainDefined(HeaderBB));
737	isl::set &HeaderBBDom = scop ->getOrInitEmptyDomain(BB: HeaderBB);
738
739	isl::map NextIterationMap =
740	createNextIterationMap(SetSpace: HeaderBBDom.get_space(), Dim: LoopDepth);
741
742	isl::set UnionBackedgeCondition = HeaderBBDom.empty(space: HeaderBBDom.get_space());
743
744	SmallVector<BasicBlock *, `4`> LatchBlocks;
745	L->getLoopLatches(LoopLatches&: LatchBlocks);
746
747	for (BasicBlock *LatchBB : LatchBlocks) {
748	// If the latch is only reachable via error statements we skip it.
749	if (!scop ->isDomainDefined(BB: LatchBB))
750	continue;
751
752	isl::set LatchBBDom = scop ->getDomainConditions(BB: LatchBB);
753
754	isl::set BackedgeCondition;
755
756	Instruction *TI = LatchBB->getTerminator();
757	BranchInst *BI = dyn_cast<BranchInst>(Val: TI);
758	assert(BI && "Only branch instructions allowed in loop latches");
759
760	if (BI->isUnconditional())
761	BackedgeCondition = LatchBBDom;
762	else {
763	SmallVector<isl_set *, `8`> ConditionSets;
764	int idx = BI->getSuccessor(i: `0`) != HeaderBB;
765	if (!buildConditionSets(BB: LatchBB, TI, L, Domain: LatchBBDom.get(),
766	InvalidDomainMap, ConditionSets))
767	return false;
768
769	// Free the non back edge condition set as we do not need it.
770	isl_set_free(set: ConditionSets [`1` - idx]);
771
772	BackedgeCondition = isl::manage(ptr: ConditionSets [idx]);
773	}
774
775	int LatchLoopDepth = scop ->getRelativeLoopDepth(L: LI.getLoopFor(BB: LatchBB));
776	assert(LatchLoopDepth >= LoopDepth);
777	BackedgeCondition = BackedgeCondition.project_out(
778	type: isl::dim::set, first: LoopDepth + `1`, n: LatchLoopDepth - LoopDepth);
779	UnionBackedgeCondition = UnionBackedgeCondition.unite(set2: BackedgeCondition);
780	}
781
782	isl::map ForwardMap = ForwardMap.lex_le(set_space: HeaderBBDom.get_space());
783	for (int i = `0`; i < LoopDepth; i++)
784	ForwardMap = ForwardMap.equate(type1: isl::dim::in, pos1: i, type2: isl::dim::out, pos2: i);
785
786	isl::set UnionBackedgeConditionComplement =
787	UnionBackedgeCondition.complement();
788	UnionBackedgeConditionComplement =
789	UnionBackedgeConditionComplement.lower_bound_si(type: isl::dim::set, pos: LoopDepth,
790	value: `0`);
791	UnionBackedgeConditionComplement =
792	UnionBackedgeConditionComplement.apply(map: ForwardMap);
793	HeaderBBDom = HeaderBBDom.subtract(set2: UnionBackedgeConditionComplement);
794	HeaderBBDom = HeaderBBDom.apply(map: NextIterationMap);
795
796	auto Parts = partitionSetParts(S: HeaderBBDom, Dim: LoopDepth);
797	HeaderBBDom = Parts.second;
798
799	// Check if there is a <nsw> tagged AddRec for this loop and if so do not
800	// require a runtime check. The assumption is already implied by the <nsw>
801	// tag.
802	bool RequiresRTC = !scop ->hasNSWAddRecForLoop(L);
803
804	isl::set UnboundedCtx = Parts.first.params();
805	recordAssumption(RecordedAssumptions: &RecordedAssumptions, Kind: INFINITELOOP, Set: UnboundedCtx,
806	Loc: HeaderBB->getTerminator()->getDebugLoc(), Sign: AS_RESTRICTION,
807	BB: nullptr, RTC: RequiresRTC);
808	return true;
809	}
810
811	void ScopBuilder::buildInvariantEquivalenceClasses() {
812	DenseMap<std::pair<const SCEV , Type >, LoadInst *> EquivClasses;
813
814	const InvariantLoadsSetTy &RIL = scop ->getRequiredInvariantLoads();
815	for (LoadInst *LInst : RIL) {
816	const SCEV *PointerSCEV = SE.getSCEV(V: LInst->getPointerOperand());
817
818	Type *Ty = LInst->getType();
819	LoadInst *&ClassRep = EquivClasses [std::make_pair(x&: PointerSCEV, y&: Ty)];
820	if (ClassRep) {
821	scop ->addInvariantLoadMapping(LoadInst: LInst, ClassRep);
822	continue;
823	}
824
825	ClassRep = LInst;
826	scop ->addInvariantEquivClass(
827	InvariantEquivClass: InvariantEquivClassTy{.IdentifyingPointer: PointerSCEV, .InvariantAccesses: MemoryAccessList (), .ExecutionContext: {}, .AccessType: Ty});
828	}
829	}
830
831	bool ScopBuilder::buildDomains(
832	Region R, DenseMap<BasicBlock , isl::set> &InvalidDomainMap) {
833	bool IsOnlyNonAffineRegion = scop ->isNonAffineSubRegion(R);
834	auto *EntryBB = R->getEntry();
835	auto L = IsOnlyNonAffineRegion ? nullptr* : LI.getLoopFor(BB: EntryBB);
836	int LD = scop ->getRelativeLoopDepth(L);
837	auto *S =
838	isl_set_universe(space: isl_space_set_alloc(ctx: scop ->getIslCtx().get(), nparam: `0`, dim: LD + `1`));
839
840	InvalidDomainMap [EntryBB] = isl::manage(ptr: isl_set_empty(space: isl_set_get_space(set: S)));
841	isl::set Domain = isl::manage(ptr: S);
842	scop ->setDomain(BB: EntryBB, Domain);
843
844	if (IsOnlyNonAffineRegion)
845	return !containsErrorBlock(RN: R->getNode(), R: *R, SD: &SD);
846
847	if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
848	return false;
849
850	if (!propagateDomainConstraints(R, InvalidDomainMap))
851	return false;
852
853	// Error blocks and blocks dominated by them have been assumed to never be
854	// executed. Representing them in the Scop does not add any value. In fact,
855	// it is likely to cause issues during construction of the ScopStmts. The
856	// contents of error blocks have not been verified to be expressible and
857	// will cause problems when building up a ScopStmt for them.
858	// Furthermore, basic blocks dominated by error blocks may reference
859	// instructions in the error block which, if the error block is not modeled,
860	// can themselves not be constructed properly. To this end we will replace
861	// the domains of error blocks and those only reachable via error blocks
862	// with an empty set. Additionally, we will record for each block under which
863	// parameter combination it would be reached via an error block in its
864	// InvalidDomain. This information is needed during load hoisting.
865	if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
866	return false;
867
868	return true;
869	}
870
871	bool ScopBuilder::buildDomainsWithBranchConstraints(
872	Region R, DenseMap<BasicBlock , isl::set> &InvalidDomainMap) {
873	// To create the domain for each block in R we iterate over all blocks and
874	// subregions in R and propagate the conditions under which the current region
875	// element is executed. To this end we iterate in reverse post order over R as
876	// it ensures that we first visit all predecessors of a region node (either a
877	// basic block or a subregion) before we visit the region node itself.
878	// Initially, only the domain for the SCoP region entry block is set and from
879	// there we propagate the current domain to all successors, however we add the
880	// condition that the successor is actually executed next.
881	// As we are only interested in non-loop carried constraints here we can
882	// simply skip loop back edges.
883
884	SmallPtrSet<BasicBlock *, `8`> FinishedExitBlocks;
885	ReversePostOrderTraversal<Region *> RTraversal(R);
886	for (auto *RN : RTraversal) {
887	// Recurse for affine subregions but go on for basic blocks and non-affine
888	// subregions.
889	if (RN->isSubRegion()) {
890	Region *SubRegion = RN->getNodeAs<Region>();
891	if (!scop ->isNonAffineSubRegion(R: SubRegion)) {
892	if (!buildDomainsWithBranchConstraints(R: SubRegion, InvalidDomainMap))
893	return false;
894	continue;
895	}
896	}
897
898	if (containsErrorBlock(RN, R: scop ->getRegion(), SD: &SD))
899	scop ->notifyErrorBlock();
900	;
901
902	BasicBlock *BB = getRegionNodeBasicBlock(RN);
903	Instruction *TI = BB->getTerminator();
904
905	if (isa<UnreachableInst>(Val: TI))
906	continue;
907
908	if (!scop ->isDomainDefined(BB))
909	continue;
910	isl::set Domain = scop ->getDomainConditions(BB);
911
912	scop ->updateMaxLoopDepth(Depth: unsignedFromIslSize(Size: Domain.tuple_dim()));
913
914	auto *BBLoop = getRegionNodeLoop(RN, LI);
915	// Propagate the domain from BB directly to blocks that have a superset
916	// domain, at the moment only region exit nodes of regions that start in BB.
917	propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
918	InvalidDomainMap);
919
920	// If all successors of BB have been set a domain through the propagation
921	// above we do not need to build condition sets but can just skip this
922	// block. However, it is important to note that this is a local property
923	// with regards to the region @p R. To this end FinishedExitBlocks is a
924	// local variable.
925	auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
926	return FinishedExitBlocks.count(Ptr: SuccBB);
927	};
928	if (std::all_of(first: succ_begin(BB), last: succ_end(BB), pred: IsFinishedRegionExit))
929	continue;
930
931	// Build the condition sets for the successor nodes of the current region
932	// node. If it is a non-affine subregion we will always execute the single
933	// exit node, hence the single entry node domain is the condition set. For
934	// basic blocks we use the helper function buildConditionSets.
935	SmallVector<isl_set *, `8`> ConditionSets;
936	if (RN->isSubRegion())
937	ConditionSets.push_back(Elt: Domain.copy());
938	else if (!buildConditionSets(BB, TI, L: BBLoop, Domain: Domain.get(), InvalidDomainMap,
939	ConditionSets))
940	return false;
941
942	// Now iterate over the successors and set their initial domain based on
943	// their condition set. We skip back edges here and have to be careful when
944	// we leave a loop not to keep constraints over a dimension that doesn't
945	// exist anymore.
946	assert(RN->isSubRegion() \|\| TI->getNumSuccessors() == ConditionSets.size());
947	for (unsigned u = `0`, e = ConditionSets.size(); u < e; u++) {
948	isl::set CondSet = isl::manage(ptr: ConditionSets [u]);
949	BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, idx: u);
950
951	// Skip blocks outside the region.
952	if (!scop ->contains(BB: SuccBB))
953	continue;
954
955	// If we propagate the domain of some block to "SuccBB" we do not have to
956	// adjust the domain.
957	if (FinishedExitBlocks.count(Ptr: SuccBB))
958	continue;
959
960	// Skip back edges.
961	if (DT.dominates(A: SuccBB, B: BB))
962	continue;
963
964	Loop *SuccBBLoop =
965	getFirstNonBoxedLoopFor(BB: SuccBB, LI, BoxedLoops: scop ->getBoxedLoops());
966
967	CondSet = adjustDomainDimensions(Dom: CondSet, OldL: BBLoop, NewL: SuccBBLoop);
968
969	// Set the domain for the successor or merge it with an existing domain in
970	// case there are multiple paths (without loop back edges) to the
971	// successor block.
972	isl::set &SuccDomain = scop ->getOrInitEmptyDomain(BB: SuccBB);
973
974	if (!SuccDomain.is_null()) {
975	SuccDomain = SuccDomain.unite(set2: CondSet).coalesce();
976	} else {
977	// Initialize the invalid domain.
978	InvalidDomainMap [SuccBB] = CondSet.empty(space: CondSet.get_space());
979	SuccDomain = CondSet;
980	}
981
982	SuccDomain = SuccDomain.detect_equalities();
983
984	// Check if the maximal number of domain disjunctions was reached.
985	// In case this happens we will clean up and bail.
986	if (unsignedFromIslSize(Size: SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
987	continue;
988
989	scop ->invalidate(Kind: COMPLEXITY, Loc: DebugLoc());
990	while (++u < ConditionSets.size())
991	isl_set_free(set: ConditionSets [u]);
992	return false;
993	}
994	}
995
996	return true;
997	}
998
999	bool ScopBuilder::propagateInvalidStmtDomains(
1000	Region R, DenseMap<BasicBlock , isl::set> &InvalidDomainMap) {
1001	ReversePostOrderTraversal<Region *> RTraversal(R);
1002	for (auto *RN : RTraversal) {
1003
1004	// Recurse for affine subregions but go on for basic blocks and non-affine
1005	// subregions.
1006	if (RN->isSubRegion()) {
1007	Region *SubRegion = RN->getNodeAs<Region>();
1008	if (!scop ->isNonAffineSubRegion(R: SubRegion)) {
1009	propagateInvalidStmtDomains(R: SubRegion, InvalidDomainMap);
1010	continue;
1011	}
1012	}
1013
1014	bool ContainsErrorBlock = containsErrorBlock(RN, R: scop ->getRegion(), SD: &SD);
1015	BasicBlock *BB = getRegionNodeBasicBlock(RN);
1016	isl::set &Domain = scop ->getOrInitEmptyDomain(BB);
1017	assert(!Domain.is_null() && "Cannot propagate a nullptr");
1018
1019	isl::set InvalidDomain = InvalidDomainMap [BB];
1020
1021	bool IsInvalidBlock = ContainsErrorBlock \|\| Domain.is_subset(set2: InvalidDomain);
1022
1023	if (!IsInvalidBlock) {
1024	InvalidDomain = InvalidDomain.intersect(set2: Domain);
1025	} else {
1026	InvalidDomain = Domain;
1027	isl::set DomPar = Domain.params();
1028	recordAssumption(RecordedAssumptions: &RecordedAssumptions, Kind: ERRORBLOCK, Set: DomPar,
1029	Loc: BB->getTerminator()->getDebugLoc(), Sign: AS_RESTRICTION);
1030	Domain = isl::set::empty(space: Domain.get_space());
1031	}
1032
1033	if (InvalidDomain.is_empty()) {
1034	InvalidDomainMap [BB] = InvalidDomain;
1035	continue;
1036	}
1037
1038	auto *BBLoop = getRegionNodeLoop(RN, LI);
1039	auto *TI = BB->getTerminator();
1040	unsigned NumSuccs = RN->isSubRegion() ? `1` : TI->getNumSuccessors();
1041	for (unsigned u = `0`; u < NumSuccs; u++) {
1042	auto *SuccBB = getRegionNodeSuccessor(RN, TI, idx: u);
1043
1044	// Skip successors outside the SCoP.
1045	if (!scop ->contains(BB: SuccBB))
1046	continue;
1047
1048	// Skip backedges.
1049	if (DT.dominates(A: SuccBB, B: BB))
1050	continue;
1051
1052	Loop *SuccBBLoop =
1053	getFirstNonBoxedLoopFor(BB: SuccBB, LI, BoxedLoops: scop ->getBoxedLoops());
1054
1055	auto AdjustedInvalidDomain =
1056	adjustDomainDimensions(Dom: InvalidDomain, OldL: BBLoop, NewL: SuccBBLoop);
1057
1058	isl::set SuccInvalidDomain = InvalidDomainMap [SuccBB];
1059	SuccInvalidDomain = SuccInvalidDomain.unite(set2: AdjustedInvalidDomain);
1060	SuccInvalidDomain = SuccInvalidDomain.coalesce();
1061
1062	InvalidDomainMap [SuccBB] = SuccInvalidDomain;
1063
1064	// Check if the maximal number of domain disjunctions was reached.
1065	// In case this happens we will bail.
1066	if (unsignedFromIslSize(Size: SuccInvalidDomain.n_basic_set()) <
1067	MaxDisjunctsInDomain)
1068	continue;
1069
1070	InvalidDomainMap.erase(Val: BB);
1071	scop ->invalidate(Kind: COMPLEXITY, Loc: TI->getDebugLoc(), BB: TI->getParent());
1072	return false;
1073	}
1074
1075	InvalidDomainMap [BB] = InvalidDomain;
1076	}
1077
1078	return true;
1079	}
1080
1081	void ScopBuilder::buildPHIAccesses(ScopStmt PHIStmt, PHINode PHI,
1082	Region *NonAffineSubRegion,
1083	bool IsExitBlock) {
1084	// PHI nodes that are in the exit block of the region, hence if IsExitBlock is
1085	// true, are not modeled as ordinary PHI nodes as they are not part of the
1086	// region. However, we model the operands in the predecessor blocks that are
1087	// part of the region as regular scalar accesses.
1088
1089	// If we can synthesize a PHI we can skip it, however only if it is in
1090	// the region. If it is not it can only be in the exit block of the region.
1091	// In this case we model the operands but not the PHI itself.
1092	auto *Scope = LI.getLoopFor(BB: PHI->getParent());
1093	if (!IsExitBlock && canSynthesize(V: PHI, S: *scop, SE: &SE, Scope))
1094	return;
1095
1096	// PHI nodes are modeled as if they had been demoted prior to the SCoP
1097	// detection. Hence, the PHI is a load of a new memory location in which the
1098	// incoming value was written at the end of the incoming basic block.
1099	bool OnlyNonAffineSubRegionOperands = true;
1100	for (unsigned u = `0`; u < PHI->getNumIncomingValues(); u++) {
1101	Value *Op = PHI->getIncomingValue(i: u);
1102	BasicBlock *OpBB = PHI->getIncomingBlock(i: u);
1103	ScopStmt *OpStmt = scop ->getIncomingStmtFor(U: PHI->getOperandUse(i: u));
1104
1105	// Do not build PHI dependences inside a non-affine subregion, but make
1106	// sure that the necessary scalar values are still made available.
1107	if (NonAffineSubRegion && NonAffineSubRegion->contains(BB: OpBB)) {
1108	auto *OpInst = dyn_cast<Instruction>(Val: Op);
1109	if (!OpInst \|\| !NonAffineSubRegion->contains(Inst: OpInst))
1110	ensureValueRead(V: Op, UserStmt: OpStmt);
1111	continue;
1112	}
1113
1114	OnlyNonAffineSubRegionOperands = false;
1115	ensurePHIWrite(PHI, IncomintStmt: OpStmt, IncomingBlock: OpBB, IncomingValue: Op, IsExitBlock);
1116	}
1117
1118	if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
1119	addPHIReadAccess(PHIStmt, PHI);
1120	}
1121	}
1122
1123	void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
1124	Instruction *Inst) {
1125	assert(!isa<PHINode>(Inst));
1126
1127	// Pull-in required operands.
1128	for (Use &Op : Inst->operands())
1129	ensureValueRead(V: Op.get(), UserStmt);
1130	}
1131
1132	// Create a sequence of two schedules. Either argument may be null and is
1133	// interpreted as the empty schedule. Can also return null if both schedules are
1134	// empty.
1135	static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
1136	if (Prev.is_null())
1137	return Succ;
1138	if (Succ.is_null())
1139	return Prev;
1140
1141	return Prev.sequence(schedule2: Succ);
1142	}
1143
1144	// Create an isl_multi_union_aff that defines an identity mapping from the
1145	// elements of USet to their N-th dimension.
1146	//
1147	// # Example:
1148	//
1149	// Domain: { A[i,j]; B[i,j,k] }
1150	// N: 1
1151	//
1152	// Resulting Mapping: { {A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
1153	//
1154	// @param USet A union set describing the elements for which to generate a
1155	// mapping.
1156	// @param N The dimension to map to.
1157	// @returns A mapping from USet to its N-th dimension.
1158	static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
1159	assert(!USet.is_null());
1160	assert(!USet.is_empty());
1161
1162	auto Result = isl::union_pw_multi_aff::empty(space: USet.get_space());
1163
1164	for (isl::set S : USet.get_set_list()) {
1165	unsigned Dim = unsignedFromIslSize(Size: S.tuple_dim());
1166	assert(Dim >= N);
1167	auto PMA = isl::pw_multi_aff::project_out_map(space: S.get_space(), type: isl::dim::set,
1168	first: N, n: Dim - N);
1169	if (N > `1`)
1170	PMA = PMA.drop_dims(type: isl::dim::out, first: `0`, n: N - `1`);
1171
1172	Result = Result.add_pw_multi_aff(pma: PMA);
1173	}
1174
1175	return isl::multi_union_pw_aff (isl::union_pw_multi_aff (Result));
1176	}
1177
1178	void ScopBuilder::buildSchedule() {
1179	Loop L = getLoopSurroundingScop(S&: scop, LI);
1180	LoopStackTy LoopStack({LoopStackElementTy (L, {}, `0`)});
1181	buildSchedule(RN: scop ->getRegion().getNode(), LoopStack);
1182	assert(LoopStack.size() == `1` && LoopStack.back().L == L);
1183	scop ->setScheduleTree(LoopStack [`0`].Schedule);
1184	}
1185
1186	/// To generate a schedule for the elements in a Region we traverse the Region
1187	/// in reverse-post-order and add the contained RegionNodes in traversal order
1188	/// to the schedule of the loop that is currently at the top of the LoopStack.
1189	/// For loop-free codes, this results in a correct sequential ordering.
1190	///
1191	/// Example:
1192	/// bb1(0)
1193	/// / \.
1194	/// bb2(1) bb3(2)
1195	/// \ / \.
1196	/// bb4(3) bb5(4)
1197	/// \ /
1198	/// bb6(5)
1199	///
1200	/// Including loops requires additional processing. Whenever a loop header is
1201	/// encountered, the corresponding loop is added to the @p LoopStack. Starting
1202	/// from an empty schedule, we first process all RegionNodes that are within
1203	/// this loop and complete the sequential schedule at this loop-level before
1204	/// processing about any other nodes. To implement this
1205	/// loop-nodes-first-processing, the reverse post-order traversal is
1206	/// insufficient. Hence, we additionally check if the traversal yields
1207	/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
1208	/// These region-nodes are then queue and only traverse after the all nodes
1209	/// within the current loop have been processed.
1210	void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
1211	Loop OuterScopLoop = getLoopSurroundingScop(S&: scop, LI);
1212
1213	ReversePostOrderTraversal<Region *> RTraversal(R);
1214	std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
1215	std::deque<RegionNode *> DelayList;
1216	bool LastRNWaiting = false;
1217
1218	// Iterate over the region @p R in reverse post-order but queue
1219	// sub-regions/blocks iff they are not part of the last encountered but not
1220	// completely traversed loop. The variable LastRNWaiting is a flag to indicate
1221	// that we queued the last sub-region/block from the reverse post-order
1222	// iterator. If it is set we have to explore the next sub-region/block from
1223	// the iterator (if any) to guarantee progress. If it is not set we first try
1224	// the next queued sub-region/blocks.
1225	while (!WorkList.empty() \|\| !DelayList.empty()) {
1226	RegionNode *RN;
1227
1228	if ((LastRNWaiting && !WorkList.empty()) \|\| DelayList.empty()) {
1229	RN = WorkList.front();
1230	WorkList.pop_front();
1231	LastRNWaiting = false;
1232	} else {
1233	RN = DelayList.front();
1234	DelayList.pop_front();
1235	}
1236
1237	Loop *L = getRegionNodeLoop(RN, LI);
1238	if (!scop ->contains(L))
1239	L = OuterScopLoop;
1240
1241	Loop *LastLoop = LoopStack.back().L;
1242	if (LastLoop != L) {
1243	if (LastLoop && !LastLoop->contains(L)) {
1244	LastRNWaiting = true;
1245	DelayList.push_back(x: RN);
1246	continue;
1247	}
1248	LoopStack.push_back(Elt: {L, {}, `0`});
1249	}
1250	buildSchedule(RN, LoopStack);
1251	}
1252	}
1253
1254	void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
1255	if (RN->isSubRegion()) {
1256	auto *LocalRegion = RN->getNodeAs<Region>();
1257	if (!scop ->isNonAffineSubRegion(R: LocalRegion)) {
1258	buildSchedule(R: LocalRegion, LoopStack);
1259	return;
1260	}
1261	}
1262
1263	assert(LoopStack.rbegin() != LoopStack.rend());
1264	auto LoopData = LoopStack.rbegin();
1265	LoopData ->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
1266
1267	for (auto *Stmt : scop ->getStmtListFor(RN)) {
1268	isl::union_set UDomain{Stmt->getDomain()};
1269	auto StmtSchedule = isl::schedule::from_domain(domain: UDomain);
1270	LoopData ->Schedule = combineInSequence(Prev: LoopData ->Schedule, Succ: StmtSchedule);
1271	}
1272
1273	// Check if we just processed the last node in this loop. If we did, finalize
1274	// the loop by:
1275	//
1276	// - adding new schedule dimensions
1277	// - folding the resulting schedule into the parent loop schedule
1278	// - dropping the loop schedule from the LoopStack.
1279	//
1280	// Then continue to check surrounding loops, which might also have been
1281	// completed by this node.
1282	size_t Dimension = LoopStack.size();
1283	while (LoopData ->L &&
1284	LoopData ->NumBlocksProcessed == getNumBlocksInLoop(L: LoopData ->L)) {
1285	isl::schedule Schedule = LoopData ->Schedule;
1286	auto NumBlocksProcessed = LoopData ->NumBlocksProcessed;
1287
1288	assert(std::next(LoopData) != LoopStack.rend());
1289	Loop *L = LoopData ->L;
1290	++LoopData;
1291	--Dimension;
1292
1293	if (!Schedule.is_null()) {
1294	isl::union_set Domain = Schedule.get_domain();
1295	isl::multi_union_pw_aff MUPA = mapToDimension(USet: Domain, N: Dimension);
1296	Schedule = Schedule.insert_partial_schedule(partial: MUPA);
1297
1298	if (hasDisableAllTransformsHint(L)) {
1299	/// If any of the loops has a disable_nonforced heuristic, mark the
1300	/// entire SCoP as such. The ISL rescheduler can only reschedule the
1301	/// SCoP in its entirety.
1302	/// TODO: ScopDetection could avoid including such loops or warp them as
1303	/// boxed loop. It still needs to pass-through loop with user-defined
1304	/// metadata.
1305	scop ->markDisableHeuristics();
1306	}
1307
1308	// It is easier to insert the marks here that do it retroactively.
1309	isl::id IslLoopId = createIslLoopAttr(Ctx: scop ->getIslCtx(), L);
1310	if (!IslLoopId.is_null())
1311	Schedule =
1312	Schedule.get_root().child(pos: `0`).insert_mark(mark: IslLoopId).get_schedule();
1313
1314	LoopData ->Schedule = combineInSequence(Prev: LoopData ->Schedule, Succ: Schedule);
1315	}
1316
1317	LoopData ->NumBlocksProcessed += NumBlocksProcessed;
1318	}
1319	// Now pop all loops processed up there from the LoopStack
1320	LoopStack.erase(CS: LoopStack.begin() + Dimension, CE: LoopStack.end());
1321	}
1322
1323	void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
1324	// Check for uses of this instruction outside the scop. Because we do not
1325	// iterate over such instructions and therefore did not "ensure" the existence
1326	// of a write, we must determine such use here.
1327	if (scop ->isEscaping(Inst))
1328	ensureValueWrite(Inst);
1329	}
1330
1331	void ScopBuilder::addRecordedAssumptions() {
1332	for (auto &AS : llvm::reverse(C&: RecordedAssumptions)) {
1333
1334	if (!AS.BB) {
1335	scop ->addAssumption(Kind: AS.Kind, Set: AS.Set, Loc: AS.Loc, Sign: AS.Sign,
1336	BB: nullptr / BasicBlock /, RTC: AS.RequiresRTC);
1337	continue;
1338	}
1339
1340	// If the domain was deleted the assumptions are void.
1341	isl_set *Dom = scop ->getDomainConditions(BB: AS.BB).release();
1342	if (!Dom)
1343	continue;
1344
1345	// If a basic block was given use its domain to simplify the assumption.
1346	// In case of restrictions we know they only have to hold on the domain,
1347	// thus we can intersect them with the domain of the block. However, for
1348	// assumptions the domain has to imply them, thus:
1349	// _ _____
1350	// Dom => S <==> A v B <==> A - B
1351	//
1352	// To avoid the complement we will register A - B as a restriction not an
1353	// assumption.
1354	isl_set *S = AS.Set.copy();
1355	if (AS.Sign == AS_RESTRICTION)
1356	S = isl_set_params(set: isl_set_intersect(set1: S, set2: Dom));
1357	else / (AS.Sign == AS_ASSUMPTION) /
1358	S = isl_set_params(set: isl_set_subtract(set1: Dom, set2: S));
1359
1360	scop ->addAssumption(Kind: AS.Kind, Set: isl::manage(ptr: S), Loc: AS.Loc, Sign: AS_RESTRICTION, BB: AS.BB,
1361	RTC: AS.RequiresRTC);
1362	}
1363	}
1364
1365	void ScopBuilder::addUserAssumptions(
1366	AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1367	for (auto &Assumption : AC.assumptions()) {
1368	auto *CI = dyn_cast_or_null<CallInst>(Val&: Assumption);
1369	if (!CI \|\| CI->arg_size() != `1`)
1370	continue;
1371
1372	bool InScop = scop ->contains(I: CI);
1373	if (!InScop && !scop ->isDominatedBy(DT, BB: CI->getParent()))
1374	continue;
1375
1376	auto *L = LI.getLoopFor(BB: CI->getParent());
1377	auto *Val = CI->getArgOperand(i: `0`);
1378	ParameterSetTy DetectedParams;
1379	auto &R = scop ->getRegion();
1380	if (!isAffineConstraint(V: Val, R: &R, Scope: L, SE, Params&: DetectedParams)) {
1381	ORE.emit(
1382	OptDiag&: OptimizationRemarkAnalysis (DEBUG_TYPE, "IgnoreUserAssumption", CI)
1383	<< "Non-affine user assumption ignored.");
1384	continue;
1385	}
1386
1387	// Collect all newly introduced parameters.
1388	ParameterSetTy NewParams;
1389	for (auto *Param : DetectedParams) {
1390	Param = extractConstantFactor(M: Param, SE).second;
1391	Param = scop ->getRepresentingInvariantLoadSCEV(S: Param);
1392	if (scop ->isParam(Param))
1393	continue;
1394	NewParams.insert(X: Param);
1395	}
1396
1397	SmallVector<isl_set *, `2`> ConditionSets;
1398	auto TI = InScop ? CI->getParent()->getTerminator() : nullptr*;
1399	BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
1400	auto *Dom = InScop ? isl_set_copy(set: scop ->getDomainConditions(BB).get())
1401	: isl_set_copy(set: scop ->getContext().get());
1402	assert(Dom && "Cannot propagate a nullptr.");
1403	bool Valid = buildConditionSets(BB, Condition: Val, TI, L, Domain: Dom, InvalidDomainMap,
1404	ConditionSets);
1405	isl_set_free(set: Dom);
1406
1407	if (!Valid)
1408	continue;
1409
1410	isl_set AssumptionCtx = nullptr*;
1411	if (InScop) {
1412	AssumptionCtx = isl_set_complement(set: isl_set_params(set: ConditionSets [`1`]));
1413	isl_set_free(set: ConditionSets [`0`]);
1414	} else {
1415	AssumptionCtx = isl_set_complement(set: ConditionSets [`1`]);
1416	AssumptionCtx = isl_set_intersect(set1: AssumptionCtx, set2: ConditionSets [`0`]);
1417	}
1418
1419	// Project out newly introduced parameters as they are not otherwise useful.
1420	if (!NewParams.empty()) {
1421	for (isl_size u = `0`; u < isl_set_n_param(set: AssumptionCtx); u++) {
1422	auto *Id = isl_set_get_dim_id(set: AssumptionCtx, type: isl_dim_param, pos: u);
1423	auto Param = static_cast<const* SCEV *>(isl_id_get_user(id: Id));
1424	isl_id_free(id: Id);
1425
1426	if (!NewParams.count(key: Param))
1427	continue;
1428
1429	AssumptionCtx =
1430	isl_set_project_out(set: AssumptionCtx, type: isl_dim_param, first: u--, n: `1`);
1431	}
1432	}
1433	ORE.emit(OptDiag&: OptimizationRemarkAnalysis (DEBUG_TYPE, "UserAssumption", CI)
1434	<< "Use user assumption: "
1435	<< stringFromIslObj(Obj: AssumptionCtx, DefaultValue: "null"));
1436	isl::set newContext =
1437	scop ->getContext().intersect(set2: isl::manage(ptr: AssumptionCtx));
1438	scop ->setContext(newContext);
1439	}
1440	}
1441
1442	bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
1443	// Memory builtins are not considered in this function.
1444	if (!Inst.isLoad() && !Inst.isStore())
1445	return false;
1446
1447	Value *Val = Inst.getValueOperand();
1448	Type *ElementType = Val->getType();
1449	Value *Address = Inst.getPointerOperand();
1450	const SCEV *AccessFunction =
1451	SE.getSCEVAtScope(V: Address, L: LI.getLoopFor(BB: Inst ->getParent()));
1452	const SCEVUnknown *BasePointer =
1453	dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AccessFunction));
1454	enum MemoryAccess::AccessType AccType =
1455	isa<LoadInst>(Val: Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1456
1457	if (auto *BitCast = dyn_cast<BitCastInst>(Val: Address))
1458	Address = BitCast->getOperand(i_nocapture: `0`);
1459
1460	auto *GEP = dyn_cast<GetElementPtrInst>(Val: Address);
1461	if (!GEP \|\| DL.getTypeAllocSize(Ty: GEP->getResultElementType()) !=
1462	DL.getTypeAllocSize(Ty: ElementType))
1463	return false;
1464
1465	SmallVector<const SCEV *, `4`> Subscripts;
1466	SmallVector<int, `4`> Sizes;
1467	getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
1468	auto *BasePtr = GEP->getOperand(i_nocapture: `0`);
1469
1470	if (auto *BasePtrCast = dyn_cast<BitCastInst>(Val: BasePtr))
1471	BasePtr = BasePtrCast->getOperand(i_nocapture: `0`);
1472
1473	// Check for identical base pointers to ensure that we do not miss index
1474	// offsets that have been added before this GEP is applied.
1475	if (BasePtr != BasePointer->getValue())
1476	return false;
1477
1478	std::vector<const SCEV *> SizesSCEV;
1479
1480	const InvariantLoadsSetTy &ScopRIL = scop ->getRequiredInvariantLoads();
1481
1482	Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1483	for (auto *Subscript : Subscripts) {
1484	InvariantLoadsSetTy AccessILS;
1485	if (!isAffineExpr(R: &scop ->getRegion(), Scope: SurroundingLoop, Expression: Subscript, SE,
1486	ILS: &AccessILS))
1487	return false;
1488
1489	for (LoadInst *LInst : AccessILS)
1490	if (!ScopRIL.count(key: LInst))
1491	return false;
1492	}
1493
1494	if (Sizes.empty())
1495	return false;
1496
1497	SizesSCEV.push_back(x: nullptr);
1498
1499	for (auto V : Sizes)
1500	SizesSCEV.push_back(x: SE.getSCEV(
1501	V: ConstantInt::get(Ty: IntegerType::getInt64Ty(C&: BasePtr->getContext()), V)));
1502
1503	addArrayAccess(Stmt, MemAccInst: Inst, AccType, BaseAddress: BasePointer->getValue(), ElemType: ElementType,
1504	IsAffine: true, Subscripts, Sizes: SizesSCEV, AccessValue: Val);
1505	return true;
1506	}
1507
1508	bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
1509	// Memory builtins are not considered by this function.
1510	if (!Inst.isLoad() && !Inst.isStore())
1511	return false;
1512
1513	if (!PollyDelinearize)
1514	return false;
1515
1516	Value *Address = Inst.getPointerOperand();
1517	Value *Val = Inst.getValueOperand();
1518	Type *ElementType = Val->getType();
1519	unsigned ElementSize = DL.getTypeAllocSize(Ty: ElementType);
1520	enum MemoryAccess::AccessType AccType =
1521	isa<LoadInst>(Val: Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1522
1523	const SCEV *AccessFunction =
1524	SE.getSCEVAtScope(V: Address, L: LI.getLoopFor(BB: Inst ->getParent()));
1525	const SCEVUnknown *BasePointer =
1526	dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AccessFunction));
1527
1528	assert(BasePointer && "Could not find base pointer");
1529
1530	auto &InsnToMemAcc = scop ->getInsnToMemAccMap();
1531	auto AccItr = InsnToMemAcc.find(x: Inst);
1532	if (AccItr == InsnToMemAcc.end())
1533	return false;
1534
1535	std::vector<const SCEV > Sizes = {nullptr*};
1536
1537	Sizes.insert(position: Sizes.end(), first: AccItr ->second.Shape ->DelinearizedSizes.begin(),
1538	last: AccItr ->second.Shape ->DelinearizedSizes.end());
1539
1540	// In case only the element size is contained in the 'Sizes' array, the
1541	// access does not access a real multi-dimensional array. Hence, we allow
1542	// the normal single-dimensional access construction to handle this.
1543	if (Sizes.size() == `1`)
1544	return false;
1545
1546	// Remove the element size. This information is already provided by the
1547	// ElementSize parameter. In case the element size of this access and the
1548	// element size used for delinearization differs the delinearization is
1549	// incorrect. Hence, we invalidate the scop.
1550	//
1551	// TODO: Handle delinearization with differing element sizes.
1552	auto DelinearizedSize =
1553	cast<SCEVConstant>(Val: Sizes.back())->getAPInt().getSExtValue();
1554	Sizes.pop_back();
1555	if (ElementSize != DelinearizedSize)
1556	scop ->invalidate(Kind: DELINEARIZATION, Loc: Inst ->getDebugLoc(), BB: Inst ->getParent());
1557
1558	addArrayAccess(Stmt, MemAccInst: Inst, AccType, BaseAddress: BasePointer->getValue(), ElemType: ElementType,
1559	IsAffine: true, Subscripts: AccItr ->second.DelinearizedSubscripts, Sizes, AccessValue: Val);
1560	return true;
1561	}
1562
1563	bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
1564	auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Val&: Inst);
1565
1566	if (MemIntr == nullptr)
1567	return false;
1568
1569	auto *L = LI.getLoopFor(BB: Inst ->getParent());
1570	auto *LengthVal = SE.getSCEVAtScope(V: MemIntr->getLength(), L);
1571	assert(LengthVal);
1572
1573	// Check if the length val is actually affine or if we overapproximate it
1574	InvariantLoadsSetTy AccessILS;
1575	const InvariantLoadsSetTy &ScopRIL = scop ->getRequiredInvariantLoads();
1576
1577	Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1578	bool LengthIsAffine = isAffineExpr(R: &scop ->getRegion(), Scope: SurroundingLoop,
1579	Expression: LengthVal, SE, ILS: &AccessILS);
1580	for (LoadInst *LInst : AccessILS)
1581	if (!ScopRIL.count(key: LInst))
1582	LengthIsAffine = false;
1583	if (!LengthIsAffine)
1584	LengthVal = nullptr;
1585
1586	auto *DestPtrVal = MemIntr->getDest();
1587	assert(DestPtrVal);
1588
1589	auto *DestAccFunc = SE.getSCEVAtScope(V: DestPtrVal, L);
1590	assert(DestAccFunc);
1591	// Ignore accesses to "NULL".
1592	// TODO: We could use this to optimize the region further, e.g., intersect
1593	// the context with
1594	// isl_set_complement(isl_set_params(getDomain()))
1595	// as we know it would be undefined to execute this instruction anyway.
1596	if (DestAccFunc->isZero())
1597	return true;
1598
1599	if (auto *U = dyn_cast<SCEVUnknown>(Val: DestAccFunc)) {
1600	if (isa<ConstantPointerNull>(Val: U->getValue()))
1601	return true;
1602	}
1603
1604	auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: DestAccFunc));
1605	assert(DestPtrSCEV);
1606	DestAccFunc = SE.getMinusSCEV(LHS: DestAccFunc, RHS: DestPtrSCEV);
1607	addArrayAccess(Stmt, MemAccInst: Inst, AccType: MemoryAccess::MUST_WRITE, BaseAddress: DestPtrSCEV->getValue(),
1608	ElemType: IntegerType::getInt8Ty(C&: DestPtrVal->getContext()),
1609	IsAffine: LengthIsAffine, Subscripts: {DestAccFunc, LengthVal}, Sizes: {nullptr},
1610	AccessValue: Inst.getValueOperand());
1611
1612	auto *MemTrans = dyn_cast<MemTransferInst>(Val: MemIntr);
1613	if (!MemTrans)
1614	return true;
1615
1616	auto *SrcPtrVal = MemTrans->getSource();
1617	assert(SrcPtrVal);
1618
1619	auto *SrcAccFunc = SE.getSCEVAtScope(V: SrcPtrVal, L);
1620	assert(SrcAccFunc);
1621	// Ignore accesses to "NULL".
1622	// TODO: See above TODO
1623	if (SrcAccFunc->isZero())
1624	return true;
1625
1626	auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: SrcAccFunc));
1627	assert(SrcPtrSCEV);
1628	SrcAccFunc = SE.getMinusSCEV(LHS: SrcAccFunc, RHS: SrcPtrSCEV);
1629	addArrayAccess(Stmt, MemAccInst: Inst, AccType: MemoryAccess::READ, BaseAddress: SrcPtrSCEV->getValue(),
1630	ElemType: IntegerType::getInt8Ty(C&: SrcPtrVal->getContext()),
1631	IsAffine: LengthIsAffine, Subscripts: {SrcAccFunc, LengthVal}, Sizes: {nullptr},
1632	AccessValue: Inst.getValueOperand());
1633
1634	return true;
1635	}
1636
1637	bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
1638	auto *CI = dyn_cast_or_null<CallInst>(Val&: Inst);
1639
1640	if (CI == nullptr)
1641	return false;
1642
1643	if (CI->doesNotAccessMemory() \|\| isIgnoredIntrinsic(V: CI) \|\| isDebugCall(Inst: CI))
1644	return true;
1645
1646	auto *AF = SE.getConstant(Ty: IntegerType::getInt64Ty(C&: CI->getContext()), V: `0`);
1647	auto *CalledFunction = CI->getCalledFunction();
1648	MemoryEffects ME = AA.getMemoryEffects(F: CalledFunction);
1649	if (ME.doesNotAccessMemory())
1650	return true;
1651
1652	if (ME.onlyAccessesArgPointees()) {
1653	ModRefInfo ArgMR = ME.getModRef(Loc: IRMemLocation::ArgMem);
1654	auto AccType =
1655	!isModSet(MRI: ArgMR) ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
1656	Loop *L = LI.getLoopFor(BB: Inst ->getParent());
1657	for (const auto &Arg : CI->args()) {
1658	if (!Arg ->getType()->isPointerTy())
1659	continue;
1660
1661	auto *ArgSCEV = SE.getSCEVAtScope(V: Arg, L);
1662	if (ArgSCEV->isZero())
1663	continue;
1664
1665	if (auto *U = dyn_cast<SCEVUnknown>(Val: ArgSCEV)) {
1666	if (isa<ConstantPointerNull>(Val: U->getValue()))
1667	return true;
1668	}
1669
1670	auto *ArgBasePtr = cast<SCEVUnknown>(Val: SE.getPointerBase(V: ArgSCEV));
1671	addArrayAccess(Stmt, MemAccInst: Inst, AccType, BaseAddress: ArgBasePtr->getValue(),
1672	ElemType: ArgBasePtr->getType(), IsAffine: false, Subscripts: {AF}, Sizes: {nullptr}, AccessValue: CI);
1673	}
1674	return true;
1675	}
1676
1677	if (ME.onlyReadsMemory()) {
1678	GlobalReads.emplace_back(Args&: Stmt, Args&: CI);
1679	return true;
1680	}
1681	return false;
1682	}
1683
1684	bool ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
1685	// Memory builtins are not considered by this function.
1686	if (!Inst.isLoad() && !Inst.isStore())
1687	return false;
1688
1689	Value *Address = Inst.getPointerOperand();
1690	Value *Val = Inst.getValueOperand();
1691	Type *ElementType = Val->getType();
1692	enum MemoryAccess::AccessType AccType =
1693	isa<LoadInst>(Val: Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1694
1695	const SCEV *AccessFunction =
1696	SE.getSCEVAtScope(V: Address, L: LI.getLoopFor(BB: Inst ->getParent()));
1697	const SCEVUnknown *BasePointer =
1698	dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AccessFunction));
1699
1700	assert(BasePointer && "Could not find base pointer");
1701	AccessFunction = SE.getMinusSCEV(LHS: AccessFunction, RHS: BasePointer);
1702
1703	// Check if the access depends on a loop contained in a non-affine subregion.
1704	bool isVariantInNonAffineLoop = false;
1705	SetVector<const Loop *> Loops;
1706	findLoops(Expr: AccessFunction, Loops);
1707	for (const Loop *L : Loops)
1708	if (Stmt->contains(L)) {
1709	isVariantInNonAffineLoop = true;
1710	break;
1711	}
1712
1713	InvariantLoadsSetTy AccessILS;
1714
1715	Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1716	bool IsAffine = !isVariantInNonAffineLoop &&
1717	isAffineExpr(R: &scop ->getRegion(), Scope: SurroundingLoop,
1718	Expression: AccessFunction, SE, ILS: &AccessILS);
1719
1720	const InvariantLoadsSetTy &ScopRIL = scop ->getRequiredInvariantLoads();
1721	for (LoadInst *LInst : AccessILS)
1722	if (!ScopRIL.count(key: LInst))
1723	IsAffine = false;
1724
1725	if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
1726	AccType = MemoryAccess::MAY_WRITE;
1727
1728	addArrayAccess(Stmt, MemAccInst: Inst, AccType, BaseAddress: BasePointer->getValue(), ElemType: ElementType,
1729	IsAffine, Subscripts: {AccessFunction}, Sizes: {nullptr}, AccessValue: Val);
1730	return true;
1731	}
1732
1733	void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
1734	if (buildAccessMemIntrinsic(Inst, Stmt))
1735	return;
1736
1737	if (buildAccessCallInst(Inst, Stmt))
1738	return;
1739
1740	if (buildAccessMultiDimFixed(Inst, Stmt))
1741	return;
1742
1743	if (buildAccessMultiDimParam(Inst, Stmt))
1744	return;
1745
1746	if (buildAccessSingleDim(Inst, Stmt))
1747	return;
1748
1749	llvm_unreachable(
1750	"At least one of the buildAccess functions must handled this access, or "
1751	"ScopDetection should have rejected this SCoP");
1752	}
1753
1754	void ScopBuilder::buildAccessFunctions() {
1755	for (auto &Stmt : *scop) {
1756	if (Stmt.isBlockStmt()) {
1757	buildAccessFunctions(Stmt: &Stmt, BB&: *Stmt.getBasicBlock());
1758	continue;
1759	}
1760
1761	Region *R = Stmt.getRegion();
1762	for (BasicBlock *BB : R->blocks())
1763	buildAccessFunctions(Stmt: &Stmt, BB&: *BB, NonAffineSubRegion: R);
1764	}
1765
1766	// Build write accesses for values that are used after the SCoP.
1767	// The instructions defining them might be synthesizable and therefore not
1768	// contained in any statement, hence we iterate over the original instructions
1769	// to identify all escaping values.
1770	for (BasicBlock *BB : scop ->getRegion().blocks()) {
1771	for (Instruction &Inst : *BB)
1772	buildEscapingDependences(Inst: &Inst);
1773	}
1774	}
1775
1776	bool ScopBuilder::shouldModelInst(Instruction Inst, Loop L) {
1777	return !Inst->isTerminator() && !isIgnoredIntrinsic(V: Inst) &&
1778	!canSynthesize(V: Inst, S: *scop, SE: &SE, Scope: L);
1779	}
1780
1781	/// Generate a name for a statement.
1782	///
1783	/// @param BB The basic block the statement will represent.
1784	/// @param BBIdx The index of the @p BB relative to other BBs/regions.
1785	/// @param Count The index of the created statement in @p BB.
1786	/// @param IsMain Whether this is the main of all statement for @p BB. If true,
1787	/// no suffix will be added.
1788	/// @param IsLast Uses a special indicator for the last statement of a BB.
1789	static std::string makeStmtName(BasicBlock BB, long* BBIdx, int Count,
1790	bool IsMain, bool IsLast = false) {
1791	std::string Suffix;
1792	if (!IsMain) {
1793	if (UseInstructionNames)
1794	Suffix = `'_'`;
1795	if (IsLast)
1796	Suffix += "last";
1797	else if (Count < `26`)
1798	Suffix += `'a'` + Count;
1799	else
1800	Suffix += std::to_string(val: Count);
1801	}
1802	return getIslCompatibleName(Prefix: "Stmt", Val: BB, Number: BBIdx, Suffix, UseInstructionNames);
1803	}
1804
1805	/// Generate a name for a statement that represents a non-affine subregion.
1806	///
1807	/// @param R The region the statement will represent.
1808	/// @param RIdx The index of the @p R relative to other BBs/regions.
1809	static std::string makeStmtName(Region R, long* RIdx) {
1810	return getIslCompatibleName(Prefix: "Stmt", Middle: R->getNameStr(), Number: RIdx, Suffix: "",
1811	UseInstructionNames);
1812	}
1813
1814	void ScopBuilder::buildSequentialBlockStmts(BasicBlock BB, bool* SplitOnStore) {
1815	Loop *SurroundingLoop = LI.getLoopFor(BB);
1816
1817	int Count = `0`;
1818	long BBIdx = scop ->getNextStmtIdx();
1819	std::vector<Instruction *> Instructions;
1820	for (Instruction &Inst : *BB) {
1821	if (shouldModelInst(Inst: &Inst, L: SurroundingLoop))
1822	Instructions.push_back(x: &Inst);
1823	if (Inst.getMetadata(Kind: "polly_split_after") \|\|
1824	(SplitOnStore && isa<StoreInst>(Val: Inst))) {
1825	std::string Name = makeStmtName(BB, BBIdx, Count, IsMain: Count == `0`);
1826	scop ->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1827	Count++;
1828	Instructions.clear();
1829	}
1830	}
1831
1832	std::string Name = makeStmtName(BB, BBIdx, Count, IsMain: Count == `0`);
1833	scop ->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1834	}
1835
1836	/// Is @p Inst an ordered instruction?
1837	///
1838	/// An unordered instruction is an instruction, such that a sequence of
1839	/// unordered instructions can be permuted without changing semantics. Any
1840	/// instruction for which this is not always the case is ordered.
1841	static bool isOrderedInstruction(Instruction *Inst) {
1842	return Inst->mayHaveSideEffects() \|\| Inst->mayReadOrWriteMemory();
1843	}
1844
1845	/// Join instructions to the same statement if one uses the scalar result of the
1846	/// other.
1847	static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
1848	ArrayRef<Instruction *> ModeledInsts) {
1849	for (Instruction *Inst : ModeledInsts) {
1850	if (isa<PHINode>(Val: Inst))
1851	continue;
1852
1853	for (Use &Op : Inst->operands()) {
1854	Instruction *OpInst = dyn_cast<Instruction>(Val: Op.get());
1855	if (!OpInst)
1856	continue;
1857
1858	// Check if OpInst is in the BB and is a modeled instruction.
1859	auto OpVal = UnionFind.findValue(V: OpInst);
1860	if (OpVal == UnionFind.end())
1861	continue;
1862
1863	UnionFind.unionSets(V1: Inst, V2: OpInst);
1864	}
1865	}
1866	}
1867
1868	/// Ensure that the order of ordered instructions does not change.
1869	///
1870	/// If we encounter an ordered instruction enclosed in instructions belonging to
1871	/// a different statement (which might as well contain ordered instructions, but
1872	/// this is not tested here), join them.
1873	static void
1874	joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
1875	ArrayRef<Instruction *> ModeledInsts) {
1876	SetVector<Instruction *> SeenLeaders;
1877	for (Instruction *Inst : ModeledInsts) {
1878	if (!isOrderedInstruction(Inst))
1879	continue;
1880
1881	Instruction *Leader = UnionFind.getLeaderValue(V: Inst);
1882	// Since previous iterations might have merged sets, some items in
1883	// SeenLeaders are not leaders anymore. However, The new leader of
1884	// previously merged instructions must be one of the former leaders of
1885	// these merged instructions.
1886	bool Inserted = SeenLeaders.insert(X: Leader);
1887	if (Inserted)
1888	continue;
1889
1890	// Merge statements to close holes. Say, we have already seen statements A
1891	// and B, in this order. Then we see an instruction of A again and we would
1892	// see the pattern "A B A". This function joins all statements until the
1893	// only seen occurrence of A.
1894	for (Instruction *Prev : reverse(C&: SeenLeaders)) {
1895	// We are backtracking from the last element until we see Inst's leader
1896	// in SeenLeaders and merge all into one set. Although leaders of
1897	// instructions change during the execution of this loop, it's irrelevant
1898	// as we are just searching for the element that we already confirmed is
1899	// in the list.
1900	if (Prev == Leader)
1901	break;
1902	UnionFind.unionSets(V1: Prev, V2: Leader);
1903	}
1904	}
1905	}
1906
1907	/// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for
1908	/// the incoming values from this block are executed after the PHI READ.
1909	///
1910	/// Otherwise it could overwrite the incoming value from before the BB with the
1911	/// value for the next execution. This can happen if the PHI WRITE is added to
1912	/// the statement with the instruction that defines the incoming value (instead
1913	/// of the last statement of the same BB). To ensure that the PHI READ and WRITE
1914	/// are in order, we put both into the statement. PHI WRITEs are always executed
1915	/// after PHI READs when they are in the same statement.
1916	///
1917	/// TODO: This is an overpessimization. We only have to ensure that the PHI
1918	/// WRITE is not put into a statement containing the PHI itself. That could also
1919	/// be done by
1920	/// - having all (strongly connected) PHIs in a single statement,
1921	/// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
1922	/// has a chance of being lifted before a PHI by being in a statement with a
1923	/// PHI that comes before in the basic block), or
1924	/// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
1925	static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
1926	ArrayRef<Instruction *> ModeledInsts) {
1927	for (Instruction *Inst : ModeledInsts) {
1928	PHINode *PHI = dyn_cast<PHINode>(Val: Inst);
1929	if (!PHI)
1930	continue;
1931
1932	int Idx = PHI->getBasicBlockIndex(BB: PHI->getParent());
1933	if (Idx < `0`)
1934	continue;
1935
1936	Instruction *IncomingVal =
1937	dyn_cast<Instruction>(Val: PHI->getIncomingValue(i: Idx));
1938	if (!IncomingVal)
1939	continue;
1940
1941	UnionFind.unionSets(V1: PHI, V2: IncomingVal);
1942	}
1943	}
1944
1945	void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
1946	Loop *L = LI.getLoopFor(BB);
1947
1948	// Extracting out modeled instructions saves us from checking
1949	// shouldModelInst() repeatedly.
1950	SmallVector<Instruction *, `32`> ModeledInsts;
1951	EquivalenceClasses<Instruction *> UnionFind;
1952	Instruction MainInst = nullptr, MainLeader = nullptr;
1953	for (Instruction &Inst : *BB) {
1954	if (!shouldModelInst(Inst: &Inst, L))
1955	continue;
1956	ModeledInsts.push_back(Elt: &Inst);
1957	UnionFind.insert(Data: &Inst);
1958
1959	// When a BB is split into multiple statements, the main statement is the
1960	// one containing the 'main' instruction. We select the first instruction
1961	// that is unlikely to be removed (because it has side-effects) as the main
1962	// one. It is used to ensure that at least one statement from the bb has the
1963	// same name as with -polly-stmt-granularity=bb.
1964	if (!MainInst && (isa<StoreInst>(Val: Inst) \|\|
1965	(isa<CallInst>(Val: Inst) && !isa<IntrinsicInst>(Val: Inst))))
1966	MainInst = &Inst;
1967	}
1968
1969	joinOperandTree(UnionFind, ModeledInsts);
1970	joinOrderedInstructions(UnionFind, ModeledInsts);
1971	joinOrderedPHIs(UnionFind, ModeledInsts);
1972
1973	// The list of instructions for statement (statement represented by the leader
1974	// instruction).
1975	MapVector<Instruction , std::vector<Instruction >> LeaderToInstList;
1976
1977	// The order of statements must be preserved w.r.t. their ordered
1978	// instructions. Without this explicit scan, we would also use non-ordered
1979	// instructions (whose order is arbitrary) to determine statement order.
1980	for (Instruction *Inst : ModeledInsts) {
1981	if (!isOrderedInstruction(Inst))
1982	continue;
1983
1984	auto LeaderIt = UnionFind.findLeader(V: Inst);
1985	if (LeaderIt == UnionFind.member_end())
1986	continue;
1987
1988	// Insert element for the leader instruction.
1989	(void)LeaderToInstList [*LeaderIt];
1990	}
1991
1992	// Collect the instructions of all leaders. UnionFind's member iterator
1993	// unfortunately are not in any specific order.
1994	for (Instruction *Inst : ModeledInsts) {
1995	auto LeaderIt = UnionFind.findLeader(V: Inst);
1996	if (LeaderIt == UnionFind.member_end())
1997	continue;
1998
1999	if (Inst == MainInst)
2000	MainLeader = *LeaderIt;
2001	std::vector<Instruction > &InstList = LeaderToInstList [LeaderIt];
2002	InstList.push_back(x: Inst);
2003	}
2004
2005	// Finally build the statements.
2006	int Count = `0`;
2007	long BBIdx = scop ->getNextStmtIdx();
2008	for (auto &Instructions : LeaderToInstList) {
2009	std::vector<Instruction *> &InstList = Instructions.second;
2010
2011	// If there is no main instruction, make the first statement the main.
2012	bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == `0`);
2013
2014	std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
2015	scop ->addScopStmt(BB, Name, SurroundingLoop: L, Instructions: std::move(InstList));
2016	Count += `1`;
2017	}
2018
2019	// Unconditionally add an epilogue (last statement). It contains no
2020	// instructions, but holds the PHI write accesses for successor basic blocks,
2021	// if the incoming value is not defined in another statement if the same BB.
2022	// The epilogue becomes the main statement only if there is no other
2023	// statement that could become main.
2024	// The epilogue will be removed if no PHIWrite is added to it.
2025	std::string EpilogueName = makeStmtName(BB, BBIdx, Count, IsMain: Count == `0`, IsLast: true);
2026	scop ->addScopStmt(BB, Name: EpilogueName, SurroundingLoop: L, Instructions: {});
2027	}
2028
2029	void ScopBuilder::buildStmts(Region &SR) {
2030	if (scop ->isNonAffineSubRegion(R: &SR)) {
2031	std::vector<Instruction *> Instructions;
2032	Loop *SurroundingLoop =
2033	getFirstNonBoxedLoopFor(BB: SR.getEntry(), LI, BoxedLoops: scop ->getBoxedLoops());
2034	for (Instruction &Inst : *SR.getEntry())
2035	if (shouldModelInst(Inst: &Inst, L: SurroundingLoop))
2036	Instructions.push_back(x: &Inst);
2037	long RIdx = scop ->getNextStmtIdx();
2038	std::string Name = makeStmtName(R: &SR, RIdx);
2039	scop ->addScopStmt(R: &SR, Name, SurroundingLoop, EntryBlockInstructions: Instructions);
2040	return;
2041	}
2042
2043	for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2044	if (I ->isSubRegion())
2045	buildStmts(SR&: *I ->getNodeAs<Region>());
2046	else {
2047	BasicBlock *BB = I ->getNodeAs<BasicBlock>();
2048	switch (StmtGranularity) {
2049	case GranularityChoice::BasicBlocks:
2050	buildSequentialBlockStmts(BB);
2051	break;
2052	case GranularityChoice::ScalarIndependence:
2053	buildEqivClassBlockStmts(BB);
2054	break;
2055	case GranularityChoice::Stores:
2056	buildSequentialBlockStmts(BB, SplitOnStore: true);
2057	break;
2058	}
2059	}
2060	}
2061
2062	void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
2063	Region *NonAffineSubRegion) {
2064	assert(
2065	Stmt &&
2066	"The exit BB is the only one that cannot be represented by a statement");
2067	assert(Stmt->represents(&BB));
2068
2069	// We do not build access functions for error blocks, as they may contain
2070	// instructions we can not model.
2071	if (SD.isErrorBlock(BB, R: scop ->getRegion()))
2072	return;
2073
2074	auto BuildAccessesForInst = [this, Stmt,
2075	NonAffineSubRegion](Instruction *Inst) {
2076	PHINode *PHI = dyn_cast<PHINode>(Val: Inst);
2077	if (PHI)
2078	buildPHIAccesses(PHIStmt: Stmt, PHI, NonAffineSubRegion, IsExitBlock: false);
2079
2080	if (auto MemInst = MemAccInst::dyn_cast(V&: *Inst)) {
2081	assert(Stmt && "Cannot build access function in non-existing statement");
2082	buildMemoryAccess(Inst: MemInst, Stmt);
2083	}
2084
2085	// PHI nodes have already been modeled above and terminators that are
2086	// not part of a non-affine subregion are fully modeled and regenerated
2087	// from the polyhedral domains. Hence, they do not need to be modeled as
2088	// explicit data dependences.
2089	if (!PHI)
2090	buildScalarDependences(UserStmt: Stmt, Inst);
2091	};
2092
2093	const InvariantLoadsSetTy &RIL = scop ->getRequiredInvariantLoads();
2094	bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
2095	if (IsEntryBlock) {
2096	for (Instruction *Inst : Stmt->getInstructions())
2097	BuildAccessesForInst (Inst);
2098	if (Stmt->isRegionStmt())
2099	BuildAccessesForInst (BB.getTerminator());
2100	} else {
2101	for (Instruction &Inst : BB) {
2102	if (isIgnoredIntrinsic(V: &Inst))
2103	continue;
2104
2105	// Invariant loads already have been processed.
2106	if (isa<LoadInst>(Val: Inst) && RIL.count(key: cast<LoadInst>(Val: &Inst)))
2107	continue;
2108
2109	BuildAccessesForInst (&Inst);
2110	}
2111	}
2112	}
2113
2114	MemoryAccess *ScopBuilder::addMemoryAccess(
2115	ScopStmt Stmt, Instruction Inst, MemoryAccess::AccessType AccType,
2116	Value BaseAddress, Type ElementType, bool Affine, Value *AccessValue,
2117	ArrayRef<const SCEV > Subscripts, ArrayRef<const* SCEV *> Sizes,
2118	MemoryKind Kind) {
2119	bool isKnownMustAccess = false;
2120
2121	// Accesses in single-basic block statements are always executed.
2122	if (Stmt->isBlockStmt())
2123	isKnownMustAccess = true;
2124
2125	if (Stmt->isRegionStmt()) {
2126	// Accesses that dominate the exit block of a non-affine region are always
2127	// executed. In non-affine regions there may exist MemoryKind::Values that
2128	// do not dominate the exit. MemoryKind::Values will always dominate the
2129	// exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
2130	// non-affine region.
2131	if (Inst && DT.dominates(A: Inst->getParent(), B: Stmt->getRegion()->getExit()))
2132	isKnownMustAccess = true;
2133	}
2134
2135	// Non-affine PHI writes do not "happen" at a particular instruction, but
2136	// after exiting the statement. Therefore they are guaranteed to execute and
2137	// overwrite the old value.
2138	if (Kind == MemoryKind::PHI \|\| Kind == MemoryKind::ExitPHI)
2139	isKnownMustAccess = true;
2140
2141	if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
2142	AccType = MemoryAccess::MAY_WRITE;
2143
2144	auto Access = new* MemoryAccess (Stmt, Inst, AccType, BaseAddress, ElementType,
2145	Affine, Subscripts, Sizes, AccessValue, Kind);
2146
2147	scop ->addAccessFunction(Access);
2148	Stmt->addAccess(Access);
2149	return Access;
2150	}
2151
2152	void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
2153	MemoryAccess::AccessType AccType,
2154	Value BaseAddress, Type ElementType,
2155	bool IsAffine,
2156	ArrayRef<const SCEV *> Subscripts,
2157	ArrayRef<const SCEV *> Sizes,
2158	Value *AccessValue) {
2159	ArrayBasePointers.insert(X: BaseAddress);
2160	addMemoryAccess(Stmt, Inst: MemAccInst, AccType, BaseAddress, ElementType, Affine: IsAffine,
2161	AccessValue, Subscripts, Sizes, Kind: MemoryKind::Array);
2162	}
2163
2164	/// Check if @p Expr is divisible by @p Size.
2165	static bool isDivisible(const SCEV Expr, unsigned* Size, ScalarEvolution &SE) {
2166	assert(Size != `0`);
2167	if (Size == `1`)
2168	return true;
2169
2170	// Only one factor needs to be divisible.
2171	if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Val: Expr)) {
2172	for (auto *FactorExpr : MulExpr->operands())
2173	if (isDivisible(Expr: FactorExpr, Size, SE))
2174	return true;
2175	return false;
2176	}
2177
2178	// For other n-ary expressions (Add, AddRec, Max,...) all operands need
2179	// to be divisible.
2180	if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Val: Expr)) {
2181	for (auto *OpExpr : NAryExpr->operands())
2182	if (!isDivisible(Expr: OpExpr, Size, SE))
2183	return false;
2184	return true;
2185	}
2186
2187	auto *SizeSCEV = SE.getConstant(Ty: Expr->getType(), V: Size);
2188	auto *UDivSCEV = SE.getUDivExpr(LHS: Expr, RHS: SizeSCEV);
2189	auto *MulSCEV = SE.getMulExpr(LHS: UDivSCEV, RHS: SizeSCEV);
2190	return MulSCEV == Expr;
2191	}
2192
2193	void ScopBuilder::foldSizeConstantsToRight() {
2194	isl::union_set Accessed = scop ->getAccesses().range();
2195
2196	for (auto Array : scop ->arrays()) {
2197	if (Array->getNumberOfDimensions() <= `1`)
2198	continue;
2199
2200	isl::space Space = Array->getSpace();
2201	Space = Space.align_params(space2: Accessed.get_space());
2202
2203	if (!Accessed.contains(space: Space))
2204	continue;
2205
2206	isl::set Elements = Accessed.extract_set(space: Space);
2207	isl::map Transform = isl::map::universe(space: Array->getSpace().map_from_set());
2208
2209	std::vector<int> Int;
2210	unsigned Dims = unsignedFromIslSize(Size: Elements.tuple_dim());
2211	for (unsigned i = `0`; i < Dims; i++) {
2212	isl::set DimOnly = isl::set (Elements).project_out(type: isl::dim::set, first: `0`, n: i);
2213	DimOnly = DimOnly.project_out(type: isl::dim::set, first: `1`, n: Dims - i - `1`);
2214	DimOnly = DimOnly.lower_bound_si(type: isl::dim::set, pos: `0`, value: `0`);
2215
2216	isl::basic_set DimHull = DimOnly.affine_hull();
2217
2218	if (i == Dims - `1`) {
2219	Int.push_back(x: `1`);
2220	Transform = Transform.equate(type1: isl::dim::in, pos1: i, type2: isl::dim::out, pos2: i);
2221	continue;
2222	}
2223
2224	if (unsignedFromIslSize(Size: DimHull.dim(type: isl::dim::div)) == `1`) {
2225	isl::aff Diff = DimHull.get_div(pos: `0`);
2226	isl::val Val = Diff.get_denominator_val();
2227
2228	int ValInt = `1`;
2229	if (Val.is_int()) {
2230	auto ValAPInt = APIntFromVal(V: Val);
2231	if (ValAPInt.isSignedIntN(N: `32`))
2232	ValInt = ValAPInt.getSExtValue();
2233	} else {
2234	}
2235
2236	Int.push_back(x: ValInt);
2237	isl::constraint C = isl::constraint::alloc_equality(
2238	ls: isl::local_space (Transform.get_space()));
2239	C = C.set_coefficient_si(type: isl::dim::out, pos: i, v: ValInt);
2240	C = C.set_coefficient_si(type: isl::dim::in, pos: i, v: -`1`);
2241	Transform = Transform.add_constraint(constraint: C);
2242	continue;
2243	}
2244
2245	isl::basic_set ZeroSet = isl::basic_set (DimHull);
2246	ZeroSet = ZeroSet.fix_si(type: isl::dim::set, pos: `0`, value: `0`);
2247
2248	int ValInt = `1`;
2249	if (ZeroSet.is_equal(bset2: DimHull)) {
2250	ValInt = `0`;
2251	}
2252
2253	Int.push_back(x: ValInt);
2254	Transform = Transform.equate(type1: isl::dim::in, pos1: i, type2: isl::dim::out, pos2: i);
2255	}
2256
2257	isl::set MappedElements = isl::map (Transform).domain();
2258	if (!Elements.is_subset(set2: MappedElements))
2259	continue;
2260
2261	bool CanFold = true;
2262	if (Int [`0`] <= `1`)
2263	CanFold = false;
2264
2265	unsigned NumDims = Array->getNumberOfDimensions();
2266	for (unsigned i = `1`; i < NumDims - `1`; i++)
2267	if (Int [`0`] != Int [i] && Int [i])
2268	CanFold = false;
2269
2270	if (!CanFold)
2271	continue;
2272
2273	for (auto &Access : scop ->access_functions())
2274	if (Access ->getScopArrayInfo() == Array)
2275	Access ->setAccessRelation(
2276	Access ->getAccessRelation().apply_range(map2: Transform));
2277
2278	std::vector<const SCEV *> Sizes;
2279	for (unsigned i = `0`; i < NumDims; i++) {
2280	auto Size = Array->getDimensionSize(Dim: i);
2281
2282	if (i == NumDims - `1`)
2283	Size = SE.getMulExpr(LHS: Size, RHS: SE.getConstant(Ty: Size->getType(), V: Int [`0`]));
2284	Sizes.push_back(x: Size);
2285	}
2286
2287	Array->updateSizes(Sizes, CheckConsistency: false / CheckConsistency /);
2288	}
2289	}
2290
2291	void ScopBuilder::finalizeAccesses() {
2292	updateAccessDimensionality();
2293	foldSizeConstantsToRight();
2294	foldAccessRelations();
2295	assumeNoOutOfBounds();
2296	}
2297
2298	void ScopBuilder::updateAccessDimensionality() {
2299	// Check all array accesses for each base pointer and find a (virtual) element
2300	// size for the base pointer that divides all access functions.
2301	for (ScopStmt &Stmt : *scop)
2302	for (MemoryAccess *Access : Stmt) {
2303	if (!Access->isArrayKind())
2304	continue;
2305	ScopArrayInfo *Array =
2306	const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
2307
2308	if (Array->getNumberOfDimensions() != `1`)
2309	continue;
2310	unsigned DivisibleSize = Array->getElemSizeInBytes();
2311	const SCEV *Subscript = Access->getSubscript(Dim: `0`);
2312	while (!isDivisible(Expr: Subscript, Size: DivisibleSize, SE))
2313	DivisibleSize /= `2`;
2314	auto Ty = IntegerType::get(C&: SE.getContext(), NumBits: DivisibleSize `8`);
2315	Array->updateElementType(NewElementType: Ty);
2316	}
2317
2318	for (auto &Stmt : *scop)
2319	for (auto &Access : Stmt)
2320	Access->updateDimensionality();
2321	}
2322
2323	void ScopBuilder::foldAccessRelations() {
2324	for (auto &Stmt : *scop)
2325	for (auto &Access : Stmt)
2326	Access->foldAccessRelation();
2327	}
2328
2329	void ScopBuilder::assumeNoOutOfBounds() {
2330	if (PollyIgnoreInbounds)
2331	return;
2332	for (auto &Stmt : *scop)
2333	for (auto &Access : Stmt) {
2334	isl::set Outside = Access->assumeNoOutOfBound();
2335	const auto &Loc = Access->getAccessInstruction()
2336	? Access->getAccessInstruction()->getDebugLoc()
2337	: DebugLoc();
2338	recordAssumption(RecordedAssumptions: &RecordedAssumptions, Kind: INBOUNDS, Set: Outside, Loc,
2339	Sign: AS_ASSUMPTION);
2340	}
2341	}
2342
2343	void ScopBuilder::ensureValueWrite(Instruction *Inst) {
2344	// Find the statement that defines the value of Inst. That statement has to
2345	// write the value to make it available to those statements that read it.
2346	ScopStmt *Stmt = scop ->getStmtFor(Inst);
2347
2348	// It is possible that the value is synthesizable within a loop (such that it
2349	// is not part of any statement), but not after the loop (where you need the
2350	// number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
2351	// avoid this. In case the IR has no such PHI, use the last statement (where
2352	// the value is synthesizable) to write the value.
2353	if (!Stmt)
2354	Stmt = scop ->getLastStmtFor(BB: Inst->getParent());
2355
2356	// Inst not defined within this SCoP.
2357	if (!Stmt)
2358	return;
2359
2360	// Do not process further if the instruction is already written.
2361	if (Stmt->lookupValueWriteOf(Inst))
2362	return;
2363
2364	addMemoryAccess(Stmt, Inst, AccType: MemoryAccess::MUST_WRITE, BaseAddress: Inst, ElementType: Inst->getType(),
2365	Affine: true, AccessValue: Inst, Subscripts: ArrayRef<const SCEV *>(),
2366	Sizes: ArrayRef<const SCEV *>(), Kind: MemoryKind::Value);
2367	}
2368
2369	void ScopBuilder::ensureValueRead(Value V, ScopStmt UserStmt) {
2370	// TODO: Make ScopStmt::ensureValueRead(Value) offer the same functionality*
2371	// to be able to replace this one. Currently, there is a split responsibility.
2372	// In a first step, the MemoryAccess is created, but without the
2373	// AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
2374	// AccessRelation is created. At least for scalar accesses, there is no new
2375	// information available at ScopStmt::buildAccessRelations(), so we could
2376	// create the AccessRelation right away. This is what
2377	// ScopStmt::ensureValueRead(Value) does.*
2378
2379	auto *Scope = UserStmt->getSurroundingLoop();
2380	auto VUse = VirtualUse::create(S: scop.get(), UserStmt, UserScope: Scope, Val: V, Virtual: false);
2381	switch (VUse.getKind()) {
2382	case VirtualUse::Constant:
2383	case VirtualUse::Block:
2384	case VirtualUse::Synthesizable:
2385	case VirtualUse::Hoisted:
2386	case VirtualUse::Intra:
2387	// Uses of these kinds do not need a MemoryAccess.
2388	break;
2389
2390	case VirtualUse::ReadOnly:
2391	// Add MemoryAccess for invariant values only if requested.
2392	if (!ModelReadOnlyScalars)
2393	break;
2394
2395	[[fallthrough]];
2396	case VirtualUse::Inter:
2397
2398	// Do not create another MemoryAccess for reloading the value if one already
2399	// exists.
2400	if (UserStmt->lookupValueReadOf(Inst: V))
2401	break;
2402
2403	addMemoryAccess(Stmt: UserStmt, Inst: nullptr, AccType: MemoryAccess::READ, BaseAddress: V, ElementType: V->getType(),
2404	Affine: true, AccessValue: V, Subscripts: ArrayRef<const SCEV >(), Sizes: ArrayRef<const* SCEV *>(),
2405	Kind: MemoryKind::Value);
2406
2407	// Inter-statement uses need to write the value in their defining statement.
2408	if (VUse.isInter())
2409	ensureValueWrite(Inst: cast<Instruction>(Val: V));
2410	break;
2411	}
2412	}
2413
2414	void ScopBuilder::ensurePHIWrite(PHINode PHI, ScopStmt IncomingStmt,
2415	BasicBlock *IncomingBlock,
2416	Value IncomingValue, bool* IsExitBlock) {
2417	// As the incoming block might turn out to be an error statement ensure we
2418	// will create an exit PHI SAI object. It is needed during code generation
2419	// and would be created later anyway.
2420	if (IsExitBlock)
2421	scop ->getOrCreateScopArrayInfo(BasePtr: PHI, ElementType: PHI->getType(), Sizes: {},
2422	Kind: MemoryKind::ExitPHI);
2423
2424	// This is possible if PHI is in the SCoP's entry block. The incoming blocks
2425	// from outside the SCoP's region have no statement representation.
2426	if (!IncomingStmt)
2427	return;
2428
2429	// Take care for the incoming value being available in the incoming block.
2430	// This must be done before the check for multiple PHI writes because multiple
2431	// exiting edges from subregion each can be the effective written value of the
2432	// subregion. As such, all of them must be made available in the subregion
2433	// statement.
2434	ensureValueRead(V: IncomingValue, UserStmt: IncomingStmt);
2435
2436	// Do not add more than one MemoryAccess per PHINode and ScopStmt.
2437	if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
2438	assert(Acc->getAccessInstruction() == PHI);
2439	Acc->addIncoming(IncomingBlock, IncomingValue);
2440	return;
2441	}
2442
2443	MemoryAccess *Acc = addMemoryAccess(
2444	Stmt: IncomingStmt, Inst: PHI, AccType: MemoryAccess::MUST_WRITE, BaseAddress: PHI, ElementType: PHI->getType(), Affine: true,
2445	AccessValue: PHI, Subscripts: ArrayRef<const SCEV >(), Sizes: ArrayRef<const* SCEV *>(),
2446	Kind: IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
2447	assert(Acc);
2448	Acc->addIncoming(IncomingBlock, IncomingValue);
2449	}
2450
2451	void ScopBuilder::addPHIReadAccess(ScopStmt PHIStmt, PHINode PHI) {
2452	addMemoryAccess(Stmt: PHIStmt, Inst: PHI, AccType: MemoryAccess::READ, BaseAddress: PHI, ElementType: PHI->getType(), Affine: true,
2453	AccessValue: PHI, Subscripts: ArrayRef<const SCEV >(), Sizes: ArrayRef<const* SCEV *>(),
2454	Kind: MemoryKind::PHI);
2455	}
2456
2457	void ScopBuilder::buildDomain(ScopStmt &Stmt) {
2458	isl::id Id = isl::id::alloc(ctx: scop ->getIslCtx(), name: Stmt.getBaseName(), user: &Stmt);
2459
2460	Stmt.Domain = scop ->getDomainConditions(Stmt: &Stmt);
2461	Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
2462	}
2463
2464	void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
2465	isl::set Domain = Stmt.getDomain();
2466	BasicBlock *BB = Stmt.getEntryBlock();
2467
2468	Loop *L = LI.getLoopFor(BB);
2469
2470	while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
2471	L = L->getParentLoop();
2472
2473	SmallVector<llvm::Loop *, `8`> Loops;
2474
2475	while (L && Stmt.getParent()->getRegion().contains(L)) {
2476	Loops.push_back(Elt: L);
2477	L = L->getParentLoop();
2478	}
2479
2480	Stmt.NestLoops.insert(I: Stmt.NestLoops.begin(), From: Loops.rbegin(), To: Loops.rend());
2481	}
2482
2483	/// Return the reduction type for a given binary operator.
2484	static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
2485	const Instruction *Load) {
2486	if (!BinOp)
2487	return MemoryAccess::RT_NONE;
2488	switch (BinOp->getOpcode()) {
2489	case Instruction::FAdd:
2490	if (!BinOp->isFast())
2491	return MemoryAccess::RT_NONE;
2492	[[fallthrough]];
2493	case Instruction::Add:
2494	return MemoryAccess::RT_ADD;
2495	case Instruction::Or:
2496	return MemoryAccess::RT_BOR;
2497	case Instruction::Xor:
2498	return MemoryAccess::RT_BXOR;
2499	case Instruction::And:
2500	return MemoryAccess::RT_BAND;
2501	case Instruction::FMul:
2502	if (!BinOp->isFast())
2503	return MemoryAccess::RT_NONE;
2504	[[fallthrough]];
2505	case Instruction::Mul:
2506	if (DisableMultiplicativeReductions)
2507	return MemoryAccess::RT_NONE;
2508	return MemoryAccess::RT_MUL;
2509	default:
2510	return MemoryAccess::RT_NONE;
2511	}
2512	}
2513
2514	/// True if @p AllAccs intersects with @p MemAccs execpt @p LoadMA and @p
2515	/// StoreMA
2516	bool hasIntersectingAccesses(isl::set AllAccs, MemoryAccess *LoadMA,
2517	MemoryAccess *StoreMA, isl::set Domain,
2518	SmallVector<MemoryAccess *, `8`> &MemAccs) {
2519	bool HasIntersectingAccs = false;
2520	auto AllAccsNoParams = AllAccs.project_out_all_params();
2521
2522	for (MemoryAccess *MA : MemAccs) {
2523	if (MA == LoadMA \|\| MA == StoreMA)
2524	continue;
2525	auto AccRel = MA->getAccessRelation().intersect_domain(set: Domain);
2526	auto Accs = AccRel.range();
2527	auto AccsNoParams = Accs.project_out_all_params();
2528
2529	bool CompatibleSpace = AllAccsNoParams.has_equal_space(set2: AccsNoParams);
2530
2531	if (CompatibleSpace) {
2532	auto OverlapAccs = Accs.intersect(set2: AllAccs);
2533	bool DoesIntersect = !OverlapAccs.is_empty();
2534	HasIntersectingAccs \|= DoesIntersect;
2535	}
2536	}
2537	return HasIntersectingAccs;
2538	}
2539
2540	/// Test if the accesses of @p LoadMA and @p StoreMA can form a reduction
2541	bool checkCandidatePairAccesses(MemoryAccess LoadMA, MemoryAccess StoreMA,
2542	isl::set Domain,
2543	SmallVector<MemoryAccess *, `8`> &MemAccs) {
2544	// First check if the base value is the same.
2545	isl::map LoadAccs = LoadMA->getAccessRelation();
2546	isl::map StoreAccs = StoreMA->getAccessRelation();
2547	bool Valid = LoadAccs.has_equal_space(map2: StoreAccs);
2548	POLLY_DEBUG(dbgs() << " == The accessed space below is "
2549	<< (Valid ? "" : "not ") << "equal!\n");
2550	POLLY_DEBUG(LoadMA->dump(); StoreMA->dump());
2551
2552	if (Valid) {
2553	// Then check if they actually access the same memory.
2554	isl::map R = isl::manage(ptr: LoadAccs.copy())
2555	.intersect_domain(set: isl::manage(ptr: Domain.copy()));
2556	isl::map W = isl::manage(ptr: StoreAccs.copy())
2557	.intersect_domain(set: isl::manage(ptr: Domain.copy()));
2558	isl::set RS = R.range();
2559	isl::set WS = W.range();
2560
2561	isl::set InterAccs =
2562	isl::manage(ptr: RS.copy()).intersect(set2: isl::manage(ptr: WS.copy()));
2563	Valid = !InterAccs.is_empty();
2564	POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "" : "not ")
2565	<< "overlapping!\n");
2566	}
2567
2568	if (Valid) {
2569	// Finally, check if they are no other instructions accessing this memory
2570	isl::map AllAccsRel = LoadAccs.unite(map2: StoreAccs);
2571	AllAccsRel = AllAccsRel.intersect_domain(set: Domain);
2572	isl::set AllAccs = AllAccsRel.range();
2573	Valid = !hasIntersectingAccesses(AllAccs, LoadMA, StoreMA, Domain, MemAccs);
2574
2575	POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "not " : "")
2576	<< "accessed by other instructions!\n");
2577	}
2578	return Valid;
2579	}
2580
2581	void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
2582	SmallVector<MemoryAccess *, `2`> Loads;
2583	SmallVector<std::pair<MemoryAccess , MemoryAccess >, `4`> Candidates;
2584
2585	// First collect candidate load-store reduction chains by iterating over all
2586	// stores and collecting possible reduction loads.
2587	for (MemoryAccess *StoreMA : Stmt) {
2588	if (StoreMA->isRead())
2589	continue;
2590
2591	Loads.clear();
2592	collectCandidateReductionLoads(StoreMA, Loads);
2593	for (MemoryAccess *LoadMA : Loads)
2594	Candidates.push_back(Elt: std::make_pair(x&: LoadMA, y&: StoreMA));
2595	}
2596
2597	// Then check each possible candidate pair.
2598	for (const auto &CandidatePair : Candidates) {
2599	MemoryAccess *LoadMA = CandidatePair.first;
2600	MemoryAccess *StoreMA = CandidatePair.second;
2601	bool Valid = checkCandidatePairAccesses(LoadMA, StoreMA, Domain: Stmt.getDomain(),
2602	MemAccs&: Stmt.MemAccs);
2603	if (!Valid)
2604	continue;
2605
2606	const LoadInst *Load =
2607	dyn_cast<const LoadInst>(Val: CandidatePair.first->getAccessInstruction());
2608	MemoryAccess::ReductionType RT =
2609	getReductionType(BinOp: dyn_cast<BinaryOperator>(Val: Load->user_back()), Load);
2610
2611	// If no overlapping access was found we mark the load and store as
2612	// reduction like.
2613	LoadMA->markAsReductionLike(RT);
2614	StoreMA->markAsReductionLike(RT);
2615	}
2616	}
2617
2618	void ScopBuilder::verifyInvariantLoads() {
2619	auto &RIL = scop ->getRequiredInvariantLoads();
2620	for (LoadInst *LI : RIL) {
2621	assert(LI && scop ->contains(LI));
2622	// If there exists a statement in the scop which has a memory access for
2623	// @p LI, then mark this scop as infeasible for optimization.
2624	for (ScopStmt &Stmt : *scop)
2625	if (Stmt.getArrayAccessOrNULLFor(Inst: LI)) {
2626	scop ->invalidate(Kind: INVARIANTLOAD, Loc: LI->getDebugLoc(), BB: LI->getParent());
2627	return;
2628	}
2629	}
2630	}
2631
2632	void ScopBuilder::hoistInvariantLoads() {
2633	if (!PollyInvariantLoadHoisting)
2634	return;
2635
2636	isl::union_map Writes = scop ->getWrites();
2637	for (ScopStmt &Stmt : *scop) {
2638	InvariantAccessesTy InvariantAccesses;
2639
2640	for (MemoryAccess *Access : Stmt) {
2641	isl::set NHCtx = getNonHoistableCtx(Access, Writes);
2642	if (!NHCtx.is_null())
2643	InvariantAccesses.push_back(Elt: {.MA: Access, .NonHoistableCtx: NHCtx});
2644	}
2645
2646	// Transfer the memory access from the statement to the SCoP.
2647	for (auto InvMA : InvariantAccesses)
2648	Stmt.removeMemoryAccess(MA: InvMA.MA);
2649	addInvariantLoads(Stmt, InvMAs&: InvariantAccesses);
2650	}
2651	}
2652
2653	/// Check if an access range is too complex.
2654	///
2655	/// An access range is too complex, if it contains either many disjuncts or
2656	/// very complex expressions. As a simple heuristic, we assume if a set to
2657	/// be too complex if the sum of existentially quantified dimensions and
2658	/// set dimensions is larger than a threshold. This reliably detects both
2659	/// sets with many disjuncts as well as sets with many divisions as they
2660	/// arise in h264.
2661	///
2662	/// @param AccessRange The range to check for complexity.
2663	///
2664	/// @returns True if the access range is too complex.
2665	static bool isAccessRangeTooComplex(isl::set AccessRange) {
2666	unsigned NumTotalDims = `0`;
2667
2668	for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
2669	NumTotalDims += unsignedFromIslSize(Size: BSet.dim(type: isl::dim::div));
2670	NumTotalDims += unsignedFromIslSize(Size: BSet.dim(type: isl::dim::set));
2671	}
2672
2673	if (NumTotalDims > MaxDimensionsInAccessRange)
2674	return true;
2675
2676	return false;
2677	}
2678
2679	bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
2680	isl::union_map Writes) {
2681	if (auto *BasePtrMA = scop ->lookupBasePtrAccess(MA)) {
2682	return getNonHoistableCtx(Access: BasePtrMA, Writes).is_null();
2683	}
2684
2685	Value *BaseAddr = MA->getOriginalBaseAddr();
2686	if (auto *BasePtrInst = dyn_cast<Instruction>(Val: BaseAddr))
2687	if (!isa<LoadInst>(Val: BasePtrInst))
2688	return scop ->contains(I: BasePtrInst);
2689
2690	return false;
2691	}
2692
2693	void ScopBuilder::addUserContext() {
2694	if (UserContextStr.empty())
2695	return;
2696
2697	isl::set UserContext = isl::set (scop ->getIslCtx(), UserContextStr.c_str());
2698	isl::space Space = scop ->getParamSpace();
2699	isl::size SpaceParams = Space.dim(type: isl::dim::param);
2700	if (unsignedFromIslSize(Size: SpaceParams) !=
2701	unsignedFromIslSize(Size: UserContext.dim(type: isl::dim::param))) {
2702	std::string SpaceStr = stringFromIslObj(Obj: Space, DefaultValue: "null");
2703	errs() << "Error: the context provided in -polly-context has not the same "
2704	<< "number of dimensions than the computed context. Due to this "
2705	<< "mismatch, the -polly-context option is ignored. Please provide "
2706	<< "the context in the parameter space: " << SpaceStr << ".\n";
2707	return;
2708	}
2709
2710	for (auto i : rangeIslSize(Begin: `0`, End: SpaceParams)) {
2711	std::string NameContext =
2712	scop ->getContext().get_dim_name(type: isl::dim::param, pos: i);
2713	std::string NameUserContext = UserContext.get_dim_name(type: isl::dim::param, pos: i);
2714
2715	if (NameContext != NameUserContext) {
2716	std::string SpaceStr = stringFromIslObj(Obj: Space, DefaultValue: "null");
2717	errs() << "Error: the name of dimension " << i
2718	<< " provided in -polly-context "
2719	<< "is '" << NameUserContext << "', but the name in the computed "
2720	<< "context is '" << NameContext
2721	<< "'. Due to this name mismatch, "
2722	<< "the -polly-context option is ignored. Please provide "
2723	<< "the context in the parameter space: " << SpaceStr << ".\n";
2724	return;
2725	}
2726
2727	UserContext = UserContext.set_dim_id(type: isl::dim::param, pos: i,
2728	id: Space.get_dim_id(type: isl::dim::param, pos: i));
2729	}
2730	isl::set newContext = scop ->getContext().intersect(set2: UserContext);
2731	scop ->setContext(newContext);
2732	}
2733
2734	isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
2735	isl::union_map Writes) {
2736	// TODO: Loads that are not loop carried, hence are in a statement with
2737	// zero iterators, are by construction invariant, though we
2738	// currently "hoist" them anyway. This is necessary because we allow
2739	// them to be treated as parameters (e.g., in conditions) and our code
2740	// generation would otherwise use the old value.
2741
2742	auto &Stmt = *Access->getStatement();
2743	BasicBlock *BB = Stmt.getEntryBlock();
2744
2745	if (Access->isScalarKind() \|\| Access->isWrite() \|\| !Access->isAffine() \|\|
2746	Access->isMemoryIntrinsic())
2747	return {};
2748
2749	// Skip accesses that have an invariant base pointer which is defined but
2750	// not loaded inside the SCoP. This can happened e.g., if a readnone call
2751	// returns a pointer that is used as a base address. However, as we want
2752	// to hoist indirect pointers, we allow the base pointer to be defined in
2753	// the region if it is also a memory access. Each ScopArrayInfo object
2754	// that has a base pointer origin has a base pointer that is loaded and
2755	// that it is invariant, thus it will be hoisted too. However, if there is
2756	// no base pointer origin we check that the base pointer is defined
2757	// outside the region.
2758	auto *LI = cast<LoadInst>(Val: Access->getAccessInstruction());
2759	if (hasNonHoistableBasePtrInScop(MA: Access, Writes))
2760	return {};
2761
2762	isl::map AccessRelation = Access->getAccessRelation();
2763	assert(!AccessRelation.is_empty());
2764
2765	if (AccessRelation.involves_dims(type: isl::dim::in, first: `0`, n: Stmt.getNumIterators()))
2766	return {};
2767
2768	AccessRelation = AccessRelation.intersect_domain(set: Stmt.getDomain());
2769	isl::set SafeToLoad;
2770
2771	auto &DL = scop ->getFunction().getParent()->getDataLayout();
2772	if (isSafeToLoadUnconditionally(V: LI->getPointerOperand(), Ty: LI->getType(),
2773	Alignment: LI->getAlign(), DL)) {
2774	SafeToLoad = isl::set::universe(space: AccessRelation.get_space().range());
2775	} else if (BB != LI->getParent()) {
2776	// Skip accesses in non-affine subregions as they might not be executed
2777	// under the same condition as the entry of the non-affine subregion.
2778	return {};
2779	} else {
2780	SafeToLoad = AccessRelation.range();
2781	}
2782
2783	if (isAccessRangeTooComplex(AccessRange: AccessRelation.range()))
2784	return {};
2785
2786	isl::union_map Written = Writes.intersect_range(uset: SafeToLoad);
2787	isl::set WrittenCtx = Written.params();
2788	bool IsWritten = !WrittenCtx.is_empty();
2789
2790	if (!IsWritten)
2791	return WrittenCtx;
2792
2793	WrittenCtx = WrittenCtx.remove_divs();
2794	bool TooComplex =
2795	unsignedFromIslSize(Size: WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain;
2796	if (TooComplex \|\| !isRequiredInvariantLoad(LI))
2797	return {};
2798
2799	scop ->addAssumption(Kind: INVARIANTLOAD, Set: WrittenCtx, Loc: LI->getDebugLoc(),
2800	Sign: AS_RESTRICTION, BB: LI->getParent());
2801	return WrittenCtx;
2802	}
2803
2804	static bool isAParameter(llvm::Value maybeParam, const* Function &F) {
2805	for (const llvm::Argument &Arg : F.args())
2806	if (&Arg == maybeParam)
2807	return true;
2808
2809	return false;
2810	}
2811
2812	bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
2813	bool StmtInvalidCtxIsEmpty,
2814	bool MAInvalidCtxIsEmpty,
2815	bool NonHoistableCtxIsEmpty) {
2816	LoadInst *LInst = cast<LoadInst>(Val: MA->getAccessInstruction());
2817	const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
2818	if (PollyAllowDereferenceOfAllFunctionParams &&
2819	isAParameter(maybeParam: LInst->getPointerOperand(), F: scop ->getFunction()))
2820	return true;
2821
2822	// TODO: We can provide more information for better but more expensive
2823	// results.
2824	if (!isDereferenceableAndAlignedPointer(
2825	V: LInst->getPointerOperand(), Ty: LInst->getType(), Alignment: LInst->getAlign(), DL))
2826	return false;
2827
2828	// If the location might be overwritten we do not hoist it unconditionally.
2829	//
2830	// TODO: This is probably too conservative.
2831	if (!NonHoistableCtxIsEmpty)
2832	return false;
2833
2834	// If a dereferenceable load is in a statement that is modeled precisely we
2835	// can hoist it.
2836	if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
2837	return true;
2838
2839	// Even if the statement is not modeled precisely we can hoist the load if it
2840	// does not involve any parameters that might have been specialized by the
2841	// statement domain.
2842	for (const SCEV *Subscript : MA->subscripts())
2843	if (!isa<SCEVConstant>(Val: Subscript))
2844	return false;
2845	return true;
2846	}
2847
2848	void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
2849	InvariantAccessesTy &InvMAs) {
2850	if (InvMAs.empty())
2851	return;
2852
2853	isl::set StmtInvalidCtx = Stmt.getInvalidContext();
2854	bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
2855
2856	// Get the context under which the statement is executed but remove the error
2857	// context under which this statement is reached.
2858	isl::set DomainCtx = Stmt.getDomain().params();
2859	DomainCtx = DomainCtx.subtract(set2: StmtInvalidCtx);
2860
2861	if (unsignedFromIslSize(Size: DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
2862	auto *AccInst = InvMAs.front().MA->getAccessInstruction();
2863	scop ->invalidate(Kind: COMPLEXITY, Loc: AccInst->getDebugLoc(), BB: AccInst->getParent());
2864	return;
2865	}
2866
2867	// Project out all parameters that relate to loads in the statement. Otherwise
2868	// we could have cyclic dependences on the constraints under which the
2869	// hoisted loads are executed and we could not determine an order in which to
2870	// pre-load them. This happens because not only lower bounds are part of the
2871	// domain but also upper bounds.
2872	for (auto &InvMA : InvMAs) {
2873	auto *MA = InvMA.MA;
2874	Instruction *AccInst = MA->getAccessInstruction();
2875	if (SE.isSCEVable(Ty: AccInst->getType())) {
2876	SetVector<Value *> Values;
2877	for (const SCEV *Parameter : scop ->parameters()) {
2878	Values.clear();
2879	findValues(Expr: Parameter, SE, Values);
2880	if (!Values.count(key: AccInst))
2881	continue;
2882
2883	isl::id ParamId = scop ->getIdForParam(Parameter);
2884	if (!ParamId.is_null()) {
2885	int Dim = DomainCtx.find_dim_by_id(type: isl::dim::param, id: ParamId);
2886	if (Dim >= `0`)
2887	DomainCtx = DomainCtx.eliminate(type: isl::dim::param, first: Dim, n: `1`);
2888	}
2889	}
2890	}
2891	}
2892
2893	for (auto &InvMA : InvMAs) {
2894	auto *MA = InvMA.MA;
2895	isl::set NHCtx = InvMA.NonHoistableCtx;
2896
2897	// Check for another invariant access that accesses the same location as
2898	// MA and if found consolidate them. Otherwise create a new equivalence
2899	// class at the end of InvariantEquivClasses.
2900	LoadInst *LInst = cast<LoadInst>(Val: MA->getAccessInstruction());
2901	Type *Ty = LInst->getType();
2902	const SCEV *PointerSCEV = SE.getSCEV(V: LInst->getPointerOperand());
2903
2904	isl::set MAInvalidCtx = MA->getInvalidContext();
2905	bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
2906	bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
2907
2908	isl::set MACtx;
2909	// Check if we know that this pointer can be speculatively accessed.
2910	if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
2911	NonHoistableCtxIsEmpty)) {
2912	MACtx = isl::set::universe(space: DomainCtx.get_space());
2913	} else {
2914	MACtx = DomainCtx;
2915	MACtx = MACtx.subtract(set2: MAInvalidCtx.unite(set2: NHCtx));
2916	MACtx = MACtx.gist_params(context: scop ->getContext());
2917	}
2918
2919	bool Consolidated = false;
2920	for (auto &IAClass : scop ->invariantEquivClasses()) {
2921	if (PointerSCEV != IAClass.IdentifyingPointer \|\| Ty != IAClass.AccessType)
2922	continue;
2923
2924	// If the pointer and the type is equal check if the access function wrt.
2925	// to the domain is equal too. It can happen that the domain fixes
2926	// parameter values and these can be different for distinct part of the
2927	// SCoP. If this happens we cannot consolidate the loads but need to
2928	// create a new invariant load equivalence class.
2929	auto &MAs = IAClass.InvariantAccesses;
2930	if (!MAs.empty()) {
2931	auto *LastMA = MAs.front();
2932
2933	isl::set AR = MA->getAccessRelation().range();
2934	isl::set LastAR = LastMA->getAccessRelation().range();
2935	bool SameAR = AR.is_equal(set2: LastAR);
2936
2937	if (!SameAR)
2938	continue;
2939	}
2940
2941	// Add MA to the list of accesses that are in this class.
2942	MAs.push_front(val: MA);
2943
2944	Consolidated = true;
2945
2946	// Unify the execution context of the class and this statement.
2947	isl::set IAClassDomainCtx = IAClass.ExecutionContext;
2948	if (!IAClassDomainCtx.is_null())
2949	IAClassDomainCtx = IAClassDomainCtx.unite(set2: MACtx).coalesce();
2950	else
2951	IAClassDomainCtx = MACtx;
2952	IAClass.ExecutionContext = IAClassDomainCtx;
2953	break;
2954	}
2955
2956	if (Consolidated)
2957	continue;
2958
2959	MACtx = MACtx.coalesce();
2960
2961	// If we did not consolidate MA, thus did not find an equivalence class
2962	// for it, we create a new one.
2963	scop ->addInvariantEquivClass(
2964	InvariantEquivClass: InvariantEquivClassTy{.IdentifyingPointer: PointerSCEV, .InvariantAccesses: MemoryAccessList {MA}, .ExecutionContext: MACtx, .AccessType: Ty});
2965	}
2966	}
2967
2968	void ScopBuilder::collectCandidateReductionLoads(
2969	MemoryAccess StoreMA, SmallVectorImpl<MemoryAccess > &Loads) {
2970	ScopStmt *Stmt = StoreMA->getStatement();
2971
2972	auto *Store = dyn_cast<StoreInst>(Val: StoreMA->getAccessInstruction());
2973	if (!Store)
2974	return;
2975
2976	// Skip if there is not one binary operator between the load and the store
2977	auto *BinOp = dyn_cast<BinaryOperator>(Val: Store->getValueOperand());
2978	if (!BinOp)
2979	return;
2980
2981	// Skip if the binary operators has multiple uses
2982	if (BinOp->getNumUses() != `1`)
2983	return;
2984
2985	// Skip if the opcode of the binary operator is not commutative/associative
2986	if (!BinOp->isCommutative() \|\| !BinOp->isAssociative())
2987	return;
2988
2989	// Skip if the binary operator is outside the current SCoP
2990	if (BinOp->getParent() != Store->getParent())
2991	return;
2992
2993	// Skip if it is a multiplicative reduction and we disabled them
2994	if (DisableMultiplicativeReductions &&
2995	(BinOp->getOpcode() == Instruction::Mul \|\|
2996	BinOp->getOpcode() == Instruction::FMul))
2997	return;
2998
2999	// Check the binary operator operands for a candidate load
3000	auto *PossibleLoad0 = dyn_cast<LoadInst>(Val: BinOp->getOperand(i_nocapture: `0`));
3001	auto *PossibleLoad1 = dyn_cast<LoadInst>(Val: BinOp->getOperand(i_nocapture: `1`));
3002	if (!PossibleLoad0 && !PossibleLoad1)
3003	return;
3004
3005	// A load is only a candidate if it cannot escape (thus has only this use)
3006	if (PossibleLoad0 && PossibleLoad0->getNumUses() == `1`)
3007	if (PossibleLoad0->getParent() == Store->getParent())
3008	Loads.push_back(Elt: &Stmt->getArrayAccessFor(Inst: PossibleLoad0));
3009	if (PossibleLoad1 && PossibleLoad1->getNumUses() == `1`)
3010	if (PossibleLoad1->getParent() == Store->getParent())
3011	Loads.push_back(Elt: &Stmt->getArrayAccessFor(Inst: PossibleLoad1));
3012	}
3013
3014	/// Find the canonical scop array info object for a set of invariant load
3015	/// hoisted loads. The canonical array is the one that corresponds to the
3016	/// first load in the list of accesses which is used as base pointer of a
3017	/// scop array.
3018	static const ScopArrayInfo *findCanonicalArray(Scop &S,
3019	MemoryAccessList &Accesses) {
3020	for (MemoryAccess *Access : Accesses) {
3021	const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
3022	BasePtr: Access->getAccessInstruction(), Kind: MemoryKind::Array);
3023	if (CanonicalArray)
3024	return CanonicalArray;
3025	}
3026	return nullptr;
3027	}
3028
3029	/// Check if @p Array severs as base array in an invariant load.
3030	static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
3031	for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
3032	for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
3033	if (Access2->getScopArrayInfo() == Array)
3034	return true;
3035	return false;
3036	}
3037
3038	/// Replace the base pointer arrays in all memory accesses referencing @p Old,
3039	/// with a reference to @p New.
3040	static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
3041	const ScopArrayInfo *New) {
3042	for (ScopStmt &Stmt : S)
3043	for (MemoryAccess *Access : Stmt) {
3044	if (Access->getLatestScopArrayInfo() != Old)
3045	continue;
3046
3047	isl::id Id = New->getBasePtrId();
3048	isl::map Map = Access->getAccessRelation();
3049	Map = Map.set_tuple_id(type: isl::dim::out, id: Id);
3050	Access->setAccessRelation(Map);
3051	}
3052	}
3053
3054	void ScopBuilder::canonicalizeDynamicBasePtrs() {
3055	for (InvariantEquivClassTy &EqClass : scop ->InvariantEquivClasses) {
3056	MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
3057
3058	const ScopArrayInfo *CanonicalBasePtrSAI =
3059	findCanonicalArray(S&: *scop, Accesses&: BasePtrAccesses);
3060
3061	if (!CanonicalBasePtrSAI)
3062	continue;
3063
3064	for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
3065	const ScopArrayInfo *BasePtrSAI = scop ->getScopArrayInfoOrNull(
3066	BasePtr: BasePtrAccess->getAccessInstruction(), Kind: MemoryKind::Array);
3067	if (!BasePtrSAI \|\| BasePtrSAI == CanonicalBasePtrSAI \|\|
3068	!BasePtrSAI->isCompatibleWith(Array: CanonicalBasePtrSAI))
3069	continue;
3070
3071	// we currently do not canonicalize arrays where some accesses are
3072	// hoisted as invariant loads. If we would, we need to update the access
3073	// function of the invariant loads as well. However, as this is not a
3074	// very common situation, we leave this for now to avoid further
3075	// complexity increases.
3076	if (isUsedForIndirectHoistedLoad(S&: *scop, Array: BasePtrSAI))
3077	continue;
3078
3079	replaceBasePtrArrays(S&: *scop, Old: BasePtrSAI, New: CanonicalBasePtrSAI);
3080	}
3081	}
3082	}
3083
3084	void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
3085	for (MemoryAccess *Access : Stmt.MemAccs) {
3086	Type *ElementType = Access->getElementType();
3087
3088	MemoryKind Ty;
3089	if (Access->isPHIKind())
3090	Ty = MemoryKind::PHI;
3091	else if (Access->isExitPHIKind())
3092	Ty = MemoryKind::ExitPHI;
3093	else if (Access->isValueKind())
3094	Ty = MemoryKind::Value;
3095	else
3096	Ty = MemoryKind::Array;
3097
3098	// Create isl::pw_aff for SCEVs which describe sizes. Collect all
3099	// assumptions which are taken. isl::pw_aff objects are cached internally
3100	// and they are used later by scop.
3101	for (const SCEV *Size : Access->Sizes) {
3102	if (!Size)
3103	continue;
3104	scop ->getPwAff(E: Size, BB: nullptr, NonNegative: false, RecordedAssumptions: &RecordedAssumptions);
3105	}
3106	auto *SAI = scop ->getOrCreateScopArrayInfo(BasePtr: Access->getOriginalBaseAddr(),
3107	ElementType, Sizes: Access->Sizes, Kind: Ty);
3108
3109	// Create isl::pw_aff for SCEVs which describe subscripts. Collect all
3110	// assumptions which are taken. isl::pw_aff objects are cached internally
3111	// and they are used later by scop.
3112	for (const SCEV *Subscript : Access->subscripts()) {
3113	if (!Access->isAffine() \|\| !Subscript)
3114	continue;
3115	scop ->getPwAff(E: Subscript, BB: Stmt.getEntryBlock(), NonNegative: false,
3116	RecordedAssumptions: &RecordedAssumptions);
3117	}
3118	Access->buildAccessRelation(SAI);
3119	scop ->addAccessData(Access);
3120	}
3121	}
3122
3123	/// Add the minimal/maximal access in @p Set to @p User.
3124	///
3125	/// @return True if more accesses should be added, false if we reached the
3126	/// maximal number of run-time checks to be generated.
3127	static bool buildMinMaxAccess(isl::set Set,
3128	Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
3129	isl::pw_multi_aff MinPMA, MaxPMA;
3130	isl::pw_aff LastDimAff;
3131	isl::aff OneAff;
3132	unsigned Pos;
3133
3134	Set = Set.remove_divs();
3135	polly::simplify(Set);
3136
3137	if (unsignedFromIslSize(Size: Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts)
3138	Set = Set.simple_hull();
3139
3140	// Restrict the number of parameters involved in the access as the lexmin/
3141	// lexmax computation will take too long if this number is high.
3142	//
3143	// Experiments with a simple test case using an i7 4800MQ:
3144	//
3145	// #Parameters involved \| Time (in sec)
3146	// 6 \| 0.01
3147	// 7 \| 0.04
3148	// 8 \| 0.12
3149	// 9 \| 0.40
3150	// 10 \| 1.54
3151	// 11 \| 6.78
3152	// 12 \| 30.38
3153	//
3154	if (isl_set_n_param(set: Set.get()) >
3155	static_cast<isl_size>(RunTimeChecksMaxParameters)) {
3156	unsigned InvolvedParams = `0`;
3157	for (unsigned u = `0`, e = isl_set_n_param(set: Set.get()); u < e; u++)
3158	if (Set.involves_dims(type: isl::dim::param, first: u, n: `1`))
3159	InvolvedParams++;
3160
3161	if (InvolvedParams > RunTimeChecksMaxParameters)
3162	return false;
3163	}
3164
3165	MinPMA = Set.lexmin_pw_multi_aff();
3166	MaxPMA = Set.lexmax_pw_multi_aff();
3167
3168	MinPMA = MinPMA.coalesce();
3169	MaxPMA = MaxPMA.coalesce();
3170
3171	if (MaxPMA.is_null())
3172	return false;
3173
3174	unsigned MaxOutputSize = unsignedFromIslSize(Size: MaxPMA.dim(type: isl::dim::out));
3175
3176	// Adjust the last dimension of the maximal access by one as we want to
3177	// enclose the accessed memory region by MinPMA and MaxPMA. The pointer
3178	// we test during code generation might now point after the end of the
3179	// allocated array but we will never dereference it anyway.
3180	assert(MaxOutputSize >= `1` && "Assumed at least one output dimension");
3181
3182	Pos = MaxOutputSize - `1`;
3183	LastDimAff = MaxPMA.at(pos: Pos);
3184	OneAff = isl::aff (isl::local_space (LastDimAff.get_domain_space()));
3185	OneAff = OneAff.add_constant_si(v: `1`);
3186	LastDimAff = LastDimAff.add(pwaff2: OneAff);
3187	MaxPMA = MaxPMA.set_pw_aff(pos: Pos, pa: LastDimAff);
3188
3189	if (MinPMA.is_null() \|\| MaxPMA.is_null())
3190	return false;
3191
3192	MinMaxAccesses.push_back(Elt: std::make_pair(x&: MinPMA, y&: MaxPMA));
3193
3194	return true;
3195	}
3196
3197	/// Wrapper function to calculate minimal/maximal accesses to each array.
3198	bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
3199	Scop::MinMaxVectorTy &MinMaxAccesses) {
3200	MinMaxAccesses.reserve(N: AliasGroup.size());
3201
3202	isl::union_set Domains = scop ->getDomains();
3203	isl::union_map Accesses = isl::union_map::empty(ctx: scop ->getIslCtx());
3204
3205	for (MemoryAccess *MA : AliasGroup)
3206	Accesses = Accesses.unite(umap2: MA->getAccessRelation());
3207
3208	Accesses = Accesses.intersect_domain(uset: Domains);
3209	isl::union_set Locations = Accesses.range();
3210
3211	bool LimitReached = false;
3212	for (isl::set Set : Locations.get_set_list()) {
3213	LimitReached \|= !buildMinMaxAccess(Set, MinMaxAccesses, S&: *scop);
3214	if (LimitReached)
3215	break;
3216	}
3217
3218	return !LimitReached;
3219	}
3220
3221	static isl::set getAccessDomain(MemoryAccess *MA) {
3222	isl::set Domain = MA->getStatement()->getDomain();
3223	Domain = Domain.project_out(type: isl::dim::set, first: `0`,
3224	n: unsignedFromIslSize(Size: Domain.tuple_dim()));
3225	return Domain.reset_tuple_id();
3226	}
3227
3228	bool ScopBuilder::buildAliasChecks() {
3229	if (!PollyUseRuntimeAliasChecks)
3230	return true;
3231
3232	if (buildAliasGroups()) {
3233	// Aliasing assumptions do not go through addAssumption but we still want to
3234	// collect statistics so we do it here explicitly.
3235	if (scop ->getAliasGroups().size())
3236	Scop::incrementNumberOfAliasingAssumptions(Step: `1`);
3237	return true;
3238	}
3239
3240	// If a problem occurs while building the alias groups we need to delete
3241	// this SCoP and pretend it wasn't valid in the first place. To this end
3242	// we make the assumed context infeasible.
3243	scop ->invalidate(Kind: ALIASING, Loc: DebugLoc());
3244
3245	POLLY_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop ->getNameStr()
3246	<< " could not be created. This SCoP has been dismissed.");
3247	return false;
3248	}
3249
3250	std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3251	ScopBuilder::buildAliasGroupsForAccesses() {
3252	BatchAAResults BAA(AA);
3253	AliasSetTracker AST(BAA);
3254
3255	DenseMap<Value , MemoryAccess > PtrToAcc;
3256	DenseSet<const ScopArrayInfo *> HasWriteAccess;
3257	for (ScopStmt &Stmt : *scop) {
3258
3259	isl::set StmtDomain = Stmt.getDomain();
3260	bool StmtDomainEmpty = StmtDomain.is_empty();
3261
3262	// Statements with an empty domain will never be executed.
3263	if (StmtDomainEmpty)
3264	continue;
3265
3266	for (MemoryAccess *MA : Stmt) {
3267	if (MA->isScalarKind())
3268	continue;
3269	if (!MA->isRead())
3270	HasWriteAccess.insert(V: MA->getScopArrayInfo());
3271	MemAccInst Acc(MA->getAccessInstruction());
3272	if (MA->isRead() && isa<MemTransferInst>(Val: Acc))
3273	PtrToAcc [cast<MemTransferInst>(Val&: Acc)->getRawSource()] = MA;
3274	else
3275	PtrToAcc [Acc.getPointerOperand()] = MA;
3276	AST.add(I: Acc);
3277	}
3278	}
3279
3280	AliasGroupVectorTy AliasGroups;
3281	for (AliasSet &AS : AST) {
3282	if (AS.isMustAlias() \|\| AS.isForwardingAliasSet())
3283	continue;
3284	AliasGroupTy AG;
3285	for (const Value *Ptr : AS.getPointers())
3286	AG.push_back(Elt: PtrToAcc [const_cast<Value *>(Ptr)]);
3287	if (AG.size() < `2`)
3288	continue;
3289	AliasGroups.push_back(Elt: std::move(AG));
3290	}
3291
3292	return std::make_tuple(args&: AliasGroups, args&: HasWriteAccess);
3293	}
3294
3295	bool ScopBuilder::buildAliasGroups() {
3296	// To create sound alias checks we perform the following steps:
3297	// o) We partition each group into read only and non read only accesses.
3298	// o) For each group with more than one base pointer we then compute minimal
3299	// and maximal accesses to each array of a group in read only and non
3300	// read only partitions separately.
3301	AliasGroupVectorTy AliasGroups;
3302	DenseSet<const ScopArrayInfo *> HasWriteAccess;
3303
3304	std::tie(args&: AliasGroups, args&: HasWriteAccess) = buildAliasGroupsForAccesses();
3305
3306	splitAliasGroupsByDomain(AliasGroups);
3307
3308	for (AliasGroupTy &AG : AliasGroups) {
3309	if (!scop ->hasFeasibleRuntimeContext())
3310	return false;
3311
3312	{
3313	IslMaxOperationsGuard MaxOpGuard(scop ->getIslCtx().get(), OptComputeOut);
3314	bool Valid = buildAliasGroup(AliasGroup&: AG, HasWriteAccess);
3315	if (!Valid)
3316	return false;
3317	}
3318	if (isl_ctx_last_error(ctx: scop ->getIslCtx().get()) == isl_error_quota) {
3319	scop ->invalidate(Kind: COMPLEXITY, Loc: DebugLoc());
3320	return false;
3321	}
3322	}
3323
3324	return true;
3325	}
3326
3327	bool ScopBuilder::buildAliasGroup(
3328	AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3329	AliasGroupTy ReadOnlyAccesses;
3330	AliasGroupTy ReadWriteAccesses;
3331	SmallPtrSet<const ScopArrayInfo *, `4`> ReadWriteArrays;
3332	SmallPtrSet<const ScopArrayInfo *, `4`> ReadOnlyArrays;
3333
3334	if (AliasGroup.size() < `2`)
3335	return true;
3336
3337	for (MemoryAccess *Access : AliasGroup) {
3338	ORE.emit(OptDiag&: OptimizationRemarkAnalysis (DEBUG_TYPE, "PossibleAlias",
3339	Access->getAccessInstruction())
3340	<< "Possibly aliasing pointer, use restrict keyword.");
3341	const ScopArrayInfo *Array = Access->getScopArrayInfo();
3342	if (HasWriteAccess.count(V: Array)) {
3343	ReadWriteArrays.insert(Ptr: Array);
3344	ReadWriteAccesses.push_back(Elt: Access);
3345	} else {
3346	ReadOnlyArrays.insert(Ptr: Array);
3347	ReadOnlyAccesses.push_back(Elt: Access);
3348	}
3349	}
3350
3351	// If there are no read-only pointers, and less than two read-write pointers,
3352	// no alias check is needed.
3353	if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= `1`)
3354	return true;
3355
3356	// If there is no read-write pointer, no alias check is needed.
3357	if (ReadWriteArrays.empty())
3358	return true;
3359
3360	// For non-affine accesses, no alias check can be generated as we cannot
3361	// compute a sufficiently tight lower and upper bound: bail out.
3362	for (MemoryAccess *MA : AliasGroup) {
3363	if (!MA->isAffine()) {
3364	scop ->invalidate(Kind: ALIASING, Loc: MA->getAccessInstruction()->getDebugLoc(),
3365	BB: MA->getAccessInstruction()->getParent());
3366	return false;
3367	}
3368	}
3369
3370	// Ensure that for all memory accesses for which we generate alias checks,
3371	// their base pointers are available.
3372	for (MemoryAccess *MA : AliasGroup) {
3373	if (MemoryAccess *BasePtrMA = scop ->lookupBasePtrAccess(MA))
3374	scop ->addRequiredInvariantLoad(
3375	LI: cast<LoadInst>(Val: BasePtrMA->getAccessInstruction()));
3376	}
3377
3378	// scop->getAliasGroups().emplace_back();
3379	// Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
3380	Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
3381	Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
3382
3383	bool Valid;
3384
3385	Valid = calculateMinMaxAccess(AliasGroup: ReadWriteAccesses, MinMaxAccesses&: MinMaxAccessesReadWrite);
3386
3387	if (!Valid)
3388	return false;
3389
3390	// Bail out if the number of values we need to compare is too large.
3391	// This is important as the number of comparisons grows quadratically with
3392	// the number of values we need to compare.
3393	if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3394	RunTimeChecksMaxArraysPerGroup)
3395	return false;
3396
3397	Valid = calculateMinMaxAccess(AliasGroup: ReadOnlyAccesses, MinMaxAccesses&: MinMaxAccessesReadOnly);
3398
3399	scop ->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3400	if (!Valid)
3401	return false;
3402
3403	return true;
3404	}
3405
3406	void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3407	for (unsigned u = `0`; u < AliasGroups.size(); u++) {
3408	AliasGroupTy NewAG;
3409	AliasGroupTy &AG = AliasGroups [u];
3410	AliasGroupTy::iterator AGI = AG.begin();
3411	isl::set AGDomain = getAccessDomain(MA: *AGI);
3412	while (AGI != AG.end()) {
3413	MemoryAccess MA = AGI;
3414	isl::set MADomain = getAccessDomain(MA);
3415	if (AGDomain.is_disjoint(set2: MADomain)) {
3416	NewAG.push_back(Elt: MA);
3417	AGI = AG.erase(CI: AGI);
3418	} else {
3419	AGDomain = AGDomain.unite(set2: MADomain);
3420	AGI++;
3421	}
3422	}
3423	if (NewAG.size() > `1`)
3424	AliasGroups.push_back(Elt: std::move(NewAG));
3425	}
3426	}
3427
3428	#ifndef NDEBUG
3429	static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
3430	auto PhysUse = VirtualUse::create(S, U: Op, LI: &LI, Virtual: false);
3431	auto VirtUse = VirtualUse::create(S, U: Op, LI: &LI, Virtual: true);
3432	assert(PhysUse.getKind() == VirtUse.getKind());
3433	}
3434
3435	/// Check the consistency of every statement's MemoryAccesses.
3436	///
3437	/// The check is carried out by expecting the "physical" kind of use (derived
3438	/// from the BasicBlocks instructions resides in) to be same as the "virtual"
3439	/// kind of use (derived from a statement's MemoryAccess).
3440	///
3441	/// The "physical" uses are taken by ensureValueRead to determine whether to
3442	/// create MemoryAccesses. When done, the kind of scalar access should be the
3443	/// same no matter which way it was derived.
3444	///
3445	/// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
3446	/// can intentionally influence on the kind of uses (not corresponding to the
3447	/// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has
3448	/// to pick up the virtual uses. But here in the code generator, this has not
3449	/// happened yet, such that virtual and physical uses are equivalent.
3450	static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
3451	for (auto *BB : S->getRegion().blocks()) {
3452	for (auto &Inst : *BB) {
3453	auto *Stmt = S->getStmtFor(Inst: &Inst);
3454	if (!Stmt)
3455	continue;
3456
3457	if (isIgnoredIntrinsic(V: &Inst))
3458	continue;
3459
3460	// Branch conditions are encoded in the statement domains.
3461	if (Inst.isTerminator() && Stmt->isBlockStmt())
3462	continue;
3463
3464	// Verify all uses.
3465	for (auto &Op : Inst.operands())
3466	verifyUse(S, Op, LI);
3467
3468	// Stores do not produce values used by other statements.
3469	if (isa<StoreInst>(Val: Inst))
3470	continue;
3471
3472	// For every value defined in the block, also check that a use of that
3473	// value in the same statement would not be an inter-statement use. It can
3474	// still be synthesizable or load-hoisted, but these kind of instructions
3475	// are not directly copied in code-generation.
3476	auto VirtDef =
3477	VirtualUse::create(S, UserStmt: Stmt, UserScope: Stmt->getSurroundingLoop(), Val: &Inst, Virtual: true);
3478	assert(VirtDef.getKind() == VirtualUse::Synthesizable \|\|
3479	VirtDef.getKind() == VirtualUse::Intra \|\|
3480	VirtDef.getKind() == VirtualUse::Hoisted);
3481	}
3482	}
3483
3484	if (S->hasSingleExitEdge())
3485	return;
3486
3487	// PHINodes in the SCoP region's exit block are also uses to be checked.
3488	if (!S->getRegion().isTopLevelRegion()) {
3489	for (auto &Inst : *S->getRegion().getExit()) {
3490	if (!isa<PHINode>(Val: Inst))
3491	break;
3492
3493	for (auto &Op : Inst.operands())
3494	verifyUse(S, Op, LI);
3495	}
3496	}
3497	}
3498	#endif
3499
3500	void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
3501	scop.reset(p: new Scop (R, SE, LI, DT, *SD.getDetectionContext(R: &R), ORE,
3502	SD.getNextID()));
3503
3504	buildStmts(SR&: R);
3505
3506	// Create all invariant load instructions first. These are categorized as
3507	// 'synthesizable', therefore are not part of any ScopStmt but need to be
3508	// created somewhere.
3509	const InvariantLoadsSetTy &RIL = scop ->getRequiredInvariantLoads();
3510	for (BasicBlock *BB : scop ->getRegion().blocks()) {
3511	if (SD.isErrorBlock(BB&: *BB, R: scop ->getRegion()))
3512	continue;
3513
3514	for (Instruction &Inst : *BB) {
3515	LoadInst *Load = dyn_cast<LoadInst>(Val: &Inst);
3516	if (!Load)
3517	continue;
3518
3519	if (!RIL.count(key: Load))
3520	continue;
3521
3522	// Invariant loads require a MemoryAccess to be created in some statement.
3523	// It is not important to which statement the MemoryAccess is added
3524	// because it will later be removed from the ScopStmt again. We chose the
3525	// first statement of the basic block the LoadInst is in.
3526	ArrayRef<ScopStmt *> List = scop ->getStmtListFor(BB);
3527	assert(!List.empty());
3528	ScopStmt *RILStmt = List.front();
3529	buildMemoryAccess(Inst: Load, Stmt: RILStmt);
3530	}
3531	}
3532	buildAccessFunctions();
3533
3534	// In case the region does not have an exiting block we will later (during
3535	// code generation) split the exit block. This will move potential PHI nodes
3536	// from the current exit block into the new region exiting block. Hence, PHI
3537	// nodes that are at this point not part of the region will be.
3538	// To handle these PHI nodes later we will now model their operands as scalar
3539	// accesses. Note that we do not model anything in the exit block if we have
3540	// an exiting block in the region, as there will not be any splitting later.
3541	if (!R.isTopLevelRegion() && !scop ->hasSingleExitEdge()) {
3542	for (Instruction &Inst : *R.getExit()) {
3543	PHINode *PHI = dyn_cast<PHINode>(Val: &Inst);
3544	if (!PHI)
3545	break;
3546
3547	buildPHIAccesses(PHIStmt: nullptr, PHI, NonAffineSubRegion: nullptr, IsExitBlock: true);
3548	}
3549	}
3550
3551	// Create memory accesses for global reads since all arrays are now known.
3552	auto *AF = SE.getConstant(Ty: IntegerType::getInt64Ty(C&: SE.getContext()), V: `0`);
3553	for (auto GlobalReadPair : GlobalReads) {
3554	ScopStmt *GlobalReadStmt = GlobalReadPair.first;
3555	Instruction *GlobalRead = GlobalReadPair.second;
3556	for (auto *BP : ArrayBasePointers)
3557	addArrayAccess(Stmt: GlobalReadStmt, MemAccInst: MemAccInst (GlobalRead), AccType: MemoryAccess::READ,
3558	BaseAddress: BP, ElementType: BP->getType(), IsAffine: false, Subscripts: {AF}, Sizes: {nullptr}, AccessValue: GlobalRead);
3559	}
3560
3561	buildInvariantEquivalenceClasses();
3562
3563	/// A map from basic blocks to their invalid domains.
3564	DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
3565
3566	if (!buildDomains(R: &R, InvalidDomainMap)) {
3567	POLLY_DEBUG(
3568	dbgs() << "Bailing-out because buildDomains encountered problems\n");
3569	return;
3570	}
3571
3572	addUserAssumptions(AC, InvalidDomainMap);
3573
3574	// Initialize the invalid domain.
3575	for (ScopStmt &Stmt : scop ->Stmts)
3576	if (Stmt.isBlockStmt())
3577	Stmt.setInvalidDomain(InvalidDomainMap [Stmt.getEntryBlock()]);
3578	else
3579	Stmt.setInvalidDomain(InvalidDomainMap [getRegionNodeBasicBlock(
3580	RN: Stmt.getRegion()->getNode())]);
3581
3582	// Remove empty statements.
3583	// Exit early in case there are no executable statements left in this scop.
3584	scop ->removeStmtNotInDomainMap();
3585	scop ->simplifySCoP(AfterHoisting: false);
3586	if (scop ->isEmpty()) {
3587	POLLY_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
3588	return;
3589	}
3590
3591	// The ScopStmts now have enough information to initialize themselves.
3592	for (ScopStmt &Stmt : *scop) {
3593	collectSurroundingLoops(Stmt);
3594
3595	buildDomain(Stmt);
3596	buildAccessRelations(Stmt);
3597
3598	if (DetectReductions)
3599	checkForReductions(Stmt);
3600	}
3601
3602	// Check early for a feasible runtime context.
3603	if (!scop ->hasFeasibleRuntimeContext()) {
3604	POLLY_DEBUG(
3605	dbgs() << "Bailing-out because of unfeasible context (early)\n");
3606	return;
3607	}
3608
3609	// Check early for profitability. Afterwards it cannot change anymore,
3610	// only the runtime context could become infeasible.
3611	if (!scop ->isProfitable(ScalarsAreUnprofitable: UnprofitableScalarAccs)) {
3612	scop ->invalidate(Kind: PROFITABLE, Loc: DebugLoc());
3613	POLLY_DEBUG(
3614	dbgs() << "Bailing-out because SCoP is not considered profitable\n");
3615	return;
3616	}
3617
3618	buildSchedule();
3619
3620	finalizeAccesses();
3621
3622	scop ->realignParams();
3623	addUserContext();
3624
3625	// After the context was fully constructed, thus all our knowledge about
3626	// the parameters is in there, we add all recorded assumptions to the
3627	// assumed/invalid context.
3628	addRecordedAssumptions();
3629
3630	scop ->simplifyContexts();
3631	if (!buildAliasChecks()) {
3632	POLLY_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
3633	return;
3634	}
3635
3636	hoistInvariantLoads();
3637	canonicalizeDynamicBasePtrs();
3638	verifyInvariantLoads();
3639	scop ->simplifySCoP(AfterHoisting: true);
3640
3641	// Check late for a feasible runtime context because profitability did not
3642	// change.
3643	if (!scop ->hasFeasibleRuntimeContext()) {
3644	POLLY_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
3645	return;
3646	}
3647
3648	#ifndef NDEBUG
3649	verifyUses(S: scop.get(), LI, DT);
3650	#endif
3651	}
3652
3653	ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA,
3654	const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
3655	ScopDetection &SD, ScalarEvolution &SE,
3656	OptimizationRemarkEmitter &ORE)
3657	: AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
3658	DebugLoc Beg, End;
3659	auto P = getBBPairForRegion(R);
3660	getDebugLocations(P, Begin&: Beg, End);
3661
3662	std::string Msg = "SCoP begins here.";
3663	ORE.emit(OptDiag&: OptimizationRemarkAnalysis (DEBUG_TYPE, "ScopEntry", Beg, P.first)
3664	<< Msg);
3665
3666	buildScop(R&: *R, AC);
3667
3668	POLLY_DEBUG(dbgs() << *scop);
3669
3670	if (!scop ->hasFeasibleRuntimeContext()) {
3671	InfeasibleScops ++;
3672	Msg = "SCoP ends here but was dismissed.";
3673	POLLY_DEBUG(dbgs() << "SCoP detected but dismissed\n");
3674	RecordedAssumptions.clear();
3675	scop.reset();
3676	} else {
3677	Msg = "SCoP ends here.";
3678	++ScopFound;
3679	if (scop ->getMaxLoopDepth() > `0`)
3680	++RichScopFound;
3681	}
3682
3683	if (R->isTopLevelRegion())
3684	ORE.emit(OptDiag&: OptimizationRemarkAnalysis (DEBUG_TYPE, "ScopEnd", End, P.first)
3685	<< Msg);
3686	else
3687	ORE.emit(OptDiag&: OptimizationRemarkAnalysis (DEBUG_TYPE, "ScopEnd", End, P.second)
3688	<< Msg);
3689	}
3690

source code of polly/lib/Analysis/ScopBuilder.cpp