//===- ScopBuilder.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the SCoPs detected by the
// SCoP detection, derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//
15
16#include "polly/ScopBuilder.h"
17#include "polly/Options.h"
18#include "polly/ScopDetection.h"
19#include "polly/ScopInfo.h"
20#include "polly/Support/GICHelper.h"
21#include "polly/Support/ISLTools.h"
22#include "polly/Support/SCEVValidator.h"
23#include "polly/Support/ScopHelper.h"
24#include "polly/Support/VirtualInstruction.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/EquivalenceClasses.h"
27#include "llvm/ADT/PostOrderIterator.h"
28#include "llvm/ADT/Sequence.h"
29#include "llvm/ADT/SmallSet.h"
30#include "llvm/ADT/Statistic.h"
31#include "llvm/Analysis/AliasAnalysis.h"
32#include "llvm/Analysis/AssumptionCache.h"
33#include "llvm/Analysis/Delinearization.h"
34#include "llvm/Analysis/Loads.h"
35#include "llvm/Analysis/LoopInfo.h"
36#include "llvm/Analysis/OptimizationRemarkEmitter.h"
37#include "llvm/Analysis/RegionInfo.h"
38#include "llvm/Analysis/RegionIterator.h"
39#include "llvm/Analysis/ScalarEvolution.h"
40#include "llvm/Analysis/ScalarEvolutionExpressions.h"
41#include "llvm/IR/BasicBlock.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/DerivedTypes.h"
45#include "llvm/IR/Dominators.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/InstrTypes.h"
48#include "llvm/IR/Instruction.h"
49#include "llvm/IR/Instructions.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/Use.h"
52#include "llvm/IR/Value.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/raw_ostream.h"
58#include <cassert>
59
60using namespace llvm;
61using namespace polly;
62
63#include "polly/Support/PollyDebug.h"
64#define DEBUG_TYPE "polly-scops"
65
66STATISTIC(ScopFound, "Number of valid Scops");
67STATISTIC(RichScopFound, "Number of Scops containing a loop");
68STATISTIC(InfeasibleScops,
69 "Number of SCoPs with statically infeasible context.");
70
71bool polly::ModelReadOnlyScalars;
72
73// The maximal number of dimensions we allow during invariant load construction.
74// More complex access ranges will result in very high compile time and are also
75// unlikely to result in good code. This value is very high and should only
76// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
77static unsigned const MaxDimensionsInAccessRange = 9;
78
79static cl::opt<bool, true> XModelReadOnlyScalars(
80 "polly-analyze-read-only-scalars",
81 cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::init(true),
83 cl::cat(PollyCategory));
84
85static cl::opt<int>
86 OptComputeOut("polly-analysis-computeout",
87 cl::desc("Bound the scop analysis by a maximal amount of "
88 "computational steps (0 means no bound)"),
                  cl::Hidden, cl::init(800000), cl::cat(PollyCategory));
90
static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all pointer parameters of functions as dereferenceable."
        " This is useful for invariant load hoisting, since we can generate"
        " fewer runtime checks. This is only valid if all pointers passed to"
        " functions are always initialized, so that Polly can choose to hoist"
        " their loads."),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
100
101static cl::opt<bool>
102 PollyIgnoreInbounds("polly-ignore-inbounds",
103 cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));
105
106static cl::opt<unsigned> RunTimeChecksMaxArraysPerGroup(
107 "polly-rtc-max-arrays-per-group",
108 cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::init(20), cl::cat(PollyCategory));
110
static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses to "
             "build RTCs."),
    cl::Hidden, cl::init(8), cl::cat(PollyCategory));
116
117static cl::opt<unsigned> RunTimeChecksMaxParameters(
118 "polly-rtc-max-parameters",
119 cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::init(8), cl::cat(PollyCategory));
121
122static cl::opt<bool> UnprofitableScalarAccs(
123 "polly-unprofitable-scalar-accs",
124 cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));
126
127static cl::opt<std::string> UserContextStr(
128 "polly-context", cl::value_desc("isl parameter set"),
129 cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));
131
132static cl::opt<bool> DetectReductions("polly-detect-reductions",
133 cl::desc("Detect and exploit reductions"),
                                      cl::Hidden, cl::init(true),
135 cl::cat(PollyCategory));
136
// Multiplicative reductions can be disabled separately as these kinds of
// operations can overflow easily. Additive reductions and bit operations
// are, in contrast, pretty stable.
140static cl::opt<bool> DisableMultiplicativeReductions(
141 "polly-disable-multiplicative-reductions",
142 cl::desc("Disable multiplicative reductions"), cl::Hidden,
143 cl::cat(PollyCategory));
144
145enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };
146
147static cl::opt<GranularityChoice> StmtGranularity(
148 "polly-stmt-granularity",
149 cl::desc(
150 "Algorithm to use for splitting basic blocks into multiple statements"),
151 cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
152 "One statement per basic block"),
153 clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
154 "Scalar independence heuristic"),
155 clEnumValN(GranularityChoice::Stores, "store",
156 "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));
158
159/// Helper to treat non-affine regions and basic blocks the same.
160///
161///{
162
/// Return the representative basic block for @p RN.
164static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
165 return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
166 : RN->getNodeAs<BasicBlock>();
167}
168
169/// Return the @p idx'th block that is executed after @p RN.
170static inline BasicBlock *
171getRegionNodeSuccessor(RegionNode *RN, Instruction *TI, unsigned idx) {
172 if (RN->isSubRegion()) {
173 assert(idx == 0);
174 return RN->getNodeAs<Region>()->getExit();
175 }
176 return TI->getSuccessor(Idx: idx);
177}
178
179static bool containsErrorBlock(RegionNode *RN, const Region &R,
180 ScopDetection *SD) {
181 if (!RN->isSubRegion())
182 return SD->isErrorBlock(BB&: *RN->getNodeAs<BasicBlock>(), R);
183 for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
184 if (SD->isErrorBlock(BB&: *BB, R))
185 return true;
186 return false;
187}
188
189///}
190
/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
///
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
198static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
199 isl::space MapSpace = SetSpace.map_from_set();
200 isl::map NextIterationMap = isl::map::universe(space: MapSpace);
201 for (unsigned u : rangeIslSize(Begin: 0, End: NextIterationMap.domain_tuple_dim()))
202 if (u != Dim)
203 NextIterationMap =
204 NextIterationMap.equate(type1: isl::dim::in, pos1: u, type2: isl::dim::out, pos2: u);
205 isl::constraint C =
206 isl::constraint::alloc_equality(ls: isl::local_space(MapSpace));
207 C = C.set_constant_si(1);
208 C = C.set_coefficient_si(type: isl::dim::in, pos: Dim, v: 1);
209 C = C.set_coefficient_si(type: isl::dim::out, pos: Dim, v: -1);
210 NextIterationMap = NextIterationMap.add_constraint(constraint: C);
211 return NextIterationMap;
212}
213
/// Collect all bounded parts of @p S, i.e., the union of those basic sets of
/// @p S that are bounded.
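///
/// For example, for S = { [i] : i >= 8 } union { [i] : 0 <= i <= 7 } only the
/// second basic set is bounded, so the result is { [i] : 0 <= i <= 7 }.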
215static isl::set collectBoundedParts(isl::set S) {
216 isl::set BoundedParts = isl::set::empty(space: S.get_space());
217 for (isl::basic_set BSet : S.get_basic_set_list())
218 if (BSet.is_bounded())
219 BoundedParts = BoundedParts.unite(set2: isl::set(BSet));
220 return BoundedParts;
221}
222
/// Compute the (un)bounded parts of @p S with respect to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded and then a bounded
///          subset, both with respect to the dimension @p Dim.
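///
/// For example, for S = { [i, j] : 0 <= i and 0 <= j <= i } and Dim = 1 the
/// value of j is bounded for every fixed i, so the bounded part is S itself
/// and the unbounded part is empty; for Dim = 0 it is the other way around.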
227static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
228 unsigned Dim) {
229 for (unsigned u : rangeIslSize(Begin: 0, End: S.tuple_dim()))
230 S = S.lower_bound_si(type: isl::dim::set, pos: u, value: 0);
231
232 unsigned NumDimsS = unsignedFromIslSize(Size: S.tuple_dim());
233 isl::set OnlyDimS = S;
234
235 // Remove dimensions that are greater than Dim as they are not interesting.
236 assert(NumDimsS >= Dim + 1);
237 OnlyDimS = OnlyDimS.project_out(type: isl::dim::set, first: Dim + 1, n: NumDimsS - Dim - 1);
238
239 // Create artificial parametric upper bounds for dimensions smaller than Dim
240 // as we are not interested in them.
241 OnlyDimS = OnlyDimS.insert_dims(type: isl::dim::param, pos: 0, n: Dim);
242
243 for (unsigned u = 0; u < Dim; u++) {
244 isl::constraint C = isl::constraint::alloc_inequality(
245 ls: isl::local_space(OnlyDimS.get_space()));
246 C = C.set_coefficient_si(type: isl::dim::param, pos: u, v: 1);
247 C = C.set_coefficient_si(type: isl::dim::set, pos: u, v: -1);
248 OnlyDimS = OnlyDimS.add_constraint(constraint: C);
249 }
250
251 // Collect all bounded parts of OnlyDimS.
252 isl::set BoundedParts = collectBoundedParts(S: OnlyDimS);
253
254 // Create the dimensions greater than Dim again.
255 BoundedParts =
256 BoundedParts.insert_dims(type: isl::dim::set, pos: Dim + 1, n: NumDimsS - Dim - 1);
257
258 // Remove the artificial upper bound parameters again.
259 BoundedParts = BoundedParts.remove_dims(type: isl::dim::param, first: 0, n: Dim);
260
261 isl::set UnboundedParts = S.subtract(set2: BoundedParts);
262 return std::make_pair(x&: UnboundedParts, y&: BoundedParts);
263}
264
265/// Create the conditions under which @p L @p Pred @p R is true.
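///
/// For example, for Pred == ICMP_SLE, L = [N] -> { [i] -> [(i)] } and
/// R = [N] -> { [i] -> [(N)] } the result is [N] -> { [i] : i <= N }.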
266static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
267 isl::pw_aff R) {
268 switch (Pred) {
269 case ICmpInst::ICMP_EQ:
270 return L.eq_set(pwaff2: R);
271 case ICmpInst::ICMP_NE:
272 return L.ne_set(pwaff2: R);
273 case ICmpInst::ICMP_SLT:
274 return L.lt_set(pwaff2: R);
275 case ICmpInst::ICMP_SLE:
276 return L.le_set(pwaff2: R);
277 case ICmpInst::ICMP_SGT:
278 return L.gt_set(pwaff2: R);
279 case ICmpInst::ICMP_SGE:
280 return L.ge_set(pwaff2: R);
281 case ICmpInst::ICMP_ULT:
282 return L.lt_set(pwaff2: R);
283 case ICmpInst::ICMP_UGT:
284 return L.gt_set(pwaff2: R);
285 case ICmpInst::ICMP_ULE:
286 return L.le_set(pwaff2: R);
287 case ICmpInst::ICMP_UGE:
288 return L.ge_set(pwaff2: R);
289 default:
290 llvm_unreachable("Non integer predicate not supported");
291 }
292}
293
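/// Adjust the number of dimensions of @p Dom when moving from loop @p OldL to
/// loop @p NewL.
///
/// For example, when leaving a loop nest with iteration vector [i0, i1, i2]
/// (@p OldL at relative depth 2) for a block that is only contained in the
/// outermost loop (@p NewL at relative depth 0), the two innermost dimensions
/// are projected out and the result is a set over [i0] only.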
294isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL,
295 Loop *NewL) {
296 // If the loops are the same there is nothing to do.
297 if (NewL == OldL)
298 return Dom;
299
300 int OldDepth = scop->getRelativeLoopDepth(L: OldL);
301 int NewDepth = scop->getRelativeLoopDepth(L: NewL);
302 // If both loops are non-affine loops there is nothing to do.
303 if (OldDepth == -1 && NewDepth == -1)
304 return Dom;
305
  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left, one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
313 if (OldDepth == NewDepth) {
314 assert(OldL->getParentLoop() == NewL->getParentLoop());
315 Dom = Dom.project_out(type: isl::dim::set, first: NewDepth, n: 1);
316 Dom = Dom.add_dims(type: isl::dim::set, n: 1);
317 } else if (OldDepth < NewDepth) {
318 assert(OldDepth + 1 == NewDepth);
319 auto &R = scop->getRegion();
320 (void)R;
321 assert(NewL->getParentLoop() == OldL ||
322 ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
323 Dom = Dom.add_dims(type: isl::dim::set, n: 1);
324 } else {
325 assert(OldDepth > NewDepth);
326 unsigned Diff = OldDepth - NewDepth;
327 unsigned NumDim = unsignedFromIslSize(Size: Dom.tuple_dim());
328 assert(NumDim >= Diff);
329 Dom = Dom.project_out(type: isl::dim::set, first: NumDim - Diff, n: Diff);
330 }
331
332 return Dom;
333}
334
335/// Compute the isl representation for the SCEV @p E in this BB.
336///
/// @param BB               The BB for which the isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate that @p E has to be non-negative.
342///
343/// Note that this function will also adjust the invalid context accordingly.
344
345__isl_give isl_pw_aff *
346ScopBuilder::getPwAff(BasicBlock *BB,
347 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
348 const SCEV *E, bool NonNegative) {
349 PWACtx PWAC = scop->getPwAff(E, BB, NonNegative, RecordedAssumptions: &RecordedAssumptions);
350 InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(set2: PWAC.second);
351 return PWAC.first.release();
352}
353
/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if
/// the MSB (aka the sign bit) is set for an operand of an unsigned ICmpInst
/// it should wrap around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
/// between TestVal and UpperBound, i.e.,
/// TestVal < UpperBound  OR  TestVal <= UpperBound
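///
/// For example, an unsigned comparison "x <u N" is modeled as the two
/// constraints 0 <= x and x < N, where N is additionally assumed to be
/// non-negative.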
362__isl_give isl_set *ScopBuilder::buildUnsignedConditionSets(
363 BasicBlock *BB, Value *Condition, __isl_keep isl_set *Domain,
364 const SCEV *SCEV_TestVal, const SCEV *SCEV_UpperBound,
365 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
366 bool IsStrictUpperBound) {
367 // Do not take NonNeg assumption on TestVal
368 // as it might have MSB (Sign bit) set.
369 isl_pw_aff *TestVal = getPwAff(BB, InvalidDomainMap, E: SCEV_TestVal, NonNegative: false);
370 // Take NonNeg assumption on UpperBound.
371 isl_pw_aff *UpperBound =
372 getPwAff(BB, InvalidDomainMap, E: SCEV_UpperBound, NonNegative: true);
373
374 // 0 <= TestVal
375 isl_set *First =
376 isl_pw_aff_le_set(pwaff1: isl_pw_aff_zero_on_domain(ls: isl_local_space_from_space(
377 space: isl_pw_aff_get_domain_space(pwaff: TestVal))),
378 pwaff2: isl_pw_aff_copy(pwaff: TestVal));
379
380 isl_set *Second;
381 if (IsStrictUpperBound)
382 // TestVal < UpperBound
383 Second = isl_pw_aff_lt_set(pwaff1: TestVal, pwaff2: UpperBound);
384 else
385 // TestVal <= UpperBound
386 Second = isl_pw_aff_le_set(pwaff1: TestVal, pwaff2: UpperBound);
387
388 isl_set *ConsequenceCondSet = isl_set_intersect(set1: First, set2: Second);
389 return ConsequenceCondSet;
390}
391
392bool ScopBuilder::buildConditionSets(
393 BasicBlock *BB, SwitchInst *SI, Loop *L, __isl_keep isl_set *Domain,
394 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
395 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
396 Value *Condition = getConditionFromTerminator(TI: SI);
397 assert(Condition && "No condition for switch");
398
399 isl_pw_aff *LHS, *RHS;
400 LHS = getPwAff(BB, InvalidDomainMap, E: SE.getSCEVAtScope(V: Condition, L));
401
402 unsigned NumSuccessors = SI->getNumSuccessors();
403 ConditionSets.resize(N: NumSuccessors);
404 for (auto &Case : SI->cases()) {
405 unsigned Idx = Case.getSuccessorIndex();
406 ConstantInt *CaseValue = Case.getCaseValue();
407
408 RHS = getPwAff(BB, InvalidDomainMap, E: SE.getSCEV(V: CaseValue));
409 isl_set *CaseConditionSet =
410 buildConditionSet(Pred: ICmpInst::ICMP_EQ, L: isl::manage_copy(ptr: LHS),
411 R: isl::manage(ptr: RHS))
412 .release();
413 ConditionSets[Idx] = isl_set_coalesce(
414 set: isl_set_intersect(set1: CaseConditionSet, set2: isl_set_copy(set: Domain)));
415 }
416
417 assert(ConditionSets[0] == nullptr && "Default condition set was set");
418 isl_set *ConditionSetUnion = isl_set_copy(set: ConditionSets[1]);
419 for (unsigned u = 2; u < NumSuccessors; u++)
420 ConditionSetUnion =
421 isl_set_union(set1: ConditionSetUnion, set2: isl_set_copy(set: ConditionSets[u]));
422 ConditionSets[0] = isl_set_subtract(set1: isl_set_copy(set: Domain), set2: ConditionSetUnion);
423
424 isl_pw_aff_free(pwaff: LHS);
425
426 return true;
427}
428
429bool ScopBuilder::buildConditionSets(
430 BasicBlock *BB, Value *Condition, Instruction *TI, Loop *L,
431 __isl_keep isl_set *Domain,
432 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
433 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
434 isl_set *ConsequenceCondSet = nullptr;
435
436 if (auto Load = dyn_cast<LoadInst>(Val: Condition)) {
437 const SCEV *LHSSCEV = SE.getSCEVAtScope(V: Load, L);
438 const SCEV *RHSSCEV = SE.getZero(Ty: LHSSCEV->getType());
439 bool NonNeg = false;
440 isl_pw_aff *LHS = getPwAff(BB, InvalidDomainMap, E: LHSSCEV, NonNegative: NonNeg);
441 isl_pw_aff *RHS = getPwAff(BB, InvalidDomainMap, E: RHSSCEV, NonNegative: NonNeg);
442 ConsequenceCondSet = buildConditionSet(Pred: ICmpInst::ICMP_SLE, L: isl::manage(ptr: LHS),
443 R: isl::manage(ptr: RHS))
444 .release();
445 } else if (auto *PHI = dyn_cast<PHINode>(Val: Condition)) {
446 auto *Unique = dyn_cast<ConstantInt>(
447 Val: getUniqueNonErrorValue(PHI, R: &scop->getRegion(), SD: &SD));
448 assert(Unique &&
449 "A PHINode condition should only be accepted by ScopDetection if "
450 "getUniqueNonErrorValue returns non-NULL");
451
452 if (Unique->isZero())
453 ConsequenceCondSet = isl_set_empty(space: isl_set_get_space(set: Domain));
454 else
455 ConsequenceCondSet = isl_set_universe(space: isl_set_get_space(set: Domain));
456 } else if (auto *CCond = dyn_cast<ConstantInt>(Val: Condition)) {
457 if (CCond->isZero())
458 ConsequenceCondSet = isl_set_empty(space: isl_set_get_space(set: Domain));
459 else
460 ConsequenceCondSet = isl_set_universe(space: isl_set_get_space(set: Domain));
461 } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val: Condition)) {
462 auto Opcode = BinOp->getOpcode();
463 assert(Opcode == Instruction::And || Opcode == Instruction::Or);
464
465 bool Valid = buildConditionSets(BB, Condition: BinOp->getOperand(i_nocapture: 0), TI, L, Domain,
466 InvalidDomainMap, ConditionSets) &&
467 buildConditionSets(BB, Condition: BinOp->getOperand(i_nocapture: 1), TI, L, Domain,
468 InvalidDomainMap, ConditionSets);
469 if (!Valid) {
470 while (!ConditionSets.empty())
471 isl_set_free(set: ConditionSets.pop_back_val());
472 return false;
473 }
474
475 isl_set_free(set: ConditionSets.pop_back_val());
476 isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
477 isl_set_free(set: ConditionSets.pop_back_val());
478 isl_set *ConsCondPart1 = ConditionSets.pop_back_val();
479
480 if (Opcode == Instruction::And)
481 ConsequenceCondSet = isl_set_intersect(set1: ConsCondPart0, set2: ConsCondPart1);
482 else
483 ConsequenceCondSet = isl_set_union(set1: ConsCondPart0, set2: ConsCondPart1);
484 } else {
485 auto *ICond = dyn_cast<ICmpInst>(Val: Condition);
486 assert(ICond &&
487 "Condition of exiting branch was neither constant nor ICmp!");
488
489 Region &R = scop->getRegion();
490
491 isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assume the sign bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
495 bool NonNeg = ICond->isUnsigned();
496 const SCEV *LeftOperand = SE.getSCEVAtScope(V: ICond->getOperand(i_nocapture: 0), L),
497 *RightOperand = SE.getSCEVAtScope(V: ICond->getOperand(i_nocapture: 1), L);
498
499 LeftOperand = tryForwardThroughPHI(Expr: LeftOperand, R, SE, SD: &SD);
500 RightOperand = tryForwardThroughPHI(Expr: RightOperand, R, SE, SD: &SD);
501
502 switch (ICond->getPredicate()) {
503 case ICmpInst::ICMP_ULT:
504 ConsequenceCondSet =
505 buildUnsignedConditionSets(BB, Condition, Domain, SCEV_TestVal: LeftOperand,
506 SCEV_UpperBound: RightOperand, InvalidDomainMap, IsStrictUpperBound: true);
507 break;
508 case ICmpInst::ICMP_ULE:
509 ConsequenceCondSet =
510 buildUnsignedConditionSets(BB, Condition, Domain, SCEV_TestVal: LeftOperand,
511 SCEV_UpperBound: RightOperand, InvalidDomainMap, IsStrictUpperBound: false);
512 break;
513 case ICmpInst::ICMP_UGT:
514 ConsequenceCondSet =
515 buildUnsignedConditionSets(BB, Condition, Domain, SCEV_TestVal: RightOperand,
516 SCEV_UpperBound: LeftOperand, InvalidDomainMap, IsStrictUpperBound: true);
517 break;
518 case ICmpInst::ICMP_UGE:
519 ConsequenceCondSet =
520 buildUnsignedConditionSets(BB, Condition, Domain, SCEV_TestVal: RightOperand,
521 SCEV_UpperBound: LeftOperand, InvalidDomainMap, IsStrictUpperBound: false);
522 break;
523 default:
524 LHS = getPwAff(BB, InvalidDomainMap, E: LeftOperand, NonNegative: NonNeg);
525 RHS = getPwAff(BB, InvalidDomainMap, E: RightOperand, NonNegative: NonNeg);
526 ConsequenceCondSet = buildConditionSet(Pred: ICond->getPredicate(),
527 L: isl::manage(ptr: LHS), R: isl::manage(ptr: RHS))
528 .release();
529 break;
530 }
531 }
532
533 // If no terminator was given we are only looking for parameter constraints
534 // under which @p Condition is true/false.
535 if (!TI)
536 ConsequenceCondSet = isl_set_params(set: ConsequenceCondSet);
537 assert(ConsequenceCondSet);
538 ConsequenceCondSet = isl_set_coalesce(
539 set: isl_set_intersect(set1: ConsequenceCondSet, set2: isl_set_copy(set: Domain)));
540
541 isl_set *AlternativeCondSet = nullptr;
542 bool TooComplex =
543 isl_set_n_basic_set(set: ConsequenceCondSet) >= (int)MaxDisjunctsInDomain;
544
545 if (!TooComplex) {
546 AlternativeCondSet = isl_set_subtract(set1: isl_set_copy(set: Domain),
547 set2: isl_set_copy(set: ConsequenceCondSet));
548 TooComplex =
549 isl_set_n_basic_set(set: AlternativeCondSet) >= (int)MaxDisjunctsInDomain;
550 }
551
552 if (TooComplex) {
553 scop->invalidate(Kind: COMPLEXITY, Loc: TI ? TI->getDebugLoc() : DebugLoc(),
554 BB: TI ? TI->getParent() : nullptr /* BasicBlock */);
555 isl_set_free(set: AlternativeCondSet);
556 isl_set_free(set: ConsequenceCondSet);
557 return false;
558 }
559
560 ConditionSets.push_back(Elt: ConsequenceCondSet);
561 ConditionSets.push_back(Elt: isl_set_coalesce(set: AlternativeCondSet));
562
563 return true;
564}
565
566bool ScopBuilder::buildConditionSets(
567 BasicBlock *BB, Instruction *TI, Loop *L, __isl_keep isl_set *Domain,
568 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
569 SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
570 if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI))
571 return buildConditionSets(BB, SI, L, Domain, InvalidDomainMap,
572 ConditionSets);
573
574 assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");
575
576 if (TI->getNumSuccessors() == 1) {
577 ConditionSets.push_back(Elt: isl_set_copy(set: Domain));
578 return true;
579 }
580
581 Value *Condition = getConditionFromTerminator(TI);
582 assert(Condition && "No condition for Terminator");
583
584 return buildConditionSets(BB, Condition, TI, L, Domain, InvalidDomainMap,
585 ConditionSets);
586}
587
588bool ScopBuilder::propagateDomainConstraints(
589 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
591 // predecessors to the current node. In contrast to the
592 // buildDomainsWithBranchConstraints function, this one will pull the domain
593 // information from the predecessors instead of pushing it to the successors.
594 // Additionally, we assume the domains to be already present in the domain
595 // map here. However, we iterate again in reverse post order so we know all
596 // predecessors have been visited before a block or non-affine subregion is
597 // visited.
598
599 ReversePostOrderTraversal<Region *> RTraversal(R);
600 for (auto *RN : RTraversal) {
601 // Recurse for affine subregions but go on for basic blocks and non-affine
602 // subregions.
603 if (RN->isSubRegion()) {
604 Region *SubRegion = RN->getNodeAs<Region>();
605 if (!scop->isNonAffineSubRegion(R: SubRegion)) {
606 if (!propagateDomainConstraints(R: SubRegion, InvalidDomainMap))
607 return false;
608 continue;
609 }
610 }
611
612 BasicBlock *BB = getRegionNodeBasicBlock(RN);
613 isl::set &Domain = scop->getOrInitEmptyDomain(BB);
614 assert(!Domain.is_null());
615
616 // Under the union of all predecessor conditions we can reach this block.
617 isl::set PredDom = getPredecessorDomainConstraints(BB, Domain);
618 Domain = Domain.intersect(set2: PredDom).coalesce();
619 Domain = Domain.align_params(model: scop->getParamSpace());
620
621 Loop *BBLoop = getRegionNodeLoop(RN, LI);
622 if (BBLoop && BBLoop->getHeader() == BB && scop->contains(L: BBLoop))
623 if (!addLoopBoundsToHeaderDomain(L: BBLoop, InvalidDomainMap))
624 return false;
625 }
626
627 return true;
628}
629
630void ScopBuilder::propagateDomainConstraintsToRegionExit(
631 BasicBlock *BB, Loop *BBLoop,
632 SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks,
633 DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
636 auto *RI = scop->getRegion().getRegionInfo();
637 auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
638 auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
639 if (!BBReg || BBReg->getEntry() != BB || !scop->contains(BB: ExitBB))
640 return;
641
642 // Do not propagate the domain if there is a loop backedge inside the region
643 // that would prevent the exit block from being executed.
644 auto *L = BBLoop;
645 while (L && scop->contains(L)) {
646 SmallVector<BasicBlock *, 4> LatchBBs;
647 BBLoop->getLoopLatches(LoopLatches&: LatchBBs);
648 for (auto *LatchBB : LatchBBs)
649 if (BB != LatchBB && BBReg->contains(BB: LatchBB))
650 return;
651 L = L->getParentLoop();
652 }
653
654 isl::set Domain = scop->getOrInitEmptyDomain(BB);
655 assert(!Domain.is_null() && "Cannot propagate a nullptr");
656
657 Loop *ExitBBLoop = getFirstNonBoxedLoopFor(BB: ExitBB, LI, BoxedLoops: scop->getBoxedLoops());
658
659 // Since the dimensions of @p BB and @p ExitBB might be different we have to
660 // adjust the domain before we can propagate it.
661 isl::set AdjustedDomain = adjustDomainDimensions(Dom: Domain, OldL: BBLoop, NewL: ExitBBLoop);
662 isl::set &ExitDomain = scop->getOrInitEmptyDomain(BB: ExitBB);
663
  // If the exit domain is not yet created we set it, otherwise we "add" the
  // current domain.
666 ExitDomain =
667 !ExitDomain.is_null() ? AdjustedDomain.unite(set2: ExitDomain) : AdjustedDomain;
668
669 // Initialize the invalid domain.
670 InvalidDomainMap[ExitBB] = ExitDomain.empty(space: ExitDomain.get_space());
671
672 FinishedExitBlocks.insert(Ptr: ExitBB);
673}
674
675isl::set ScopBuilder::getPredecessorDomainConstraints(BasicBlock *BB,
676 isl::set Domain) {
677 // If @p BB is the ScopEntry we are done
678 if (scop->getRegion().getEntry() == BB)
679 return isl::set::universe(space: Domain.get_space());
680
681 // The region info of this function.
682 auto &RI = *scop->getRegion().getRegionInfo();
683
684 Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, BoxedLoops: scop->getBoxedLoops());
685
686 // A domain to collect all predecessor domains, thus all conditions under
687 // which the block is executed. To this end we start with the empty domain.
688 isl::set PredDom = isl::set::empty(space: Domain.get_space());
689
  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of these regions can be skipped.
692 SmallSet<Region *, 8> PropagatedRegions;
693
694 for (auto *PredBB : predecessors(BB)) {
695 // Skip backedges.
696 if (DT.dominates(A: BB, B: PredBB))
697 continue;
698
699 // If the predecessor is in a region we used for propagation we can skip it.
700 auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(BB: PredBB); };
701 if (llvm::any_of(Range&: PropagatedRegions, P: PredBBInRegion)) {
702 continue;
703 }
704
705 // Check if there is a valid region we can use for propagation, thus look
706 // for a region that contains the predecessor and has @p BB as exit block.
    // FIXME: This was a side-effect-free (and possibly infinite) loop when
    // committed and seems not to be needed.
709 auto *PredR = RI.getRegionFor(BB: PredBB);
710 while (PredR->getExit() != BB && !PredR->contains(BB))
711 PredR = PredR->getParent();
712
713 // If a valid region for propagation was found use the entry of that region
714 // for propagation, otherwise the PredBB directly.
715 if (PredR->getExit() == BB) {
716 PredBB = PredR->getEntry();
717 PropagatedRegions.insert(Ptr: PredR);
718 }
719
720 isl::set PredBBDom = scop->getDomainConditions(BB: PredBB);
721 Loop *PredBBLoop =
722 getFirstNonBoxedLoopFor(BB: PredBB, LI, BoxedLoops: scop->getBoxedLoops());
723 PredBBDom = adjustDomainDimensions(Dom: PredBBDom, OldL: PredBBLoop, NewL: BBLoop);
724 PredDom = PredDom.unite(set2: PredBBDom);
725 }
726
727 return PredDom;
728}
729
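/// Add the constraints implied by the backedges of loop @p L to the domain of
/// its header block.
///
/// Illustrative example: for a loop whose body executes for the iterations
/// i = 0, 1, ..., N - 1 this results in a header domain of the form
/// { [i] : 0 <= i < N }, while the parameter combinations under which the
/// loop would not terminate are recorded as an INFINITELOOP assumption.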
730bool ScopBuilder::addLoopBoundsToHeaderDomain(
731 Loop *L, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
732 int LoopDepth = scop->getRelativeLoopDepth(L);
733 assert(LoopDepth >= 0 && "Loop in region should have at least depth one");
734
735 BasicBlock *HeaderBB = L->getHeader();
736 assert(scop->isDomainDefined(HeaderBB));
737 isl::set &HeaderBBDom = scop->getOrInitEmptyDomain(BB: HeaderBB);
738
739 isl::map NextIterationMap =
740 createNextIterationMap(SetSpace: HeaderBBDom.get_space(), Dim: LoopDepth);
741
742 isl::set UnionBackedgeCondition = HeaderBBDom.empty(space: HeaderBBDom.get_space());
743
744 SmallVector<BasicBlock *, 4> LatchBlocks;
745 L->getLoopLatches(LoopLatches&: LatchBlocks);
746
747 for (BasicBlock *LatchBB : LatchBlocks) {
748 // If the latch is only reachable via error statements we skip it.
749 if (!scop->isDomainDefined(BB: LatchBB))
750 continue;
751
752 isl::set LatchBBDom = scop->getDomainConditions(BB: LatchBB);
753
754 isl::set BackedgeCondition;
755
756 Instruction *TI = LatchBB->getTerminator();
757 BranchInst *BI = dyn_cast<BranchInst>(Val: TI);
758 assert(BI && "Only branch instructions allowed in loop latches");
759
760 if (BI->isUnconditional())
761 BackedgeCondition = LatchBBDom;
762 else {
763 SmallVector<isl_set *, 8> ConditionSets;
764 int idx = BI->getSuccessor(i: 0) != HeaderBB;
765 if (!buildConditionSets(BB: LatchBB, TI, L, Domain: LatchBBDom.get(),
766 InvalidDomainMap, ConditionSets))
767 return false;
768
769 // Free the non back edge condition set as we do not need it.
770 isl_set_free(set: ConditionSets[1 - idx]);
771
772 BackedgeCondition = isl::manage(ptr: ConditionSets[idx]);
773 }
774
775 int LatchLoopDepth = scop->getRelativeLoopDepth(L: LI.getLoopFor(BB: LatchBB));
776 assert(LatchLoopDepth >= LoopDepth);
777 BackedgeCondition = BackedgeCondition.project_out(
778 type: isl::dim::set, first: LoopDepth + 1, n: LatchLoopDepth - LoopDepth);
779 UnionBackedgeCondition = UnionBackedgeCondition.unite(set2: BackedgeCondition);
780 }
781
782 isl::map ForwardMap = ForwardMap.lex_le(set_space: HeaderBBDom.get_space());
783 for (int i = 0; i < LoopDepth; i++)
784 ForwardMap = ForwardMap.equate(type1: isl::dim::in, pos1: i, type2: isl::dim::out, pos2: i);
785
786 isl::set UnionBackedgeConditionComplement =
787 UnionBackedgeCondition.complement();
788 UnionBackedgeConditionComplement =
789 UnionBackedgeConditionComplement.lower_bound_si(type: isl::dim::set, pos: LoopDepth,
790 value: 0);
791 UnionBackedgeConditionComplement =
792 UnionBackedgeConditionComplement.apply(map: ForwardMap);
793 HeaderBBDom = HeaderBBDom.subtract(set2: UnionBackedgeConditionComplement);
794 HeaderBBDom = HeaderBBDom.apply(map: NextIterationMap);
795
796 auto Parts = partitionSetParts(S: HeaderBBDom, Dim: LoopDepth);
797 HeaderBBDom = Parts.second;
798
799 // Check if there is a <nsw> tagged AddRec for this loop and if so do not
800 // require a runtime check. The assumption is already implied by the <nsw>
801 // tag.
802 bool RequiresRTC = !scop->hasNSWAddRecForLoop(L);
803
804 isl::set UnboundedCtx = Parts.first.params();
805 recordAssumption(RecordedAssumptions: &RecordedAssumptions, Kind: INFINITELOOP, Set: UnboundedCtx,
806 Loc: HeaderBB->getTerminator()->getDebugLoc(), Sign: AS_RESTRICTION,
807 BB: nullptr, RTC: RequiresRTC);
808 return true;
809}
810
811void ScopBuilder::buildInvariantEquivalenceClasses() {
812 DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;
813
814 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
815 for (LoadInst *LInst : RIL) {
816 const SCEV *PointerSCEV = SE.getSCEV(V: LInst->getPointerOperand());
817
818 Type *Ty = LInst->getType();
819 LoadInst *&ClassRep = EquivClasses[std::make_pair(x&: PointerSCEV, y&: Ty)];
820 if (ClassRep) {
821 scop->addInvariantLoadMapping(LoadInst: LInst, ClassRep);
822 continue;
823 }
824
825 ClassRep = LInst;
826 scop->addInvariantEquivClass(
827 InvariantEquivClass: InvariantEquivClassTy{.IdentifyingPointer: PointerSCEV, .InvariantAccesses: MemoryAccessList(), .ExecutionContext: {}, .AccessType: Ty});
828 }
829}
830
831bool ScopBuilder::buildDomains(
832 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
833 bool IsOnlyNonAffineRegion = scop->isNonAffineSubRegion(R);
834 auto *EntryBB = R->getEntry();
835 auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(BB: EntryBB);
836 int LD = scop->getRelativeLoopDepth(L);
837 auto *S =
838 isl_set_universe(space: isl_space_set_alloc(ctx: scop->getIslCtx().get(), nparam: 0, dim: LD + 1));
839
840 InvalidDomainMap[EntryBB] = isl::manage(ptr: isl_set_empty(space: isl_set_get_space(set: S)));
841 isl::set Domain = isl::manage(ptr: S);
842 scop->setDomain(BB: EntryBB, Domain);
843
844 if (IsOnlyNonAffineRegion)
845 return !containsErrorBlock(RN: R->getNode(), R: *R, SD: &SD);
846
847 if (!buildDomainsWithBranchConstraints(R, InvalidDomainMap))
848 return false;
849
850 if (!propagateDomainConstraints(R, InvalidDomainMap))
851 return false;
852
853 // Error blocks and blocks dominated by them have been assumed to never be
854 // executed. Representing them in the Scop does not add any value. In fact,
855 // it is likely to cause issues during construction of the ScopStmts. The
856 // contents of error blocks have not been verified to be expressible and
857 // will cause problems when building up a ScopStmt for them.
858 // Furthermore, basic blocks dominated by error blocks may reference
859 // instructions in the error block which, if the error block is not modeled,
860 // can themselves not be constructed properly. To this end we will replace
861 // the domains of error blocks and those only reachable via error blocks
862 // with an empty set. Additionally, we will record for each block under which
863 // parameter combination it would be reached via an error block in its
864 // InvalidDomain. This information is needed during load hoisting.
865 if (!propagateInvalidStmtDomains(R, InvalidDomainMap))
866 return false;
867
868 return true;
869}
870
871bool ScopBuilder::buildDomainsWithBranchConstraints(
872 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
873 // To create the domain for each block in R we iterate over all blocks and
874 // subregions in R and propagate the conditions under which the current region
875 // element is executed. To this end we iterate in reverse post order over R as
876 // it ensures that we first visit all predecessors of a region node (either a
877 // basic block or a subregion) before we visit the region node itself.
878 // Initially, only the domain for the SCoP region entry block is set and from
879 // there we propagate the current domain to all successors, however we add the
880 // condition that the successor is actually executed next.
881 // As we are only interested in non-loop carried constraints here we can
882 // simply skip loop back edges.
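  //
  // For example, if a block with domain D ends in "br (i < N), then, else",
  // the "then" successor is reached under D intersected with { i < N } and
  // the "else" successor under D intersected with { i >= N }.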
883
884 SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
885 ReversePostOrderTraversal<Region *> RTraversal(R);
886 for (auto *RN : RTraversal) {
887 // Recurse for affine subregions but go on for basic blocks and non-affine
888 // subregions.
889 if (RN->isSubRegion()) {
890 Region *SubRegion = RN->getNodeAs<Region>();
891 if (!scop->isNonAffineSubRegion(R: SubRegion)) {
892 if (!buildDomainsWithBranchConstraints(R: SubRegion, InvalidDomainMap))
893 return false;
894 continue;
895 }
896 }
897
    if (containsErrorBlock(RN, scop->getRegion(), &SD))
      scop->notifyErrorBlock();
901
902 BasicBlock *BB = getRegionNodeBasicBlock(RN);
903 Instruction *TI = BB->getTerminator();
904
905 if (isa<UnreachableInst>(Val: TI))
906 continue;
907
908 if (!scop->isDomainDefined(BB))
909 continue;
910 isl::set Domain = scop->getDomainConditions(BB);
911
912 scop->updateMaxLoopDepth(Depth: unsignedFromIslSize(Size: Domain.tuple_dim()));
913
914 auto *BBLoop = getRegionNodeLoop(RN, LI);
915 // Propagate the domain from BB directly to blocks that have a superset
916 // domain, at the moment only region exit nodes of regions that start in BB.
917 propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks,
918 InvalidDomainMap);
919
920 // If all successors of BB have been set a domain through the propagation
921 // above we do not need to build condition sets but can just skip this
922 // block. However, it is important to note that this is a local property
923 // with regards to the region @p R. To this end FinishedExitBlocks is a
924 // local variable.
925 auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
926 return FinishedExitBlocks.count(Ptr: SuccBB);
927 };
928 if (std::all_of(first: succ_begin(BB), last: succ_end(BB), pred: IsFinishedRegionExit))
929 continue;
930
931 // Build the condition sets for the successor nodes of the current region
932 // node. If it is a non-affine subregion we will always execute the single
933 // exit node, hence the single entry node domain is the condition set. For
934 // basic blocks we use the helper function buildConditionSets.
935 SmallVector<isl_set *, 8> ConditionSets;
936 if (RN->isSubRegion())
937 ConditionSets.push_back(Elt: Domain.copy());
938 else if (!buildConditionSets(BB, TI, L: BBLoop, Domain: Domain.get(), InvalidDomainMap,
939 ConditionSets))
940 return false;
941
942 // Now iterate over the successors and set their initial domain based on
943 // their condition set. We skip back edges here and have to be careful when
944 // we leave a loop not to keep constraints over a dimension that doesn't
945 // exist anymore.
946 assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
947 for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
948 isl::set CondSet = isl::manage(ptr: ConditionSets[u]);
949 BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, idx: u);
950
951 // Skip blocks outside the region.
952 if (!scop->contains(BB: SuccBB))
953 continue;
954
955 // If we propagate the domain of some block to "SuccBB" we do not have to
956 // adjust the domain.
957 if (FinishedExitBlocks.count(Ptr: SuccBB))
958 continue;
959
960 // Skip back edges.
961 if (DT.dominates(A: SuccBB, B: BB))
962 continue;
963
964 Loop *SuccBBLoop =
965 getFirstNonBoxedLoopFor(BB: SuccBB, LI, BoxedLoops: scop->getBoxedLoops());
966
967 CondSet = adjustDomainDimensions(Dom: CondSet, OldL: BBLoop, NewL: SuccBBLoop);
968
969 // Set the domain for the successor or merge it with an existing domain in
970 // case there are multiple paths (without loop back edges) to the
971 // successor block.
972 isl::set &SuccDomain = scop->getOrInitEmptyDomain(BB: SuccBB);
973
974 if (!SuccDomain.is_null()) {
975 SuccDomain = SuccDomain.unite(set2: CondSet).coalesce();
976 } else {
977 // Initialize the invalid domain.
978 InvalidDomainMap[SuccBB] = CondSet.empty(space: CondSet.get_space());
979 SuccDomain = CondSet;
980 }
981
982 SuccDomain = SuccDomain.detect_equalities();
983
984 // Check if the maximal number of domain disjunctions was reached.
985 // In case this happens we will clean up and bail.
986 if (unsignedFromIslSize(Size: SuccDomain.n_basic_set()) < MaxDisjunctsInDomain)
987 continue;
988
989 scop->invalidate(Kind: COMPLEXITY, Loc: DebugLoc());
990 while (++u < ConditionSets.size())
991 isl_set_free(set: ConditionSets[u]);
992 return false;
993 }
994 }
995
996 return true;
997}
998
999bool ScopBuilder::propagateInvalidStmtDomains(
1000 Region *R, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1001 ReversePostOrderTraversal<Region *> RTraversal(R);
1002 for (auto *RN : RTraversal) {
1003
1004 // Recurse for affine subregions but go on for basic blocks and non-affine
1005 // subregions.
1006 if (RN->isSubRegion()) {
1007 Region *SubRegion = RN->getNodeAs<Region>();
1008 if (!scop->isNonAffineSubRegion(R: SubRegion)) {
1009 propagateInvalidStmtDomains(R: SubRegion, InvalidDomainMap);
1010 continue;
1011 }
1012 }
1013
1014 bool ContainsErrorBlock = containsErrorBlock(RN, R: scop->getRegion(), SD: &SD);
1015 BasicBlock *BB = getRegionNodeBasicBlock(RN);
1016 isl::set &Domain = scop->getOrInitEmptyDomain(BB);
1017 assert(!Domain.is_null() && "Cannot propagate a nullptr");
1018
1019 isl::set InvalidDomain = InvalidDomainMap[BB];
1020
1021 bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(set2: InvalidDomain);
1022
1023 if (!IsInvalidBlock) {
1024 InvalidDomain = InvalidDomain.intersect(set2: Domain);
1025 } else {
1026 InvalidDomain = Domain;
1027 isl::set DomPar = Domain.params();
1028 recordAssumption(RecordedAssumptions: &RecordedAssumptions, Kind: ERRORBLOCK, Set: DomPar,
1029 Loc: BB->getTerminator()->getDebugLoc(), Sign: AS_RESTRICTION);
1030 Domain = isl::set::empty(space: Domain.get_space());
1031 }
1032
1033 if (InvalidDomain.is_empty()) {
1034 InvalidDomainMap[BB] = InvalidDomain;
1035 continue;
1036 }
1037
1038 auto *BBLoop = getRegionNodeLoop(RN, LI);
1039 auto *TI = BB->getTerminator();
1040 unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
1041 for (unsigned u = 0; u < NumSuccs; u++) {
1042 auto *SuccBB = getRegionNodeSuccessor(RN, TI, idx: u);
1043
1044 // Skip successors outside the SCoP.
1045 if (!scop->contains(BB: SuccBB))
1046 continue;
1047
1048 // Skip backedges.
1049 if (DT.dominates(A: SuccBB, B: BB))
1050 continue;
1051
1052 Loop *SuccBBLoop =
1053 getFirstNonBoxedLoopFor(BB: SuccBB, LI, BoxedLoops: scop->getBoxedLoops());
1054
1055 auto AdjustedInvalidDomain =
1056 adjustDomainDimensions(Dom: InvalidDomain, OldL: BBLoop, NewL: SuccBBLoop);
1057
1058 isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
1059 SuccInvalidDomain = SuccInvalidDomain.unite(set2: AdjustedInvalidDomain);
1060 SuccInvalidDomain = SuccInvalidDomain.coalesce();
1061
1062 InvalidDomainMap[SuccBB] = SuccInvalidDomain;
1063
1064 // Check if the maximal number of domain disjunctions was reached.
1065 // In case this happens we will bail.
1066 if (unsignedFromIslSize(Size: SuccInvalidDomain.n_basic_set()) <
1067 MaxDisjunctsInDomain)
1068 continue;
1069
1070 InvalidDomainMap.erase(Val: BB);
1071 scop->invalidate(Kind: COMPLEXITY, Loc: TI->getDebugLoc(), BB: TI->getParent());
1072 return false;
1073 }
1074
1075 InvalidDomainMap[BB] = InvalidDomain;
1076 }
1077
1078 return true;
1079}
1080
1081void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
1082 Region *NonAffineSubRegion,
1083 bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region (i.e., when IsExitBlock
  // is true) are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.
1088
1089 // If we can synthesize a PHI we can skip it, however only if it is in
1090 // the region. If it is not it can only be in the exit block of the region.
1091 // In this case we model the operands but not the PHI itself.
1092 auto *Scope = LI.getLoopFor(BB: PHI->getParent());
1093 if (!IsExitBlock && canSynthesize(V: PHI, S: *scop, SE: &SE, Scope))
1094 return;
1095
1096 // PHI nodes are modeled as if they had been demoted prior to the SCoP
1097 // detection. Hence, the PHI is a load of a new memory location in which the
1098 // incoming value was written at the end of the incoming basic block.
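  //
  // For example, a PHI node "%phi = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]" is
  // modeled as a write of %a at the end of %bb1, a write of %b at the end of
  // %bb2, and a read of that PHI location in the statement containing %phi.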
1099 bool OnlyNonAffineSubRegionOperands = true;
1100 for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
1101 Value *Op = PHI->getIncomingValue(i: u);
1102 BasicBlock *OpBB = PHI->getIncomingBlock(i: u);
1103 ScopStmt *OpStmt = scop->getIncomingStmtFor(U: PHI->getOperandUse(i: u));
1104
1105 // Do not build PHI dependences inside a non-affine subregion, but make
1106 // sure that the necessary scalar values are still made available.
1107 if (NonAffineSubRegion && NonAffineSubRegion->contains(BB: OpBB)) {
1108 auto *OpInst = dyn_cast<Instruction>(Val: Op);
1109 if (!OpInst || !NonAffineSubRegion->contains(Inst: OpInst))
1110 ensureValueRead(V: Op, UserStmt: OpStmt);
1111 continue;
1112 }
1113
1114 OnlyNonAffineSubRegionOperands = false;
1115 ensurePHIWrite(PHI, IncomintStmt: OpStmt, IncomingBlock: OpBB, IncomingValue: Op, IsExitBlock);
1116 }
1117
1118 if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
1119 addPHIReadAccess(PHIStmt, PHI);
1120 }
1121}
1122
1123void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
1124 Instruction *Inst) {
1125 assert(!isa<PHINode>(Inst));
1126
1127 // Pull-in required operands.
1128 for (Use &Op : Inst->operands())
1129 ensureValueRead(V: Op.get(), UserStmt);
1130}
1131
1132// Create a sequence of two schedules. Either argument may be null and is
1133// interpreted as the empty schedule. Can also return null if both schedules are
1134// empty.
1135static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
1136 if (Prev.is_null())
1137 return Succ;
1138 if (Succ.is_null())
1139 return Prev;
1140
1141 return Prev.sequence(schedule2: Succ);
1142}
1143
1144// Create an isl_multi_union_aff that defines an identity mapping from the
1145// elements of USet to their N-th dimension.
1146//
1147// # Example:
1148//
1149// Domain: { A[i,j]; B[i,j,k] }
1150// N: 1
1151//
//   Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
1153//
1154// @param USet A union set describing the elements for which to generate a
1155// mapping.
1156// @param N The dimension to map to.
1157// @returns A mapping from USet to its N-th dimension.
1158static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) {
1159 assert(!USet.is_null());
1160 assert(!USet.is_empty());
1161
1162 auto Result = isl::union_pw_multi_aff::empty(space: USet.get_space());
1163
1164 for (isl::set S : USet.get_set_list()) {
1165 unsigned Dim = unsignedFromIslSize(Size: S.tuple_dim());
1166 assert(Dim >= N);
1167 auto PMA = isl::pw_multi_aff::project_out_map(space: S.get_space(), type: isl::dim::set,
1168 first: N, n: Dim - N);
1169 if (N > 1)
1170 PMA = PMA.drop_dims(type: isl::dim::out, first: 0, n: N - 1);
1171
1172 Result = Result.add_pw_multi_aff(pma: PMA);
1173 }
1174
1175 return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
1176}
1177
1178void ScopBuilder::buildSchedule() {
1179 Loop *L = getLoopSurroundingScop(S&: *scop, LI);
1180 LoopStackTy LoopStack({LoopStackElementTy(L, {}, 0)});
1181 buildSchedule(RN: scop->getRegion().getNode(), LoopStack);
1182 assert(LoopStack.size() == 1 && LoopStack.back().L == L);
1183 scop->setScheduleTree(LoopStack[0].Schedule);
1184}
1185
1186/// To generate a schedule for the elements in a Region we traverse the Region
1187/// in reverse-post-order and add the contained RegionNodes in traversal order
1188/// to the schedule of the loop that is currently at the top of the LoopStack.
1189/// For loop-free codes, this results in a correct sequential ordering.
1190///
1191/// Example:
1192/// bb1(0)
1193/// / \.
1194/// bb2(1) bb3(2)
1195/// \ / \.
1196/// bb4(3) bb5(4)
1197/// \ /
1198/// bb6(5)
1199///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack. Starting
/// from an empty schedule, we first process all RegionNodes that are within
/// this loop and complete the sequential schedule at this loop-level before
/// processing any other nodes. To implement this loop-nodes-first-processing,
/// the reverse post-order traversal is insufficient. Hence, we additionally
/// check if the traversal yields sub-regions or blocks that are outside the
/// last loop on the @p LoopStack. These region-nodes are then queued and only
/// traversed after all nodes within the current loop have been processed.
1210void ScopBuilder::buildSchedule(Region *R, LoopStackTy &LoopStack) {
1211 Loop *OuterScopLoop = getLoopSurroundingScop(S&: *scop, LI);
1212
1213 ReversePostOrderTraversal<Region *> RTraversal(R);
1214 std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
1215 std::deque<RegionNode *> DelayList;
1216 bool LastRNWaiting = false;
1217
1218 // Iterate over the region @p R in reverse post-order but queue
1219 // sub-regions/blocks iff they are not part of the last encountered but not
1220 // completely traversed loop. The variable LastRNWaiting is a flag to indicate
1221 // that we queued the last sub-region/block from the reverse post-order
1222 // iterator. If it is set we have to explore the next sub-region/block from
1223 // the iterator (if any) to guarantee progress. If it is not set we first try
1224 // the next queued sub-region/blocks.
1225 while (!WorkList.empty() || !DelayList.empty()) {
1226 RegionNode *RN;
1227
1228 if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
1229 RN = WorkList.front();
1230 WorkList.pop_front();
1231 LastRNWaiting = false;
1232 } else {
1233 RN = DelayList.front();
1234 DelayList.pop_front();
1235 }
1236
1237 Loop *L = getRegionNodeLoop(RN, LI);
1238 if (!scop->contains(L))
1239 L = OuterScopLoop;
1240
1241 Loop *LastLoop = LoopStack.back().L;
1242 if (LastLoop != L) {
1243 if (LastLoop && !LastLoop->contains(L)) {
1244 LastRNWaiting = true;
1245 DelayList.push_back(x: RN);
1246 continue;
1247 }
1248 LoopStack.push_back(Elt: {L, {}, 0});
1249 }
1250 buildSchedule(RN, LoopStack);
1251 }
1252}
1253
1254void ScopBuilder::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack) {
1255 if (RN->isSubRegion()) {
1256 auto *LocalRegion = RN->getNodeAs<Region>();
1257 if (!scop->isNonAffineSubRegion(R: LocalRegion)) {
1258 buildSchedule(R: LocalRegion, LoopStack);
1259 return;
1260 }
1261 }
1262
1263 assert(LoopStack.rbegin() != LoopStack.rend());
1264 auto LoopData = LoopStack.rbegin();
1265 LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);
1266
1267 for (auto *Stmt : scop->getStmtListFor(RN)) {
1268 isl::union_set UDomain{Stmt->getDomain()};
1269 auto StmtSchedule = isl::schedule::from_domain(domain: UDomain);
1270 LoopData->Schedule = combineInSequence(Prev: LoopData->Schedule, Succ: StmtSchedule);
1271 }
1272
1273 // Check if we just processed the last node in this loop. If we did, finalize
1274 // the loop by:
1275 //
1276 // - adding new schedule dimensions
1277 // - folding the resulting schedule into the parent loop schedule
1278 // - dropping the loop schedule from the LoopStack.
1279 //
1280 // Then continue to check surrounding loops, which might also have been
1281 // completed by this node.
1282 size_t Dimension = LoopStack.size();
1283 while (LoopData->L &&
1284 LoopData->NumBlocksProcessed == getNumBlocksInLoop(L: LoopData->L)) {
1285 isl::schedule Schedule = LoopData->Schedule;
1286 auto NumBlocksProcessed = LoopData->NumBlocksProcessed;
1287
1288 assert(std::next(LoopData) != LoopStack.rend());
1289 Loop *L = LoopData->L;
1290 ++LoopData;
1291 --Dimension;
1292
1293 if (!Schedule.is_null()) {
1294 isl::union_set Domain = Schedule.get_domain();
1295 isl::multi_union_pw_aff MUPA = mapToDimension(USet: Domain, N: Dimension);
1296 Schedule = Schedule.insert_partial_schedule(partial: MUPA);
1297
1298 if (hasDisableAllTransformsHint(L)) {
1299 /// If any of the loops has a disable_nonforced heuristic, mark the
1300 /// entire SCoP as such. The ISL rescheduler can only reschedule the
1301 /// SCoP in its entirety.
        /// TODO: ScopDetection could avoid including such loops or wrap them
        /// as boxed loops. It still needs to pass through loops with
        /// user-defined metadata.
1305 scop->markDisableHeuristics();
1306 }
1307
      // It is easier to insert the marks here than to do it retroactively.
1309 isl::id IslLoopId = createIslLoopAttr(Ctx: scop->getIslCtx(), L);
1310 if (!IslLoopId.is_null())
1311 Schedule =
1312 Schedule.get_root().child(pos: 0).insert_mark(mark: IslLoopId).get_schedule();
1313
1314 LoopData->Schedule = combineInSequence(Prev: LoopData->Schedule, Succ: Schedule);
1315 }
1316
1317 LoopData->NumBlocksProcessed += NumBlocksProcessed;
1318 }
  // Now pop all loops processed above from the LoopStack.
1320 LoopStack.erase(CS: LoopStack.begin() + Dimension, CE: LoopStack.end());
1321}
1322
1323void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
1324 // Check for uses of this instruction outside the scop. Because we do not
1325 // iterate over such instructions and therefore did not "ensure" the existence
1326 // of a write, we must determine such use here.
1327 if (scop->isEscaping(Inst))
1328 ensureValueWrite(Inst);
1329}
1330
1331void ScopBuilder::addRecordedAssumptions() {
1332 for (auto &AS : llvm::reverse(C&: RecordedAssumptions)) {
1333
1334 if (!AS.BB) {
1335 scop->addAssumption(Kind: AS.Kind, Set: AS.Set, Loc: AS.Loc, Sign: AS.Sign,
1336 BB: nullptr /* BasicBlock */, RTC: AS.RequiresRTC);
1337 continue;
1338 }
1339
1340 // If the domain was deleted the assumptions are void.
1341 isl_set *Dom = scop->getDomainConditions(BB: AS.BB).release();
1342 if (!Dom)
1343 continue;
1344
    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //
    //   Dom => S   <==>   (not Dom) v S   <==>   not (Dom - S)
    //
    // To avoid the complement we will register (Dom - S) as a restriction
    // instead of an assumption.
1354 isl_set *S = AS.Set.copy();
1355 if (AS.Sign == AS_RESTRICTION)
1356 S = isl_set_params(set: isl_set_intersect(set1: S, set2: Dom));
1357 else /* (AS.Sign == AS_ASSUMPTION) */
1358 S = isl_set_params(set: isl_set_subtract(set1: Dom, set2: S));
1359
1360 scop->addAssumption(Kind: AS.Kind, Set: isl::manage(ptr: S), Loc: AS.Loc, Sign: AS_RESTRICTION, BB: AS.BB,
1361 RTC: AS.RequiresRTC);
1362 }
1363}
1364
1365void ScopBuilder::addUserAssumptions(
1366 AssumptionCache &AC, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
1367 for (auto &Assumption : AC.assumptions()) {
1368 auto *CI = dyn_cast_or_null<CallInst>(Val&: Assumption);
1369 if (!CI || CI->arg_size() != 1)
1370 continue;
1371
1372 bool InScop = scop->contains(I: CI);
1373 if (!InScop && !scop->isDominatedBy(DT, BB: CI->getParent()))
1374 continue;
1375
1376 auto *L = LI.getLoopFor(BB: CI->getParent());
1377 auto *Val = CI->getArgOperand(i: 0);
1378 ParameterSetTy DetectedParams;
1379 auto &R = scop->getRegion();
1380 if (!isAffineConstraint(V: Val, R: &R, Scope: L, SE, Params&: DetectedParams)) {
1381 ORE.emit(
1382 OptDiag&: OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
1383 << "Non-affine user assumption ignored.");
1384 continue;
1385 }
1386
1387 // Collect all newly introduced parameters.
1388 ParameterSetTy NewParams;
1389 for (auto *Param : DetectedParams) {
1390 Param = extractConstantFactor(M: Param, SE).second;
1391 Param = scop->getRepresentingInvariantLoadSCEV(S: Param);
1392 if (scop->isParam(Param))
1393 continue;
1394 NewParams.insert(X: Param);
1395 }
1396
1397 SmallVector<isl_set *, 2> ConditionSets;
1398 auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
1399 BasicBlock *BB = InScop ? CI->getParent() : R.getEntry();
1400 auto *Dom = InScop ? isl_set_copy(set: scop->getDomainConditions(BB).get())
1401 : isl_set_copy(set: scop->getContext().get());
1402 assert(Dom && "Cannot propagate a nullptr.");
1403 bool Valid = buildConditionSets(BB, Condition: Val, TI, L, Domain: Dom, InvalidDomainMap,
1404 ConditionSets);
1405 isl_set_free(set: Dom);
1406
1407 if (!Valid)
1408 continue;
1409
1410 isl_set *AssumptionCtx = nullptr;
1411 if (InScop) {
1412 AssumptionCtx = isl_set_complement(set: isl_set_params(set: ConditionSets[1]));
1413 isl_set_free(set: ConditionSets[0]);
1414 } else {
1415 AssumptionCtx = isl_set_complement(set: ConditionSets[1]);
1416 AssumptionCtx = isl_set_intersect(set1: AssumptionCtx, set2: ConditionSets[0]);
1417 }
1418
1419 // Project out newly introduced parameters as they are not otherwise useful.
1420 if (!NewParams.empty()) {
1421 for (isl_size u = 0; u < isl_set_n_param(set: AssumptionCtx); u++) {
1422 auto *Id = isl_set_get_dim_id(set: AssumptionCtx, type: isl_dim_param, pos: u);
1423 auto *Param = static_cast<const SCEV *>(isl_id_get_user(id: Id));
1424 isl_id_free(id: Id);
1425
1426 if (!NewParams.count(key: Param))
1427 continue;
1428
1429 AssumptionCtx =
1430 isl_set_project_out(set: AssumptionCtx, type: isl_dim_param, first: u--, n: 1);
1431 }
1432 }
1433 ORE.emit(OptDiag&: OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
1434 << "Use user assumption: "
1435 << stringFromIslObj(Obj: AssumptionCtx, DefaultValue: "null"));
1436 isl::set newContext =
1437 scop->getContext().intersect(set2: isl::manage(ptr: AssumptionCtx));
1438 scop->setContext(newContext);
1439 }
1440}
1441
1442bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) {
1443 // Memory builtins are not considered in this function.
1444 if (!Inst.isLoad() && !Inst.isStore())
1445 return false;
1446
1447 Value *Val = Inst.getValueOperand();
1448 Type *ElementType = Val->getType();
1449 Value *Address = Inst.getPointerOperand();
1450 const SCEV *AccessFunction =
1451 SE.getSCEVAtScope(V: Address, L: LI.getLoopFor(BB: Inst->getParent()));
1452 const SCEVUnknown *BasePointer =
1453 dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AccessFunction));
1454 enum MemoryAccess::AccessType AccType =
1455 isa<LoadInst>(Val: Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1456
1457 if (auto *BitCast = dyn_cast<BitCastInst>(Val: Address))
1458 Address = BitCast->getOperand(i_nocapture: 0);
1459
1460 auto *GEP = dyn_cast<GetElementPtrInst>(Val: Address);
1461 if (!GEP || DL.getTypeAllocSize(Ty: GEP->getResultElementType()) !=
1462 DL.getTypeAllocSize(Ty: ElementType))
1463 return false;
1464
1465 SmallVector<const SCEV *, 4> Subscripts;
1466 SmallVector<int, 4> Sizes;
1467 getIndexExpressionsFromGEP(SE, GEP, Subscripts, Sizes);
1468 auto *BasePtr = GEP->getOperand(i_nocapture: 0);
1469
1470 if (auto *BasePtrCast = dyn_cast<BitCastInst>(Val: BasePtr))
1471 BasePtr = BasePtrCast->getOperand(i_nocapture: 0);
1472
1473 // Check for identical base pointers to ensure that we do not miss index
1474 // offsets that have been added before this GEP is applied.
1475 if (BasePtr != BasePointer->getValue())
1476 return false;
1477
1478 std::vector<const SCEV *> SizesSCEV;
1479
1480 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1481
1482 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1483 for (auto *Subscript : Subscripts) {
1484 InvariantLoadsSetTy AccessILS;
1485 if (!isAffineExpr(R: &scop->getRegion(), Scope: SurroundingLoop, Expression: Subscript, SE,
1486 ILS: &AccessILS))
1487 return false;
1488
1489 for (LoadInst *LInst : AccessILS)
1490 if (!ScopRIL.count(key: LInst))
1491 return false;
1492 }
1493
1494 if (Sizes.empty())
1495 return false;
1496
1497 SizesSCEV.push_back(x: nullptr);
1498
1499 for (auto V : Sizes)
1500 SizesSCEV.push_back(x: SE.getSCEV(
1501 V: ConstantInt::get(Ty: IntegerType::getInt64Ty(C&: BasePtr->getContext()), V)));
1502
1503 addArrayAccess(Stmt, MemAccInst: Inst, AccType, BaseAddress: BasePointer->getValue(), ElemType: ElementType,
1504 IsAffine: true, Subscripts, Sizes: SizesSCEV, AccessValue: Val);
1505 return true;
1506}
1507
1508bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) {
1509 // Memory builtins are not considered by this function.
1510 if (!Inst.isLoad() && !Inst.isStore())
1511 return false;
1512
1513 if (!PollyDelinearize)
1514 return false;
1515
1516 Value *Address = Inst.getPointerOperand();
1517 Value *Val = Inst.getValueOperand();
1518 Type *ElementType = Val->getType();
1519 unsigned ElementSize = DL.getTypeAllocSize(Ty: ElementType);
1520 enum MemoryAccess::AccessType AccType =
1521 isa<LoadInst>(Val: Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1522
1523 const SCEV *AccessFunction =
1524 SE.getSCEVAtScope(V: Address, L: LI.getLoopFor(BB: Inst->getParent()));
1525 const SCEVUnknown *BasePointer =
1526 dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AccessFunction));
1527
1528 assert(BasePointer && "Could not find base pointer");
1529
1530 auto &InsnToMemAcc = scop->getInsnToMemAccMap();
1531 auto AccItr = InsnToMemAcc.find(x: Inst);
1532 if (AccItr == InsnToMemAcc.end())
1533 return false;
1534
1535 std::vector<const SCEV *> Sizes = {nullptr};
1536
1537 Sizes.insert(position: Sizes.end(), first: AccItr->second.Shape->DelinearizedSizes.begin(),
1538 last: AccItr->second.Shape->DelinearizedSizes.end());
1539
1540 // In case only the element size is contained in the 'Sizes' array, the
1541 // access does not access a real multi-dimensional array. Hence, we allow
1542 // the normal single-dimensional access construction to handle this.
1543 if (Sizes.size() == 1)
1544 return false;
1545
1546 // Remove the element size. This information is already provided by the
1547 // ElementSize parameter. In case the element size of this access and the
1548 // element size used for delinearization differ, the delinearization is
1549 // incorrect. Hence, we invalidate the scop.
1550 //
1551 // TODO: Handle delinearization with differing element sizes.
1552 auto DelinearizedSize =
1553 cast<SCEVConstant>(Val: Sizes.back())->getAPInt().getSExtValue();
1554 Sizes.pop_back();
1555 if (ElementSize != DelinearizedSize)
1556 scop->invalidate(Kind: DELINEARIZATION, Loc: Inst->getDebugLoc(), BB: Inst->getParent());
1557
1558 addArrayAccess(Stmt, MemAccInst: Inst, AccType, BaseAddress: BasePointer->getValue(), ElemType: ElementType,
1559 IsAffine: true, Subscripts: AccItr->second.DelinearizedSubscripts, Sizes, AccessValue: Val);
1560 return true;
1561}
1562
1563bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) {
1564 auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Val&: Inst);
1565
1566 if (MemIntr == nullptr)
1567 return false;
1568
1569 auto *L = LI.getLoopFor(BB: Inst->getParent());
1570 auto *LengthVal = SE.getSCEVAtScope(V: MemIntr->getLength(), L);
1571 assert(LengthVal);
1572
1573 // Check if the length value is actually affine or if we overapproximate it.
1574 InvariantLoadsSetTy AccessILS;
1575 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1576
1577 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1578 bool LengthIsAffine = isAffineExpr(R: &scop->getRegion(), Scope: SurroundingLoop,
1579 Expression: LengthVal, SE, ILS: &AccessILS);
1580 for (LoadInst *LInst : AccessILS)
1581 if (!ScopRIL.count(key: LInst))
1582 LengthIsAffine = false;
1583 if (!LengthIsAffine)
1584 LengthVal = nullptr;
1585
1586 auto *DestPtrVal = MemIntr->getDest();
1587 assert(DestPtrVal);
1588
1589 auto *DestAccFunc = SE.getSCEVAtScope(V: DestPtrVal, L);
1590 assert(DestAccFunc);
1591 // Ignore accesses to "NULL".
1592 // TODO: We could use this to optimize the region further, e.g., intersect
1593 // the context with
1594 // isl_set_complement(isl_set_params(getDomain()))
1595 // as we know it would be undefined to execute this instruction anyway.
1596 if (DestAccFunc->isZero())
1597 return true;
1598
1599 if (auto *U = dyn_cast<SCEVUnknown>(Val: DestAccFunc)) {
1600 if (isa<ConstantPointerNull>(Val: U->getValue()))
1601 return true;
1602 }
1603
1604 auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: DestAccFunc));
1605 assert(DestPtrSCEV);
1606 DestAccFunc = SE.getMinusSCEV(LHS: DestAccFunc, RHS: DestPtrSCEV);
1607 addArrayAccess(Stmt, MemAccInst: Inst, AccType: MemoryAccess::MUST_WRITE, BaseAddress: DestPtrSCEV->getValue(),
1608 ElemType: IntegerType::getInt8Ty(C&: DestPtrVal->getContext()),
1609 IsAffine: LengthIsAffine, Subscripts: {DestAccFunc, LengthVal}, Sizes: {nullptr},
1610 AccessValue: Inst.getValueOperand());
1611
1612 auto *MemTrans = dyn_cast<MemTransferInst>(Val: MemIntr);
1613 if (!MemTrans)
1614 return true;
1615
1616 auto *SrcPtrVal = MemTrans->getSource();
1617 assert(SrcPtrVal);
1618
1619 auto *SrcAccFunc = SE.getSCEVAtScope(V: SrcPtrVal, L);
1620 assert(SrcAccFunc);
1621 // Ignore accesses to "NULL".
1622 // TODO: See above TODO
1623 if (SrcAccFunc->isZero())
1624 return true;
1625
1626 auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: SrcAccFunc));
1627 assert(SrcPtrSCEV);
1628 SrcAccFunc = SE.getMinusSCEV(LHS: SrcAccFunc, RHS: SrcPtrSCEV);
1629 addArrayAccess(Stmt, MemAccInst: Inst, AccType: MemoryAccess::READ, BaseAddress: SrcPtrSCEV->getValue(),
1630 ElemType: IntegerType::getInt8Ty(C&: SrcPtrVal->getContext()),
1631 IsAffine: LengthIsAffine, Subscripts: {SrcAccFunc, LengthVal}, Sizes: {nullptr},
1632 AccessValue: Inst.getValueOperand());
1633
1634 return true;
1635}
1636
1637bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) {
1638 auto *CI = dyn_cast_or_null<CallInst>(Val&: Inst);
1639
1640 if (CI == nullptr)
1641 return false;
1642
1643 if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(V: CI) || isDebugCall(Inst: CI))
1644 return true;
1645
1646 auto *AF = SE.getConstant(Ty: IntegerType::getInt64Ty(C&: CI->getContext()), V: 0);
1647 auto *CalledFunction = CI->getCalledFunction();
1648 MemoryEffects ME = AA.getMemoryEffects(F: CalledFunction);
1649 if (ME.doesNotAccessMemory())
1650 return true;
1651
1652 if (ME.onlyAccessesArgPointees()) {
1653 ModRefInfo ArgMR = ME.getModRef(Loc: IRMemLocation::ArgMem);
1654 auto AccType =
1655 !isModSet(MRI: ArgMR) ? MemoryAccess::READ : MemoryAccess::MAY_WRITE;
1656 Loop *L = LI.getLoopFor(BB: Inst->getParent());
1657 for (const auto &Arg : CI->args()) {
1658 if (!Arg->getType()->isPointerTy())
1659 continue;
1660
1661 auto *ArgSCEV = SE.getSCEVAtScope(V: Arg, L);
1662 if (ArgSCEV->isZero())
1663 continue;
1664
1665 if (auto *U = dyn_cast<SCEVUnknown>(Val: ArgSCEV)) {
1666 if (isa<ConstantPointerNull>(Val: U->getValue()))
1667 return true;
1668 }
1669
1670 auto *ArgBasePtr = cast<SCEVUnknown>(Val: SE.getPointerBase(V: ArgSCEV));
1671 addArrayAccess(Stmt, MemAccInst: Inst, AccType, BaseAddress: ArgBasePtr->getValue(),
1672 ElemType: ArgBasePtr->getType(), IsAffine: false, Subscripts: {AF}, Sizes: {nullptr}, AccessValue: CI);
1673 }
1674 return true;
1675 }
1676
1677 if (ME.onlyReadsMemory()) {
1678 GlobalReads.emplace_back(Args&: Stmt, Args&: CI);
1679 return true;
1680 }
1681 return false;
1682}
1683
1684bool ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) {
1685 // Memory builtins are not considered by this function.
1686 if (!Inst.isLoad() && !Inst.isStore())
1687 return false;
1688
1689 Value *Address = Inst.getPointerOperand();
1690 Value *Val = Inst.getValueOperand();
1691 Type *ElementType = Val->getType();
1692 enum MemoryAccess::AccessType AccType =
1693 isa<LoadInst>(Val: Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE;
1694
1695 const SCEV *AccessFunction =
1696 SE.getSCEVAtScope(V: Address, L: LI.getLoopFor(BB: Inst->getParent()));
1697 const SCEVUnknown *BasePointer =
1698 dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AccessFunction));
1699
1700 assert(BasePointer && "Could not find base pointer");
1701 AccessFunction = SE.getMinusSCEV(LHS: AccessFunction, RHS: BasePointer);
1702
1703 // Check if the access depends on a loop contained in a non-affine subregion.
1704 bool isVariantInNonAffineLoop = false;
1705 SetVector<const Loop *> Loops;
1706 findLoops(Expr: AccessFunction, Loops);
1707 for (const Loop *L : Loops)
1708 if (Stmt->contains(L)) {
1709 isVariantInNonAffineLoop = true;
1710 break;
1711 }
1712
1713 InvariantLoadsSetTy AccessILS;
1714
1715 Loop *SurroundingLoop = Stmt->getSurroundingLoop();
1716 bool IsAffine = !isVariantInNonAffineLoop &&
1717 isAffineExpr(R: &scop->getRegion(), Scope: SurroundingLoop,
1718 Expression: AccessFunction, SE, ILS: &AccessILS);
1719
1720 const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
1721 for (LoadInst *LInst : AccessILS)
1722 if (!ScopRIL.count(key: LInst))
1723 IsAffine = false;
1724
1725 if (!IsAffine && AccType == MemoryAccess::MUST_WRITE)
1726 AccType = MemoryAccess::MAY_WRITE;
1727
1728 addArrayAccess(Stmt, MemAccInst: Inst, AccType, BaseAddress: BasePointer->getValue(), ElemType: ElementType,
1729 IsAffine, Subscripts: {AccessFunction}, Sizes: {nullptr}, AccessValue: Val);
1730 return true;
1731}
1732
1733void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) {
1734 if (buildAccessMemIntrinsic(Inst, Stmt))
1735 return;
1736
1737 if (buildAccessCallInst(Inst, Stmt))
1738 return;
1739
1740 if (buildAccessMultiDimFixed(Inst, Stmt))
1741 return;
1742
1743 if (buildAccessMultiDimParam(Inst, Stmt))
1744 return;
1745
1746 if (buildAccessSingleDim(Inst, Stmt))
1747 return;
1748
1749 llvm_unreachable(
1750 "At least one of the buildAccess functions must handled this access, or "
1751 "ScopDetection should have rejected this SCoP");
1752}
1753
1754void ScopBuilder::buildAccessFunctions() {
1755 for (auto &Stmt : *scop) {
1756 if (Stmt.isBlockStmt()) {
1757 buildAccessFunctions(Stmt: &Stmt, BB&: *Stmt.getBasicBlock());
1758 continue;
1759 }
1760
1761 Region *R = Stmt.getRegion();
1762 for (BasicBlock *BB : R->blocks())
1763 buildAccessFunctions(Stmt: &Stmt, BB&: *BB, NonAffineSubRegion: R);
1764 }
1765
1766 // Build write accesses for values that are used after the SCoP.
1767 // The instructions defining them might be synthesizable and therefore not
1768 // contained in any statement, hence we iterate over the original instructions
1769 // to identify all escaping values.
1770 for (BasicBlock *BB : scop->getRegion().blocks()) {
1771 for (Instruction &Inst : *BB)
1772 buildEscapingDependences(Inst: &Inst);
1773 }
1774}
1775
1776bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) {
1777 return !Inst->isTerminator() && !isIgnoredIntrinsic(V: Inst) &&
1778 !canSynthesize(V: Inst, S: *scop, SE: &SE, Scope: L);
1779}
1780
1781/// Generate a name for a statement.
1782///
1783/// @param BB The basic block the statement will represent.
1784/// @param BBIdx The index of the @p BB relative to other BBs/regions.
1785/// @param Count The index of the created statement in @p BB.
1786 /// @param IsMain Whether this is the main statement for @p BB. If true,
1787/// no suffix will be added.
1788/// @param IsLast Uses a special indicator for the last statement of a BB.
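///
/// For illustration (the exact spelling depends on getIslCompatibleName and on
/// whether LLVM value names are used): a block %bb3 could yield "Stmt_bb3" for
/// its main statement, "Stmt_bb3_b" for a second statement, and
/// "Stmt_bb3_last" for the epilogue statement.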
1789static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count,
1790 bool IsMain, bool IsLast = false) {
1791 std::string Suffix;
1792 if (!IsMain) {
1793 if (UseInstructionNames)
1794 Suffix = '_';
1795 if (IsLast)
1796 Suffix += "last";
1797 else if (Count < 26)
1798 Suffix += 'a' + Count;
1799 else
1800 Suffix += std::to_string(val: Count);
1801 }
1802 return getIslCompatibleName(Prefix: "Stmt", Val: BB, Number: BBIdx, Suffix, UseInstructionNames);
1803}
1804
1805/// Generate a name for a statement that represents a non-affine subregion.
1806///
1807/// @param R The region the statement will represent.
1808/// @param RIdx The index of the @p R relative to other BBs/regions.
1809static std::string makeStmtName(Region *R, long RIdx) {
1810 return getIslCompatibleName(Prefix: "Stmt", Middle: R->getNameStr(), Number: RIdx, Suffix: "",
1811 UseInstructionNames);
1812}
1813
1814void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) {
1815 Loop *SurroundingLoop = LI.getLoopFor(BB);
1816
1817 int Count = 0;
1818 long BBIdx = scop->getNextStmtIdx();
1819 std::vector<Instruction *> Instructions;
1820 for (Instruction &Inst : *BB) {
1821 if (shouldModelInst(Inst: &Inst, L: SurroundingLoop))
1822 Instructions.push_back(x: &Inst);
1823 if (Inst.getMetadata(Kind: "polly_split_after") ||
1824 (SplitOnStore && isa<StoreInst>(Val: Inst))) {
1825 std::string Name = makeStmtName(BB, BBIdx, Count, IsMain: Count == 0);
1826 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1827 Count++;
1828 Instructions.clear();
1829 }
1830 }
1831
1832 std::string Name = makeStmtName(BB, BBIdx, Count, IsMain: Count == 0);
1833 scop->addScopStmt(BB, Name, SurroundingLoop, Instructions);
1834}
1835
1836/// Is @p Inst an ordered instruction?
1837///
1838 /// An unordered instruction is an instruction such that a sequence of
1839/// unordered instructions can be permuted without changing semantics. Any
1840/// instruction for which this is not always the case is ordered.
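///
/// For example (illustrative): two integer additions on scalars can be freely
/// reordered, whereas a load and a store that may touch the same memory must
/// keep their relative order, so both count as ordered here.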
1841static bool isOrderedInstruction(Instruction *Inst) {
1842 return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
1843}
1844
1845/// Join instructions to the same statement if one uses the scalar result of the
1846/// other.
1847static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
1848 ArrayRef<Instruction *> ModeledInsts) {
1849 for (Instruction *Inst : ModeledInsts) {
1850 if (isa<PHINode>(Val: Inst))
1851 continue;
1852
1853 for (Use &Op : Inst->operands()) {
1854 Instruction *OpInst = dyn_cast<Instruction>(Val: Op.get());
1855 if (!OpInst)
1856 continue;
1857
1858 // Check if OpInst is in the BB and is a modeled instruction.
1859 auto OpVal = UnionFind.findValue(V: OpInst);
1860 if (OpVal == UnionFind.end())
1861 continue;
1862
1863 UnionFind.unionSets(V1: Inst, V2: OpInst);
1864 }
1865 }
1866}
1867
1868/// Ensure that the order of ordered instructions does not change.
1869///
1870 /// If we encounter an ordered instruction enclosed by instructions belonging
1871 /// to a different statement (which may itself contain ordered instructions,
1872 /// but this is not checked here), join them.
1873static void
1874joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
1875 ArrayRef<Instruction *> ModeledInsts) {
1876 SetVector<Instruction *> SeenLeaders;
1877 for (Instruction *Inst : ModeledInsts) {
1878 if (!isOrderedInstruction(Inst))
1879 continue;
1880
1881 Instruction *Leader = UnionFind.getLeaderValue(V: Inst);
1882 // Since previous iterations might have merged sets, some items in
1883 // SeenLeaders are not leaders anymore. However, the new leader of
1884 // previously merged instructions must be one of the former leaders of
1885 // these merged instructions.
1886 bool Inserted = SeenLeaders.insert(X: Leader);
1887 if (Inserted)
1888 continue;
1889
1890 // Merge statements to close holes. Say, we have already seen statements A
1891 // and B, in this order. Then we see an instruction of A again and we would
1892 // see the pattern "A B A". This function joins all statements seen after
1893 // the first occurrence of A (here: B) back into A's statement.
1894 for (Instruction *Prev : reverse(C&: SeenLeaders)) {
1895 // We are backtracking from the last element until we see Inst's leader
1896 // in SeenLeaders and merge all into one set. Although leaders of
1897 // instructions change during the execution of this loop, it's irrelevant
1898 // as we are just searching for the element that we already confirmed is
1899 // in the list.
1900 if (Prev == Leader)
1901 break;
1902 UnionFind.unionSets(V1: Prev, V2: Leader);
1903 }
1904 }
1905}
1906
1907 /// If the BasicBlock has an edge back to itself, ensure that the PHI WRITEs for
1908/// the incoming values from this block are executed after the PHI READ.
1909///
1910/// Otherwise it could overwrite the incoming value from before the BB with the
1911/// value for the next execution. This can happen if the PHI WRITE is added to
1912/// the statement with the instruction that defines the incoming value (instead
1913 /// of the last statement of the same BB). To ensure that the PHI READ and
1914 /// WRITE are in order, we put both into the same statement. PHI WRITEs are
1915 /// always executed after PHI READs when they are in the same statement.
1916///
1917/// TODO: This is an overpessimization. We only have to ensure that the PHI
1918/// WRITE is not put into a statement containing the PHI itself. That could also
1919/// be done by
1920/// - having all (strongly connected) PHIs in a single statement,
1921/// - unite only the PHIs in the operand tree of the PHI WRITE (because it only
1922/// has a chance of being lifted before a PHI by being in a statement with a
1923/// PHI that comes before in the basic block), or
1924/// - when uniting statements, ensure that no (relevant) PHIs are overtaken.
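///
/// Illustrative situation (hypothetical IR): a block %body that branches back
/// to itself contains "%phi = phi [ %init, %pre ], [ %next, %body ]" and
/// "%next = add %phi, 1". The PHI READ of %phi and the PHI WRITE of the
/// incoming value %next are forced into the same statement, so the write for
/// the next iteration cannot overtake the read of the current one.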
1925static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind,
1926 ArrayRef<Instruction *> ModeledInsts) {
1927 for (Instruction *Inst : ModeledInsts) {
1928 PHINode *PHI = dyn_cast<PHINode>(Val: Inst);
1929 if (!PHI)
1930 continue;
1931
1932 int Idx = PHI->getBasicBlockIndex(BB: PHI->getParent());
1933 if (Idx < 0)
1934 continue;
1935
1936 Instruction *IncomingVal =
1937 dyn_cast<Instruction>(Val: PHI->getIncomingValue(i: Idx));
1938 if (!IncomingVal)
1939 continue;
1940
1941 UnionFind.unionSets(V1: PHI, V2: IncomingVal);
1942 }
1943}
1944
1945void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
1946 Loop *L = LI.getLoopFor(BB);
1947
1948 // Extracting out modeled instructions saves us from checking
1949 // shouldModelInst() repeatedly.
1950 SmallVector<Instruction *, 32> ModeledInsts;
1951 EquivalenceClasses<Instruction *> UnionFind;
1952 Instruction *MainInst = nullptr, *MainLeader = nullptr;
1953 for (Instruction &Inst : *BB) {
1954 if (!shouldModelInst(Inst: &Inst, L))
1955 continue;
1956 ModeledInsts.push_back(Elt: &Inst);
1957 UnionFind.insert(Data: &Inst);
1958
1959 // When a BB is split into multiple statements, the main statement is the
1960 // one containing the 'main' instruction. We select the first instruction
1961 // that is unlikely to be removed (because it has side-effects) as the main
1962 // one. It is used to ensure that at least one statement from the bb has the
1963 // same name as with -polly-stmt-granularity=bb.
1964 if (!MainInst && (isa<StoreInst>(Val: Inst) ||
1965 (isa<CallInst>(Val: Inst) && !isa<IntrinsicInst>(Val: Inst))))
1966 MainInst = &Inst;
1967 }
1968
1969 joinOperandTree(UnionFind, ModeledInsts);
1970 joinOrderedInstructions(UnionFind, ModeledInsts);
1971 joinOrderedPHIs(UnionFind, ModeledInsts);
1972
1973 // The list of instructions for each statement (a statement is represented
1974 // by its leader instruction).
1975 MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
1976
1977 // The order of statements must be preserved w.r.t. their ordered
1978 // instructions. Without this explicit scan, we would also use non-ordered
1979 // instructions (whose order is arbitrary) to determine statement order.
1980 for (Instruction *Inst : ModeledInsts) {
1981 if (!isOrderedInstruction(Inst))
1982 continue;
1983
1984 auto LeaderIt = UnionFind.findLeader(V: Inst);
1985 if (LeaderIt == UnionFind.member_end())
1986 continue;
1987
1988 // Insert element for the leader instruction.
1989 (void)LeaderToInstList[*LeaderIt];
1990 }
1991
1992 // Collect the instructions of all leaders. UnionFind's member iterators
1993 // are unfortunately not in any specific order.
1994 for (Instruction *Inst : ModeledInsts) {
1995 auto LeaderIt = UnionFind.findLeader(V: Inst);
1996 if (LeaderIt == UnionFind.member_end())
1997 continue;
1998
1999 if (Inst == MainInst)
2000 MainLeader = *LeaderIt;
2001 std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
2002 InstList.push_back(x: Inst);
2003 }
2004
2005 // Finally build the statements.
2006 int Count = 0;
2007 long BBIdx = scop->getNextStmtIdx();
2008 for (auto &Instructions : LeaderToInstList) {
2009 std::vector<Instruction *> &InstList = Instructions.second;
2010
2011 // If there is no main instruction, make the first statement the main.
2012 bool IsMain = (MainInst ? MainLeader == Instructions.first : Count == 0);
2013
2014 std::string Name = makeStmtName(BB, BBIdx, Count, IsMain);
2015 scop->addScopStmt(BB, Name, SurroundingLoop: L, Instructions: std::move(InstList));
2016 Count += 1;
2017 }
2018
2019 // Unconditionally add an epilogue (last statement). It contains no
2020 // instructions, but holds the PHI write accesses for successor basic blocks,
2021 // if the incoming value is not defined in another statement of the same BB.
2022 // The epilogue becomes the main statement only if there is no other
2023 // statement that could become main.
2024 // The epilogue will be removed if no PHIWrite is added to it.
2025 std::string EpilogueName = makeStmtName(BB, BBIdx, Count, IsMain: Count == 0, IsLast: true);
2026 scop->addScopStmt(BB, Name: EpilogueName, SurroundingLoop: L, Instructions: {});
2027}
2028
2029void ScopBuilder::buildStmts(Region &SR) {
2030 if (scop->isNonAffineSubRegion(R: &SR)) {
2031 std::vector<Instruction *> Instructions;
2032 Loop *SurroundingLoop =
2033 getFirstNonBoxedLoopFor(BB: SR.getEntry(), LI, BoxedLoops: scop->getBoxedLoops());
2034 for (Instruction &Inst : *SR.getEntry())
2035 if (shouldModelInst(Inst: &Inst, L: SurroundingLoop))
2036 Instructions.push_back(x: &Inst);
2037 long RIdx = scop->getNextStmtIdx();
2038 std::string Name = makeStmtName(R: &SR, RIdx);
2039 scop->addScopStmt(R: &SR, Name, SurroundingLoop, EntryBlockInstructions: Instructions);
2040 return;
2041 }
2042
2043 for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I)
2044 if (I->isSubRegion())
2045 buildStmts(SR&: *I->getNodeAs<Region>());
2046 else {
2047 BasicBlock *BB = I->getNodeAs<BasicBlock>();
2048 switch (StmtGranularity) {
2049 case GranularityChoice::BasicBlocks:
2050 buildSequentialBlockStmts(BB);
2051 break;
2052 case GranularityChoice::ScalarIndependence:
2053 buildEqivClassBlockStmts(BB);
2054 break;
2055 case GranularityChoice::Stores:
2056 buildSequentialBlockStmts(BB, SplitOnStore: true);
2057 break;
2058 }
2059 }
2060}
2061
2062void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB,
2063 Region *NonAffineSubRegion) {
2064 assert(
2065 Stmt &&
2066 "The exit BB is the only one that cannot be represented by a statement");
2067 assert(Stmt->represents(&BB));
2068
2069 // We do not build access functions for error blocks, as they may contain
2070 // instructions we can not model.
2071 if (SD.isErrorBlock(BB, R: scop->getRegion()))
2072 return;
2073
2074 auto BuildAccessesForInst = [this, Stmt,
2075 NonAffineSubRegion](Instruction *Inst) {
2076 PHINode *PHI = dyn_cast<PHINode>(Val: Inst);
2077 if (PHI)
2078 buildPHIAccesses(PHIStmt: Stmt, PHI, NonAffineSubRegion, IsExitBlock: false);
2079
2080 if (auto MemInst = MemAccInst::dyn_cast(V&: *Inst)) {
2081 assert(Stmt && "Cannot build access function in non-existing statement");
2082 buildMemoryAccess(Inst: MemInst, Stmt);
2083 }
2084
2085 // PHI nodes have already been modeled above and terminators that are
2086 // not part of a non-affine subregion are fully modeled and regenerated
2087 // from the polyhedral domains. Hence, they do not need to be modeled as
2088 // explicit data dependences.
2089 if (!PHI)
2090 buildScalarDependences(UserStmt: Stmt, Inst);
2091 };
2092
2093 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
2094 bool IsEntryBlock = (Stmt->getEntryBlock() == &BB);
2095 if (IsEntryBlock) {
2096 for (Instruction *Inst : Stmt->getInstructions())
2097 BuildAccessesForInst(Inst);
2098 if (Stmt->isRegionStmt())
2099 BuildAccessesForInst(BB.getTerminator());
2100 } else {
2101 for (Instruction &Inst : BB) {
2102 if (isIgnoredIntrinsic(V: &Inst))
2103 continue;
2104
2105 // Invariant loads already have been processed.
2106 if (isa<LoadInst>(Val: Inst) && RIL.count(key: cast<LoadInst>(Val: &Inst)))
2107 continue;
2108
2109 BuildAccessesForInst(&Inst);
2110 }
2111 }
2112}
2113
2114MemoryAccess *ScopBuilder::addMemoryAccess(
2115 ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType,
2116 Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue,
2117 ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
2118 MemoryKind Kind) {
2119 bool isKnownMustAccess = false;
2120
2121 // Accesses in single-basic-block statements are always executed.
2122 if (Stmt->isBlockStmt())
2123 isKnownMustAccess = true;
2124
2125 if (Stmt->isRegionStmt()) {
2126 // Accesses that dominate the exit block of a non-affine region are always
2127 // executed. In non-affine regions there may exist MemoryKind::Values that
2128 // do not dominate the exit. MemoryKind::Values will always dominate the
2129 // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the
2130 // non-affine region.
2131 if (Inst && DT.dominates(A: Inst->getParent(), B: Stmt->getRegion()->getExit()))
2132 isKnownMustAccess = true;
2133 }
2134
2135 // Non-affine PHI writes do not "happen" at a particular instruction, but
2136 // after exiting the statement. Therefore they are guaranteed to execute and
2137 // overwrite the old value.
2138 if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI)
2139 isKnownMustAccess = true;
2140
2141 if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE)
2142 AccType = MemoryAccess::MAY_WRITE;
2143
2144 auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType,
2145 Affine, Subscripts, Sizes, AccessValue, Kind);
2146
2147 scop->addAccessFunction(Access);
2148 Stmt->addAccess(Access);
2149 return Access;
2150}
2151
2152void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst,
2153 MemoryAccess::AccessType AccType,
2154 Value *BaseAddress, Type *ElementType,
2155 bool IsAffine,
2156 ArrayRef<const SCEV *> Subscripts,
2157 ArrayRef<const SCEV *> Sizes,
2158 Value *AccessValue) {
2159 ArrayBasePointers.insert(X: BaseAddress);
2160 addMemoryAccess(Stmt, Inst: MemAccInst, AccType, BaseAddress, ElementType, Affine: IsAffine,
2161 AccessValue, Subscripts, Sizes, Kind: MemoryKind::Array);
2162}
2163
2164/// Check if @p Expr is divisible by @p Size.
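/// For example (illustrative SCEVs): (4 * %n + 8) is divisible by 4 because
/// every operand of the add is, while (4 * %n + 2) is only divisible by 2; for
/// a product such as (12 * %n) a single divisible factor (here 12) suffices.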
2165static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
2166 assert(Size != 0);
2167 if (Size == 1)
2168 return true;
2169
2170 // Only one factor needs to be divisible.
2171 if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Val: Expr)) {
2172 for (auto *FactorExpr : MulExpr->operands())
2173 if (isDivisible(Expr: FactorExpr, Size, SE))
2174 return true;
2175 return false;
2176 }
2177
2178 // For other n-ary expressions (Add, AddRec, Max,...) all operands need
2179 // to be divisible.
2180 if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Val: Expr)) {
2181 for (auto *OpExpr : NAryExpr->operands())
2182 if (!isDivisible(Expr: OpExpr, Size, SE))
2183 return false;
2184 return true;
2185 }
2186
2187 auto *SizeSCEV = SE.getConstant(Ty: Expr->getType(), V: Size);
2188 auto *UDivSCEV = SE.getUDivExpr(LHS: Expr, RHS: SizeSCEV);
2189 auto *MulSCEV = SE.getMulExpr(LHS: UDivSCEV, RHS: SizeSCEV);
2190 return MulSCEV == Expr;
2191}
2192
2193void ScopBuilder::foldSizeConstantsToRight() {
2194 isl::union_set Accessed = scop->getAccesses().range();
2195
2196 for (auto Array : scop->arrays()) {
2197 if (Array->getNumberOfDimensions() <= 1)
2198 continue;
2199
2200 isl::space Space = Array->getSpace();
2201 Space = Space.align_params(space2: Accessed.get_space());
2202
2203 if (!Accessed.contains(space: Space))
2204 continue;
2205
2206 isl::set Elements = Accessed.extract_set(space: Space);
2207 isl::map Transform = isl::map::universe(space: Array->getSpace().map_from_set());
2208
2209 std::vector<int> Int;
2210 unsigned Dims = unsignedFromIslSize(Size: Elements.tuple_dim());
2211 for (unsigned i = 0; i < Dims; i++) {
2212 isl::set DimOnly = isl::set(Elements).project_out(type: isl::dim::set, first: 0, n: i);
2213 DimOnly = DimOnly.project_out(type: isl::dim::set, first: 1, n: Dims - i - 1);
2214 DimOnly = DimOnly.lower_bound_si(type: isl::dim::set, pos: 0, value: 0);
2215
2216 isl::basic_set DimHull = DimOnly.affine_hull();
2217
2218 if (i == Dims - 1) {
2219 Int.push_back(x: 1);
2220 Transform = Transform.equate(type1: isl::dim::in, pos1: i, type2: isl::dim::out, pos2: i);
2221 continue;
2222 }
2223
2224 if (unsignedFromIslSize(Size: DimHull.dim(type: isl::dim::div)) == 1) {
2225 isl::aff Diff = DimHull.get_div(pos: 0);
2226 isl::val Val = Diff.get_denominator_val();
2227
2228 int ValInt = 1;
2229 if (Val.is_int()) {
2230 auto ValAPInt = APIntFromVal(V: Val);
2231 if (ValAPInt.isSignedIntN(N: 32))
2232 ValInt = ValAPInt.getSExtValue();
2233 } else {
2234 }
2235
2236 Int.push_back(x: ValInt);
2237 isl::constraint C = isl::constraint::alloc_equality(
2238 ls: isl::local_space(Transform.get_space()));
2239 C = C.set_coefficient_si(type: isl::dim::out, pos: i, v: ValInt);
2240 C = C.set_coefficient_si(type: isl::dim::in, pos: i, v: -1);
2241 Transform = Transform.add_constraint(constraint: C);
2242 continue;
2243 }
2244
2245 isl::basic_set ZeroSet = isl::basic_set(DimHull);
2246 ZeroSet = ZeroSet.fix_si(type: isl::dim::set, pos: 0, value: 0);
2247
2248 int ValInt = 1;
2249 if (ZeroSet.is_equal(bset2: DimHull)) {
2250 ValInt = 0;
2251 }
2252
2253 Int.push_back(x: ValInt);
2254 Transform = Transform.equate(type1: isl::dim::in, pos1: i, type2: isl::dim::out, pos2: i);
2255 }
2256
2257 isl::set MappedElements = isl::map(Transform).domain();
2258 if (!Elements.is_subset(set2: MappedElements))
2259 continue;
2260
2261 bool CanFold = true;
2262 if (Int[0] <= 1)
2263 CanFold = false;
2264
2265 unsigned NumDims = Array->getNumberOfDimensions();
2266 for (unsigned i = 1; i < NumDims - 1; i++)
2267 if (Int[0] != Int[i] && Int[i])
2268 CanFold = false;
2269
2270 if (!CanFold)
2271 continue;
2272
2273 for (auto &Access : scop->access_functions())
2274 if (Access->getScopArrayInfo() == Array)
2275 Access->setAccessRelation(
2276 Access->getAccessRelation().apply_range(map2: Transform));
2277
2278 std::vector<const SCEV *> Sizes;
2279 for (unsigned i = 0; i < NumDims; i++) {
2280 auto Size = Array->getDimensionSize(Dim: i);
2281
2282 if (i == NumDims - 1)
2283 Size = SE.getMulExpr(LHS: Size, RHS: SE.getConstant(Ty: Size->getType(), V: Int[0]));
2284 Sizes.push_back(x: Size);
2285 }
2286
2287 Array->updateSizes(Sizes, CheckConsistency: false /* CheckConsistency */);
2288 }
2289}
2290
2291void ScopBuilder::finalizeAccesses() {
2292 updateAccessDimensionality();
2293 foldSizeConstantsToRight();
2294 foldAccessRelations();
2295 assumeNoOutOfBounds();
2296}
2297
2298void ScopBuilder::updateAccessDimensionality() {
2299 // Check all array accesses for each base pointer and find a (virtual) element
2300 // size for the base pointer that divides all access functions.
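// For example (illustrative): if an array of i32 elements is also accessed at
// byte offsets that are only guaranteed to be multiples of 2, the element size
// that divides all subscripts is 2 and the (virtual) element type becomes i16.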
2301 for (ScopStmt &Stmt : *scop)
2302 for (MemoryAccess *Access : Stmt) {
2303 if (!Access->isArrayKind())
2304 continue;
2305 ScopArrayInfo *Array =
2306 const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());
2307
2308 if (Array->getNumberOfDimensions() != 1)
2309 continue;
2310 unsigned DivisibleSize = Array->getElemSizeInBytes();
2311 const SCEV *Subscript = Access->getSubscript(Dim: 0);
2312 while (!isDivisible(Expr: Subscript, Size: DivisibleSize, SE))
2313 DivisibleSize /= 2;
2314 auto *Ty = IntegerType::get(C&: SE.getContext(), NumBits: DivisibleSize * 8);
2315 Array->updateElementType(NewElementType: Ty);
2316 }
2317
2318 for (auto &Stmt : *scop)
2319 for (auto &Access : Stmt)
2320 Access->updateDimensionality();
2321}
2322
2323void ScopBuilder::foldAccessRelations() {
2324 for (auto &Stmt : *scop)
2325 for (auto &Access : Stmt)
2326 Access->foldAccessRelation();
2327}
2328
2329void ScopBuilder::assumeNoOutOfBounds() {
2330 if (PollyIgnoreInbounds)
2331 return;
2332 for (auto &Stmt : *scop)
2333 for (auto &Access : Stmt) {
2334 isl::set Outside = Access->assumeNoOutOfBound();
2335 const auto &Loc = Access->getAccessInstruction()
2336 ? Access->getAccessInstruction()->getDebugLoc()
2337 : DebugLoc();
2338 recordAssumption(RecordedAssumptions: &RecordedAssumptions, Kind: INBOUNDS, Set: Outside, Loc,
2339 Sign: AS_ASSUMPTION);
2340 }
2341}
2342
2343void ScopBuilder::ensureValueWrite(Instruction *Inst) {
2344 // Find the statement that defines the value of Inst. That statement has to
2345 // write the value to make it available to those statements that read it.
2346 ScopStmt *Stmt = scop->getStmtFor(Inst);
2347
2348 // It is possible that the value is synthesizable within a loop (such that it
2349 // is not part of any statement), but not after the loop (where you need the
2350 // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will
2351 // avoid this. In case the IR has no such PHI, use the last statement (where
2352 // the value is synthesizable) to write the value.
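// For example (illustrative): an induction variable is synthesizable inside
// its loop from the loop's iterators, but its value after the loop would
// require the trip count; without an LCSSA PHI we fall back to the last
// statement of the defining block to emit the write.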
2353 if (!Stmt)
2354 Stmt = scop->getLastStmtFor(BB: Inst->getParent());
2355
2356 // Inst not defined within this SCoP.
2357 if (!Stmt)
2358 return;
2359
2360 // Do not process further if the instruction is already written.
2361 if (Stmt->lookupValueWriteOf(Inst))
2362 return;
2363
2364 addMemoryAccess(Stmt, Inst, AccType: MemoryAccess::MUST_WRITE, BaseAddress: Inst, ElementType: Inst->getType(),
2365 Affine: true, AccessValue: Inst, Subscripts: ArrayRef<const SCEV *>(),
2366 Sizes: ArrayRef<const SCEV *>(), Kind: MemoryKind::Value);
2367}
2368
2369void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) {
2370 // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality
2371 // to be able to replace this one. Currently, there is a split responsibility.
2372 // In a first step, the MemoryAccess is created, but without the
2373 // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the
2374 // AccessRelation is created. At least for scalar accesses, there is no new
2375 // information available at ScopStmt::buildAccessRelations(), so we could
2376 // create the AccessRelation right away. This is what
2377 // ScopStmt::ensureValueRead(Value*) does.
2378
2379 auto *Scope = UserStmt->getSurroundingLoop();
2380 auto VUse = VirtualUse::create(S: scop.get(), UserStmt, UserScope: Scope, Val: V, Virtual: false);
2381 switch (VUse.getKind()) {
2382 case VirtualUse::Constant:
2383 case VirtualUse::Block:
2384 case VirtualUse::Synthesizable:
2385 case VirtualUse::Hoisted:
2386 case VirtualUse::Intra:
2387 // Uses of these kinds do not need a MemoryAccess.
2388 break;
2389
2390 case VirtualUse::ReadOnly:
2391 // Add MemoryAccess for invariant values only if requested.
2392 if (!ModelReadOnlyScalars)
2393 break;
2394
2395 [[fallthrough]];
2396 case VirtualUse::Inter:
2397
2398 // Do not create another MemoryAccess for reloading the value if one already
2399 // exists.
2400 if (UserStmt->lookupValueReadOf(Inst: V))
2401 break;
2402
2403 addMemoryAccess(Stmt: UserStmt, Inst: nullptr, AccType: MemoryAccess::READ, BaseAddress: V, ElementType: V->getType(),
2404 Affine: true, AccessValue: V, Subscripts: ArrayRef<const SCEV *>(), Sizes: ArrayRef<const SCEV *>(),
2405 Kind: MemoryKind::Value);
2406
2407 // Inter-statement uses need to write the value in their defining statement.
2408 if (VUse.isInter())
2409 ensureValueWrite(Inst: cast<Instruction>(Val: V));
2410 break;
2411 }
2412}
2413
2414void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt,
2415 BasicBlock *IncomingBlock,
2416 Value *IncomingValue, bool IsExitBlock) {
2417 // As the incoming block might turn out to be an error statement, ensure we
2418 // will create an exit PHI SAI object. It is needed during code generation
2419 // and would be created later anyway.
2420 if (IsExitBlock)
2421 scop->getOrCreateScopArrayInfo(BasePtr: PHI, ElementType: PHI->getType(), Sizes: {},
2422 Kind: MemoryKind::ExitPHI);
2423
2424 // This is possible if PHI is in the SCoP's entry block. The incoming blocks
2425 // from outside the SCoP's region have no statement representation.
2426 if (!IncomingStmt)
2427 return;
2428
2429 // Take care that the incoming value is available in the incoming block.
2430 // This must be done before the check for multiple PHI writes because, for a
2431 // subregion, each of its exiting edges can carry the effective written value
2432 // of the subregion. As such, all of them must be made available in the
2433 // subregion statement.
2434 ensureValueRead(V: IncomingValue, UserStmt: IncomingStmt);
2435
2436 // Do not add more than one MemoryAccess per PHINode and ScopStmt.
2437 if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) {
2438 assert(Acc->getAccessInstruction() == PHI);
2439 Acc->addIncoming(IncomingBlock, IncomingValue);
2440 return;
2441 }
2442
2443 MemoryAccess *Acc = addMemoryAccess(
2444 Stmt: IncomingStmt, Inst: PHI, AccType: MemoryAccess::MUST_WRITE, BaseAddress: PHI, ElementType: PHI->getType(), Affine: true,
2445 AccessValue: PHI, Subscripts: ArrayRef<const SCEV *>(), Sizes: ArrayRef<const SCEV *>(),
2446 Kind: IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI);
2447 assert(Acc);
2448 Acc->addIncoming(IncomingBlock, IncomingValue);
2449}
2450
2451void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) {
2452 addMemoryAccess(Stmt: PHIStmt, Inst: PHI, AccType: MemoryAccess::READ, BaseAddress: PHI, ElementType: PHI->getType(), Affine: true,
2453 AccessValue: PHI, Subscripts: ArrayRef<const SCEV *>(), Sizes: ArrayRef<const SCEV *>(),
2454 Kind: MemoryKind::PHI);
2455}
2456
2457void ScopBuilder::buildDomain(ScopStmt &Stmt) {
2458 isl::id Id = isl::id::alloc(ctx: scop->getIslCtx(), name: Stmt.getBaseName(), user: &Stmt);
2459
2460 Stmt.Domain = scop->getDomainConditions(Stmt: &Stmt);
2461 Stmt.Domain = Stmt.Domain.set_tuple_id(Id);
2462}
2463
2464void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) {
2465 isl::set Domain = Stmt.getDomain();
2466 BasicBlock *BB = Stmt.getEntryBlock();
2467
2468 Loop *L = LI.getLoopFor(BB);
2469
2470 while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L))
2471 L = L->getParentLoop();
2472
2473 SmallVector<llvm::Loop *, 8> Loops;
2474
2475 while (L && Stmt.getParent()->getRegion().contains(L)) {
2476 Loops.push_back(Elt: L);
2477 L = L->getParentLoop();
2478 }
2479
2480 Stmt.NestLoops.insert(I: Stmt.NestLoops.begin(), From: Loops.rbegin(), To: Loops.rend());
2481}
2482
2483/// Return the reduction type for a given binary operator.
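/// For example (illustrative): the update of "sum += A[i]" is an integer Add
/// and maps to RT_ADD; "prod *= A[i]" maps to RT_MUL unless multiplicative
/// reductions are disabled; FAdd and FMul only qualify when they carry the
/// 'fast' fast-math flags.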
2484static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp,
2485 const Instruction *Load) {
2486 if (!BinOp)
2487 return MemoryAccess::RT_NONE;
2488 switch (BinOp->getOpcode()) {
2489 case Instruction::FAdd:
2490 if (!BinOp->isFast())
2491 return MemoryAccess::RT_NONE;
2492 [[fallthrough]];
2493 case Instruction::Add:
2494 return MemoryAccess::RT_ADD;
2495 case Instruction::Or:
2496 return MemoryAccess::RT_BOR;
2497 case Instruction::Xor:
2498 return MemoryAccess::RT_BXOR;
2499 case Instruction::And:
2500 return MemoryAccess::RT_BAND;
2501 case Instruction::FMul:
2502 if (!BinOp->isFast())
2503 return MemoryAccess::RT_NONE;
2504 [[fallthrough]];
2505 case Instruction::Mul:
2506 if (DisableMultiplicativeReductions)
2507 return MemoryAccess::RT_NONE;
2508 return MemoryAccess::RT_MUL;
2509 default:
2510 return MemoryAccess::RT_NONE;
2511 }
2512}
2513
2514 /// True if @p AllAccs intersects with any access in @p MemAccs other than
2515 /// @p LoadMA and @p StoreMA.
2516bool hasIntersectingAccesses(isl::set AllAccs, MemoryAccess *LoadMA,
2517 MemoryAccess *StoreMA, isl::set Domain,
2518 SmallVector<MemoryAccess *, 8> &MemAccs) {
2519 bool HasIntersectingAccs = false;
2520 auto AllAccsNoParams = AllAccs.project_out_all_params();
2521
2522 for (MemoryAccess *MA : MemAccs) {
2523 if (MA == LoadMA || MA == StoreMA)
2524 continue;
2525 auto AccRel = MA->getAccessRelation().intersect_domain(set: Domain);
2526 auto Accs = AccRel.range();
2527 auto AccsNoParams = Accs.project_out_all_params();
2528
2529 bool CompatibleSpace = AllAccsNoParams.has_equal_space(set2: AccsNoParams);
2530
2531 if (CompatibleSpace) {
2532 auto OverlapAccs = Accs.intersect(set2: AllAccs);
2533 bool DoesIntersect = !OverlapAccs.is_empty();
2534 HasIntersectingAccs |= DoesIntersect;
2535 }
2536 }
2537 return HasIntersectingAccs;
2538}
2539
2540 /// Test if the accesses of @p LoadMA and @p StoreMA can form a reduction.
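/// For example (illustrative): in "A[i] += B[i]" the load of A[i] and the
/// store to A[i] share the same access space and overlap, and no other access
/// of the statement (here only the read of B[i]) touches that memory, so the
/// pair qualifies as a reduction candidate.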
2541bool checkCandidatePairAccesses(MemoryAccess *LoadMA, MemoryAccess *StoreMA,
2542 isl::set Domain,
2543 SmallVector<MemoryAccess *, 8> &MemAccs) {
2544 // First check if the base value is the same.
2545 isl::map LoadAccs = LoadMA->getAccessRelation();
2546 isl::map StoreAccs = StoreMA->getAccessRelation();
2547 bool Valid = LoadAccs.has_equal_space(map2: StoreAccs);
2548 POLLY_DEBUG(dbgs() << " == The accessed space below is "
2549 << (Valid ? "" : "not ") << "equal!\n");
2550 POLLY_DEBUG(LoadMA->dump(); StoreMA->dump());
2551
2552 if (Valid) {
2553 // Then check if they actually access the same memory.
2554 isl::map R = isl::manage(ptr: LoadAccs.copy())
2555 .intersect_domain(set: isl::manage(ptr: Domain.copy()));
2556 isl::map W = isl::manage(ptr: StoreAccs.copy())
2557 .intersect_domain(set: isl::manage(ptr: Domain.copy()));
2558 isl::set RS = R.range();
2559 isl::set WS = W.range();
2560
2561 isl::set InterAccs =
2562 isl::manage(ptr: RS.copy()).intersect(set2: isl::manage(ptr: WS.copy()));
2563 Valid = !InterAccs.is_empty();
2564 POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "" : "not ")
2565 << "overlapping!\n");
2566 }
2567
2568 if (Valid) {
2569 // Finally, check that there are no other instructions accessing this memory
2570 isl::map AllAccsRel = LoadAccs.unite(map2: StoreAccs);
2571 AllAccsRel = AllAccsRel.intersect_domain(set: Domain);
2572 isl::set AllAccs = AllAccsRel.range();
2573 Valid = !hasIntersectingAccesses(AllAccs, LoadMA, StoreMA, Domain, MemAccs);
2574
2575 POLLY_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "not " : "")
2576 << "accessed by other instructions!\n");
2577 }
2578 return Valid;
2579}
2580
2581void ScopBuilder::checkForReductions(ScopStmt &Stmt) {
2582 SmallVector<MemoryAccess *, 2> Loads;
2583 SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
2584
2585 // First collect candidate load-store reduction chains by iterating over all
2586 // stores and collecting possible reduction loads.
2587 for (MemoryAccess *StoreMA : Stmt) {
2588 if (StoreMA->isRead())
2589 continue;
2590
2591 Loads.clear();
2592 collectCandidateReductionLoads(StoreMA, Loads);
2593 for (MemoryAccess *LoadMA : Loads)
2594 Candidates.push_back(Elt: std::make_pair(x&: LoadMA, y&: StoreMA));
2595 }
2596
2597 // Then check each possible candidate pair.
2598 for (const auto &CandidatePair : Candidates) {
2599 MemoryAccess *LoadMA = CandidatePair.first;
2600 MemoryAccess *StoreMA = CandidatePair.second;
2601 bool Valid = checkCandidatePairAccesses(LoadMA, StoreMA, Domain: Stmt.getDomain(),
2602 MemAccs&: Stmt.MemAccs);
2603 if (!Valid)
2604 continue;
2605
2606 const LoadInst *Load =
2607 dyn_cast<const LoadInst>(Val: CandidatePair.first->getAccessInstruction());
2608 MemoryAccess::ReductionType RT =
2609 getReductionType(BinOp: dyn_cast<BinaryOperator>(Val: Load->user_back()), Load);
2610
2611 // If no overlapping access was found, we mark the load and store as
2612 // reduction-like.
2613 LoadMA->markAsReductionLike(RT);
2614 StoreMA->markAsReductionLike(RT);
2615 }
2616}
2617
2618void ScopBuilder::verifyInvariantLoads() {
2619 auto &RIL = scop->getRequiredInvariantLoads();
2620 for (LoadInst *LI : RIL) {
2621 assert(LI && scop->contains(LI));
2622 // If there exists a statement in the scop which has a memory access for
2623 // @p LI, then mark this scop as infeasible for optimization.
2624 for (ScopStmt &Stmt : *scop)
2625 if (Stmt.getArrayAccessOrNULLFor(Inst: LI)) {
2626 scop->invalidate(Kind: INVARIANTLOAD, Loc: LI->getDebugLoc(), BB: LI->getParent());
2627 return;
2628 }
2629 }
2630}
2631
2632void ScopBuilder::hoistInvariantLoads() {
2633 if (!PollyInvariantLoadHoisting)
2634 return;
2635
2636 isl::union_map Writes = scop->getWrites();
2637 for (ScopStmt &Stmt : *scop) {
2638 InvariantAccessesTy InvariantAccesses;
2639
2640 for (MemoryAccess *Access : Stmt) {
2641 isl::set NHCtx = getNonHoistableCtx(Access, Writes);
2642 if (!NHCtx.is_null())
2643 InvariantAccesses.push_back(Elt: {.MA: Access, .NonHoistableCtx: NHCtx});
2644 }
2645
2646 // Transfer the memory access from the statement to the SCoP.
2647 for (auto InvMA : InvariantAccesses)
2648 Stmt.removeMemoryAccess(MA: InvMA.MA);
2649 addInvariantLoads(Stmt, InvMAs&: InvariantAccesses);
2650 }
2651}
2652
2653/// Check if an access range is too complex.
2654///
2655 /// An access range is too complex if it contains either many disjuncts or
2656 /// very complex expressions. As a simple heuristic, we assume a set to be
2657 /// too complex if the sum of existentially quantified dimensions and
2658 /// set dimensions is larger than a threshold. This reliably detects both
2659 /// sets with many disjuncts and sets with many divisions, as they
2660 /// arise in h264.
2661///
2662/// @param AccessRange The range to check for complexity.
2663///
2664/// @returns True if the access range is too complex.
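///
/// For example (illustrative): an access range whose basic sets use a total of
/// six existentially quantified dimensions and four set dimensions has ten
/// dimensions overall and would be rejected once this exceeds
/// MaxDimensionsInAccessRange.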
2665static bool isAccessRangeTooComplex(isl::set AccessRange) {
2666 unsigned NumTotalDims = 0;
2667
2668 for (isl::basic_set BSet : AccessRange.get_basic_set_list()) {
2669 NumTotalDims += unsignedFromIslSize(Size: BSet.dim(type: isl::dim::div));
2670 NumTotalDims += unsignedFromIslSize(Size: BSet.dim(type: isl::dim::set));
2671 }
2672
2673 if (NumTotalDims > MaxDimensionsInAccessRange)
2674 return true;
2675
2676 return false;
2677}
2678
2679bool ScopBuilder::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
2680 isl::union_map Writes) {
2681 if (auto *BasePtrMA = scop->lookupBasePtrAccess(MA)) {
2682 return getNonHoistableCtx(Access: BasePtrMA, Writes).is_null();
2683 }
2684
2685 Value *BaseAddr = MA->getOriginalBaseAddr();
2686 if (auto *BasePtrInst = dyn_cast<Instruction>(Val: BaseAddr))
2687 if (!isa<LoadInst>(Val: BasePtrInst))
2688 return scop->contains(I: BasePtrInst);
2689
2690 return false;
2691}
2692
2693void ScopBuilder::addUserContext() {
2694 if (UserContextStr.empty())
2695 return;
2696
2697 isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str());
2698 isl::space Space = scop->getParamSpace();
2699 isl::size SpaceParams = Space.dim(type: isl::dim::param);
2700 if (unsignedFromIslSize(Size: SpaceParams) !=
2701 unsignedFromIslSize(Size: UserContext.dim(type: isl::dim::param))) {
2702 std::string SpaceStr = stringFromIslObj(Obj: Space, DefaultValue: "null");
2703 errs() << "Error: the context provided in -polly-context does not have the "
2704 << "same number of dimensions as the computed context. Due to this "
2705 << "mismatch, the -polly-context option is ignored. Please provide "
2706 << "the context in the parameter space: " << SpaceStr << ".\n";
2707 return;
2708 }
2709
2710 for (auto i : rangeIslSize(Begin: 0, End: SpaceParams)) {
2711 std::string NameContext =
2712 scop->getContext().get_dim_name(type: isl::dim::param, pos: i);
2713 std::string NameUserContext = UserContext.get_dim_name(type: isl::dim::param, pos: i);
2714
2715 if (NameContext != NameUserContext) {
2716 std::string SpaceStr = stringFromIslObj(Obj: Space, DefaultValue: "null");
2717 errs() << "Error: the name of dimension " << i
2718 << " provided in -polly-context "
2719 << "is '" << NameUserContext << "', but the name in the computed "
2720 << "context is '" << NameContext
2721 << "'. Due to this name mismatch, "
2722 << "the -polly-context option is ignored. Please provide "
2723 << "the context in the parameter space: " << SpaceStr << ".\n";
2724 return;
2725 }
2726
2727 UserContext = UserContext.set_dim_id(type: isl::dim::param, pos: i,
2728 id: Space.get_dim_id(type: isl::dim::param, pos: i));
2729 }
2730 isl::set newContext = scop->getContext().intersect(set2: UserContext);
2731 scop->setContext(newContext);
2732}
2733
2734isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access,
2735 isl::union_map Writes) {
2736 // TODO: Loads that are not loop carried, hence are in a statement with
2737 // zero iterators, are by construction invariant, though we
2738 // currently "hoist" them anyway. This is necessary because we allow
2739 // them to be treated as parameters (e.g., in conditions) and our code
2740 // generation would otherwise use the old value.
2741
2742 auto &Stmt = *Access->getStatement();
2743 BasicBlock *BB = Stmt.getEntryBlock();
2744
2745 if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
2746 Access->isMemoryIntrinsic())
2747 return {};
2748
2749 // Skip accesses that have an invariant base pointer which is defined but
2750 // not loaded inside the SCoP. This can happen, e.g., if a readnone call
2751 // returns a pointer that is used as a base address. However, as we want
2752 // to hoist indirect pointers, we allow the base pointer to be defined in
2753 // the region if it is also a memory access. Each ScopArrayInfo object
2754 // that has a base pointer origin has a base pointer that is loaded and
2755 // invariant, thus it will be hoisted too. However, if there is
2756 // no base pointer origin we check that the base pointer is defined
2757 // outside the region.
2758 auto *LI = cast<LoadInst>(Val: Access->getAccessInstruction());
2759 if (hasNonHoistableBasePtrInScop(MA: Access, Writes))
2760 return {};
2761
2762 isl::map AccessRelation = Access->getAccessRelation();
2763 assert(!AccessRelation.is_empty());
2764
2765 if (AccessRelation.involves_dims(type: isl::dim::in, first: 0, n: Stmt.getNumIterators()))
2766 return {};
2767
2768 AccessRelation = AccessRelation.intersect_domain(set: Stmt.getDomain());
2769 isl::set SafeToLoad;
2770
2771 auto &DL = scop->getFunction().getParent()->getDataLayout();
2772 if (isSafeToLoadUnconditionally(V: LI->getPointerOperand(), Ty: LI->getType(),
2773 Alignment: LI->getAlign(), DL)) {
2774 SafeToLoad = isl::set::universe(space: AccessRelation.get_space().range());
2775 } else if (BB != LI->getParent()) {
2776 // Skip accesses in non-affine subregions as they might not be executed
2777 // under the same condition as the entry of the non-affine subregion.
2778 return {};
2779 } else {
2780 SafeToLoad = AccessRelation.range();
2781 }
2782
2783 if (isAccessRangeTooComplex(AccessRange: AccessRelation.range()))
2784 return {};
2785
2786 isl::union_map Written = Writes.intersect_range(uset: SafeToLoad);
2787 isl::set WrittenCtx = Written.params();
2788 bool IsWritten = !WrittenCtx.is_empty();
2789
2790 if (!IsWritten)
2791 return WrittenCtx;
2792
2793 WrittenCtx = WrittenCtx.remove_divs();
2794 bool TooComplex =
2795 unsignedFromIslSize(Size: WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain;
2796 if (TooComplex || !isRequiredInvariantLoad(LI))
2797 return {};
2798
2799 scop->addAssumption(Kind: INVARIANTLOAD, Set: WrittenCtx, Loc: LI->getDebugLoc(),
2800 Sign: AS_RESTRICTION, BB: LI->getParent());
2801 return WrittenCtx;
2802}
2803
2804static bool isAParameter(llvm::Value *maybeParam, const Function &F) {
2805 for (const llvm::Argument &Arg : F.args())
2806 if (&Arg == maybeParam)
2807 return true;
2808
2809 return false;
2810}
2811
2812bool ScopBuilder::canAlwaysBeHoisted(MemoryAccess *MA,
2813 bool StmtInvalidCtxIsEmpty,
2814 bool MAInvalidCtxIsEmpty,
2815 bool NonHoistableCtxIsEmpty) {
2816 LoadInst *LInst = cast<LoadInst>(Val: MA->getAccessInstruction());
2817 const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
2818 if (PollyAllowDereferenceOfAllFunctionParams &&
2819 isAParameter(maybeParam: LInst->getPointerOperand(), F: scop->getFunction()))
2820 return true;
2821
2822 // TODO: We can provide more information for better but more expensive
2823 // results.
2824 if (!isDereferenceableAndAlignedPointer(
2825 V: LInst->getPointerOperand(), Ty: LInst->getType(), Alignment: LInst->getAlign(), DL))
2826 return false;
2827
2828 // If the location might be overwritten we do not hoist it unconditionally.
2829 //
2830 // TODO: This is probably too conservative.
2831 if (!NonHoistableCtxIsEmpty)
2832 return false;
2833
2834 // If a dereferenceable load is in a statement that is modeled precisely we
2835 // can hoist it.
2836 if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
2837 return true;
2838
2839 // Even if the statement is not modeled precisely we can hoist the load if it
2840 // does not involve any parameters that might have been specialized by the
2841 // statement domain.
2842 for (const SCEV *Subscript : MA->subscripts())
2843 if (!isa<SCEVConstant>(Val: Subscript))
2844 return false;
2845 return true;
2846}
2847
2848void ScopBuilder::addInvariantLoads(ScopStmt &Stmt,
2849 InvariantAccessesTy &InvMAs) {
2850 if (InvMAs.empty())
2851 return;
2852
2853 isl::set StmtInvalidCtx = Stmt.getInvalidContext();
2854 bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();
2855
2856 // Get the context under which the statement is executed but remove the error
2857 // context under which this statement is reached.
2858 isl::set DomainCtx = Stmt.getDomain().params();
2859 DomainCtx = DomainCtx.subtract(set2: StmtInvalidCtx);
2860
2861 if (unsignedFromIslSize(Size: DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) {
2862 auto *AccInst = InvMAs.front().MA->getAccessInstruction();
2863 scop->invalidate(Kind: COMPLEXITY, Loc: AccInst->getDebugLoc(), BB: AccInst->getParent());
2864 return;
2865 }
2866
2867 // Project out all parameters that relate to loads in the statement. Otherwise
2868 // we could have cyclic dependences on the constraints under which the
2869 // hoisted loads are executed and we could not determine an order in which to
2870 // pre-load them. This happens because not only lower bounds are part of the
2871 // domain but also upper bounds.
2872 for (auto &InvMA : InvMAs) {
2873 auto *MA = InvMA.MA;
2874 Instruction *AccInst = MA->getAccessInstruction();
2875 if (SE.isSCEVable(Ty: AccInst->getType())) {
2876 SetVector<Value *> Values;
2877 for (const SCEV *Parameter : scop->parameters()) {
2878 Values.clear();
2879 findValues(Expr: Parameter, SE, Values);
2880 if (!Values.count(key: AccInst))
2881 continue;
2882
2883 isl::id ParamId = scop->getIdForParam(Parameter);
2884 if (!ParamId.is_null()) {
2885 int Dim = DomainCtx.find_dim_by_id(type: isl::dim::param, id: ParamId);
2886 if (Dim >= 0)
2887 DomainCtx = DomainCtx.eliminate(type: isl::dim::param, first: Dim, n: 1);
2888 }
2889 }
2890 }
2891 }
2892
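// Now compute an execution context for every invariant access and either
// consolidate it into an existing invariant load equivalence class or create
// a new one.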
2893 for (auto &InvMA : InvMAs) {
2894 auto *MA = InvMA.MA;
2895 isl::set NHCtx = InvMA.NonHoistableCtx;
2896
2897 // Check for another invariant access that accesses the same location as
2898 // MA and if found consolidate them. Otherwise create a new equivalence
2899 // class at the end of InvariantEquivClasses.
2900 LoadInst *LInst = cast<LoadInst>(Val: MA->getAccessInstruction());
2901 Type *Ty = LInst->getType();
2902 const SCEV *PointerSCEV = SE.getSCEV(V: LInst->getPointerOperand());
2903
2904 isl::set MAInvalidCtx = MA->getInvalidContext();
2905 bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
2906 bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();
2907
2908 isl::set MACtx;
2909 // Check if we know that this pointer can be speculatively accessed.
2910 if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
2911 NonHoistableCtxIsEmpty)) {
2912 MACtx = isl::set::universe(space: DomainCtx.get_space());
2913 } else {
2914 MACtx = DomainCtx;
2915 MACtx = MACtx.subtract(set2: MAInvalidCtx.unite(set2: NHCtx));
2916 MACtx = MACtx.gist_params(context: scop->getContext());
2917 }
2918
2919 bool Consolidated = false;
2920 for (auto &IAClass : scop->invariantEquivClasses()) {
2921 if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
2922 continue;
2923
2924 // If the pointer and the type are equal, check whether the access
2925 // function w.r.t. the domain is equal too. It can happen that the domain
2926 // fixes parameter values and that these differ for distinct parts of the
2927 // SCoP. If this happens we cannot consolidate the loads but need to
2928 // create a new invariant load equivalence class.
2929 auto &MAs = IAClass.InvariantAccesses;
2930 if (!MAs.empty()) {
2931 auto *LastMA = MAs.front();
2932
2933 isl::set AR = MA->getAccessRelation().range();
2934 isl::set LastAR = LastMA->getAccessRelation().range();
2935 bool SameAR = AR.is_equal(set2: LastAR);
2936
2937 if (!SameAR)
2938 continue;
2939 }
2940
2941 // Add MA to the list of accesses that are in this class.
2942 MAs.push_front(val: MA);
2943
2944 Consolidated = true;
2945
2946 // Unify the execution context of the class and this statement.
2947 isl::set IAClassDomainCtx = IAClass.ExecutionContext;
2948 if (!IAClassDomainCtx.is_null())
2949 IAClassDomainCtx = IAClassDomainCtx.unite(set2: MACtx).coalesce();
2950 else
2951 IAClassDomainCtx = MACtx;
2952 IAClass.ExecutionContext = IAClassDomainCtx;
2953 break;
2954 }
2955
2956 if (Consolidated)
2957 continue;
2958
2959 MACtx = MACtx.coalesce();
2960
2961 // If we did not consolidate MA, thus did not find an equivalence class
2962 // for it, we create a new one.
2963 scop->addInvariantEquivClass(
2964 InvariantEquivClass: InvariantEquivClassTy{.IdentifyingPointer: PointerSCEV, .InvariantAccesses: MemoryAccessList{MA}, .ExecutionContext: MACtx, .AccessType: Ty});
2965 }
2966}
2967
2968void ScopBuilder::collectCandidateReductionLoads(
2969 MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
2970 ScopStmt *Stmt = StoreMA->getStatement();
2971
2972 auto *Store = dyn_cast<StoreInst>(Val: StoreMA->getAccessInstruction());
2973 if (!Store)
2974 return;
2975
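// The candidate pattern is a store of a single-use, commutative and
// associative binary operator that is computed in the same block and has at
// least one load operand, e.g. something like 'sum = sum + A[i]'.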
2976 // Skip if there is not one binary operator between the load and the store
2977 auto *BinOp = dyn_cast<BinaryOperator>(Val: Store->getValueOperand());
2978 if (!BinOp)
2979 return;
2980
2981 // Skip if the binary operator has multiple uses
2982 if (BinOp->getNumUses() != 1)
2983 return;
2984
2985 // Skip if the opcode of the binary operator is not commutative/associative
2986 if (!BinOp->isCommutative() || !BinOp->isAssociative())
2987 return;
2988
2989 // Skip if the binary operator is not in the same basic block as the store
2990 if (BinOp->getParent() != Store->getParent())
2991 return;
2992
2993 // Skip if it is a multiplicative reduction and we disabled them
2994 if (DisableMultiplicativeReductions &&
2995 (BinOp->getOpcode() == Instruction::Mul ||
2996 BinOp->getOpcode() == Instruction::FMul))
2997 return;
2998
2999 // Check the binary operator operands for a candidate load
3000 auto *PossibleLoad0 = dyn_cast<LoadInst>(Val: BinOp->getOperand(i_nocapture: 0));
3001 auto *PossibleLoad1 = dyn_cast<LoadInst>(Val: BinOp->getOperand(i_nocapture: 1));
3002 if (!PossibleLoad0 && !PossibleLoad1)
3003 return;
3004
3005 // A load is only a candidate if it cannot escape (thus has only this use)
3006 if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
3007 if (PossibleLoad0->getParent() == Store->getParent())
3008 Loads.push_back(Elt: &Stmt->getArrayAccessFor(Inst: PossibleLoad0));
3009 if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
3010 if (PossibleLoad1->getParent() == Store->getParent())
3011 Loads.push_back(Elt: &Stmt->getArrayAccessFor(Inst: PossibleLoad1));
3012}
3013
3014/// Find the canonical ScopArrayInfo object for a set of invariant load
3015/// hoisted loads. The canonical array is the one that corresponds to the
3016/// first load in the list of accesses whose loaded value is used as the
3017/// base pointer of a scop array.
3018static const ScopArrayInfo *findCanonicalArray(Scop &S,
3019 MemoryAccessList &Accesses) {
3020 for (MemoryAccess *Access : Accesses) {
3021 const ScopArrayInfo *CanonicalArray = S.getScopArrayInfoOrNull(
3022 BasePtr: Access->getAccessInstruction(), Kind: MemoryKind::Array);
3023 if (CanonicalArray)
3024 return CanonicalArray;
3025 }
3026 return nullptr;
3027}
3028
3029/// Check if @p Array serves as the base array of an invariant load.
3030static bool isUsedForIndirectHoistedLoad(Scop &S, const ScopArrayInfo *Array) {
3031 for (InvariantEquivClassTy &EqClass2 : S.getInvariantAccesses())
3032 for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
3033 if (Access2->getScopArrayInfo() == Array)
3034 return true;
3035 return false;
3036}
3037
3038/// Replace the base pointer arrays in all memory accesses referencing @p Old,
3039/// with a reference to @p New.
3040static void replaceBasePtrArrays(Scop &S, const ScopArrayInfo *Old,
3041 const ScopArrayInfo *New) {
3042 for (ScopStmt &Stmt : S)
3043 for (MemoryAccess *Access : Stmt) {
3044 if (Access->getLatestScopArrayInfo() != Old)
3045 continue;
3046
3047 isl::id Id = New->getBasePtrId();
3048 isl::map Map = Access->getAccessRelation();
3049 Map = Map.set_tuple_id(type: isl::dim::out, id: Id);
3050 Access->setAccessRelation(Map);
3051 }
3052}
3053
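// Loads in one invariant equivalence class all produce the same value. Arrays
// whose dynamic base pointer is one of these loads therefore describe the
// same memory; rewrite accesses to such arrays to use a single canonical
// ScopArrayInfo per equivalence class.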
3054void ScopBuilder::canonicalizeDynamicBasePtrs() {
3055 for (InvariantEquivClassTy &EqClass : scop->InvariantEquivClasses) {
3056 MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;
3057
3058 const ScopArrayInfo *CanonicalBasePtrSAI =
3059 findCanonicalArray(S&: *scop, Accesses&: BasePtrAccesses);
3060
3061 if (!CanonicalBasePtrSAI)
3062 continue;
3063
3064 for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
3065 const ScopArrayInfo *BasePtrSAI = scop->getScopArrayInfoOrNull(
3066 BasePtr: BasePtrAccess->getAccessInstruction(), Kind: MemoryKind::Array);
3067 if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
3068 !BasePtrSAI->isCompatibleWith(Array: CanonicalBasePtrSAI))
3069 continue;
3070
3071 // We currently do not canonicalize arrays for which some accesses are
3072 // hoisted as invariant loads. If we did, we would also have to update
3073 // the access functions of the invariant loads. However, as this is not
3074 // a very common situation, we leave this for now to avoid further
3075 // complexity increases.
3076 if (isUsedForIndirectHoistedLoad(S&: *scop, Array: BasePtrSAI))
3077 continue;
3078
3079 replaceBasePtrArrays(S&: *scop, Old: BasePtrSAI, New: CanonicalBasePtrSAI);
3080 }
3081 }
3082}
3083
3084void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) {
3085 for (MemoryAccess *Access : Stmt.MemAccs) {
3086 Type *ElementType = Access->getElementType();
3087
3088 MemoryKind Ty;
3089 if (Access->isPHIKind())
3090 Ty = MemoryKind::PHI;
3091 else if (Access->isExitPHIKind())
3092 Ty = MemoryKind::ExitPHI;
3093 else if (Access->isValueKind())
3094 Ty = MemoryKind::Value;
3095 else
3096 Ty = MemoryKind::Array;
3097
3098 // Create isl::pw_aff for SCEVs which describe sizes. Collect all
3099 // assumptions which are taken. isl::pw_aff objects are cached internally
3100 // and they are used later by scop.
3101 for (const SCEV *Size : Access->Sizes) {
3102 if (!Size)
3103 continue;
3104 scop->getPwAff(E: Size, BB: nullptr, NonNegative: false, RecordedAssumptions: &RecordedAssumptions);
3105 }
3106 auto *SAI = scop->getOrCreateScopArrayInfo(BasePtr: Access->getOriginalBaseAddr(),
3107 ElementType, Sizes: Access->Sizes, Kind: Ty);
3108
3109 // Create isl::pw_aff for SCEVs which describe subscripts. Collect all
3110 // assumptions which are taken. isl::pw_aff objects are cached internally
3111 // and they are used later by scop.
3112 for (const SCEV *Subscript : Access->subscripts()) {
3113 if (!Access->isAffine() || !Subscript)
3114 continue;
3115 scop->getPwAff(E: Subscript, BB: Stmt.getEntryBlock(), NonNegative: false,
3116 RecordedAssumptions: &RecordedAssumptions);
3117 }
3118 Access->buildAccessRelation(SAI);
3119 scop->addAccessData(Access);
3120 }
3121}
3122
3123/// Add the minimal/maximal access in @p Set to @p User.
3124///
3125/// @return True if more accesses should be added, false if we reached the
3126/// maximal number of run-time checks to be generated.
3127static bool buildMinMaxAccess(isl::set Set,
3128 Scop::MinMaxVectorTy &MinMaxAccesses, Scop &S) {
3129 isl::pw_multi_aff MinPMA, MaxPMA;
3130 isl::pw_aff LastDimAff;
3131 isl::aff OneAff;
3132 unsigned Pos;
3133
3134 Set = Set.remove_divs();
3135 polly::simplify(Set);
3136
3137 if (unsignedFromIslSize(Size: Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts)
3138 Set = Set.simple_hull();
3139
3140 // Restrict the number of parameters involved in the access as the lexmin/
3141 // lexmax computation will take too long if this number is high.
3142 //
3143 // Experiments with a simple test case using an i7 4800MQ:
3144 //
3145 // #Parameters involved | Time (in sec)
3146 // 6 | 0.01
3147 // 7 | 0.04
3148 // 8 | 0.12
3149 // 9 | 0.40
3150 // 10 | 1.54
3151 // 11 | 6.78
3152 // 12 | 30.38
3153 //
3154 if (isl_set_n_param(set: Set.get()) >
3155 static_cast<isl_size>(RunTimeChecksMaxParameters)) {
3156 unsigned InvolvedParams = 0;
3157 for (unsigned u = 0, e = isl_set_n_param(set: Set.get()); u < e; u++)
3158 if (Set.involves_dims(type: isl::dim::param, first: u, n: 1))
3159 InvolvedParams++;
3160
3161 if (InvolvedParams > RunTimeChecksMaxParameters)
3162 return false;
3163 }
3164
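// For every valuation of the parameters, compute the lexicographically
// smallest and largest element accessed in this set. These two bounds
// enclose the accessed memory region and are compared at run time.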
3165 MinPMA = Set.lexmin_pw_multi_aff();
3166 MaxPMA = Set.lexmax_pw_multi_aff();
3167
3168 MinPMA = MinPMA.coalesce();
3169 MaxPMA = MaxPMA.coalesce();
3170
3171 if (MaxPMA.is_null())
3172 return false;
3173
3174 unsigned MaxOutputSize = unsignedFromIslSize(Size: MaxPMA.dim(type: isl::dim::out));
3175
3176 // Adjust the last dimension of the maximal access by one as we want to
3177 // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
3178 // we test during code generation might now point after the end of the
3179 // allocated array but we will never dereference it anyway.
3180 assert(MaxOutputSize >= 1 && "Assumed at least one output dimension");
3181
3182 Pos = MaxOutputSize - 1;
3183 LastDimAff = MaxPMA.at(pos: Pos);
3184 OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
3185 OneAff = OneAff.add_constant_si(v: 1);
3186 LastDimAff = LastDimAff.add(pwaff2: OneAff);
3187 MaxPMA = MaxPMA.set_pw_aff(pos: Pos, pa: LastDimAff);
3188
3189 if (MinPMA.is_null() || MaxPMA.is_null())
3190 return false;
3191
3192 MinMaxAccesses.push_back(Elt: std::make_pair(x&: MinPMA, y&: MaxPMA));
3193
3194 return true;
3195}
3196
3197/// Wrapper function to calculate minimal/maximal accesses to each array.
3198bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup,
3199 Scop::MinMaxVectorTy &MinMaxAccesses) {
3200 MinMaxAccesses.reserve(N: AliasGroup.size());
3201
3202 isl::union_set Domains = scop->getDomains();
3203 isl::union_map Accesses = isl::union_map::empty(ctx: scop->getIslCtx());
3204
3205 for (MemoryAccess *MA : AliasGroup)
3206 Accesses = Accesses.unite(umap2: MA->getAccessRelation());
3207
3208 Accesses = Accesses.intersect_domain(uset: Domains);
3209 isl::union_set Locations = Accesses.range();
3210
3211 bool LimitReached = false;
3212 for (isl::set Set : Locations.get_set_list()) {
3213 LimitReached |= !buildMinMaxAccess(Set, MinMaxAccesses, S&: *scop);
3214 if (LimitReached)
3215 break;
3216 }
3217
3218 return !LimitReached;
3219}
3220
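/// Return the domain of @p MA's statement with all set dimensions projected
/// out and the tuple identifier removed, i.e. a purely parametric set that
/// can be compared with the domains of accesses from other statements.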
3221static isl::set getAccessDomain(MemoryAccess *MA) {
3222 isl::set Domain = MA->getStatement()->getDomain();
3223 Domain = Domain.project_out(type: isl::dim::set, first: 0,
3224 n: unsignedFromIslSize(Size: Domain.tuple_dim()));
3225 return Domain.reset_tuple_id();
3226}
3227
3228bool ScopBuilder::buildAliasChecks() {
3229 if (!PollyUseRuntimeAliasChecks)
3230 return true;
3231
3232 if (buildAliasGroups()) {
3233 // Aliasing assumptions do not go through addAssumption but we still want to
3234 // collect statistics so we do it here explicitly.
3235 if (scop->getAliasGroups().size())
3236 Scop::incrementNumberOfAliasingAssumptions(Step: 1);
3237 return true;
3238 }
3239
3240 // If a problem occurs while building the alias groups we need to delete
3241 // this SCoP and pretend it wasn't valid in the first place. To this end
3242 // we make the assumed context infeasible.
3243 scop->invalidate(Kind: ALIASING, Loc: DebugLoc());
3244
3245 POLLY_DEBUG(dbgs() << "\n\nNOTE: Run time checks for " << scop->getNameStr()
3246 << " could not be created. This SCoP has been dismissed.");
3247 return false;
3248}
3249
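// Group all array accesses of the SCoP by their may-alias behavior: every
// alias set computed by the AliasSetTracker that contains at least two
// pointers (and is neither a must-alias nor a forwarding set) becomes one
// candidate alias group. Arrays that are written to are collected separately.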
3250std::tuple<ScopBuilder::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
3251ScopBuilder::buildAliasGroupsForAccesses() {
3252 BatchAAResults BAA(AA);
3253 AliasSetTracker AST(BAA);
3254
3255 DenseMap<Value *, MemoryAccess *> PtrToAcc;
3256 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3257 for (ScopStmt &Stmt : *scop) {
3258
3259 isl::set StmtDomain = Stmt.getDomain();
3260 bool StmtDomainEmpty = StmtDomain.is_empty();
3261
3262 // Statements with an empty domain will never be executed.
3263 if (StmtDomainEmpty)
3264 continue;
3265
3266 for (MemoryAccess *MA : Stmt) {
3267 if (MA->isScalarKind())
3268 continue;
3269 if (!MA->isRead())
3270 HasWriteAccess.insert(V: MA->getScopArrayInfo());
3271 MemAccInst Acc(MA->getAccessInstruction());
3272 if (MA->isRead() && isa<MemTransferInst>(Val: Acc))
3273 PtrToAcc[cast<MemTransferInst>(Val&: Acc)->getRawSource()] = MA;
3274 else
3275 PtrToAcc[Acc.getPointerOperand()] = MA;
3276 AST.add(I: Acc);
3277 }
3278 }
3279
3280 AliasGroupVectorTy AliasGroups;
3281 for (AliasSet &AS : AST) {
3282 if (AS.isMustAlias() || AS.isForwardingAliasSet())
3283 continue;
3284 AliasGroupTy AG;
3285 for (const Value *Ptr : AS.getPointers())
3286 AG.push_back(Elt: PtrToAcc[const_cast<Value *>(Ptr)]);
3287 if (AG.size() < 2)
3288 continue;
3289 AliasGroups.push_back(Elt: std::move(AG));
3290 }
3291
3292 return std::make_tuple(args&: AliasGroups, args&: HasWriteAccess);
3293}
3294
3295bool ScopBuilder::buildAliasGroups() {
3296 // To create sound alias checks we perform the following steps:
3297 // o) We partition each group into read-only and non-read-only accesses.
3298 // o) For each group with more than one base pointer we then compute minimal
3299 // and maximal accesses to each array of the group in the read-only and
3300 // non-read-only partitions separately.
3301 AliasGroupVectorTy AliasGroups;
3302 DenseSet<const ScopArrayInfo *> HasWriteAccess;
3303
3304 std::tie(args&: AliasGroups, args&: HasWriteAccess) = buildAliasGroupsForAccesses();
3305
3306 splitAliasGroupsByDomain(AliasGroups);
3307
3308 for (AliasGroupTy &AG : AliasGroups) {
3309 if (!scop->hasFeasibleRuntimeContext())
3310 return false;
3311
3312 {
3313 IslMaxOperationsGuard MaxOpGuard(scop->getIslCtx().get(), OptComputeOut);
3314 bool Valid = buildAliasGroup(AliasGroup&: AG, HasWriteAccess);
3315 if (!Valid)
3316 return false;
3317 }
3318 if (isl_ctx_last_error(ctx: scop->getIslCtx().get()) == isl_error_quota) {
3319 scop->invalidate(Kind: COMPLEXITY, Loc: DebugLoc());
3320 return false;
3321 }
3322 }
3323
3324 return true;
3325}
3326
3327bool ScopBuilder::buildAliasGroup(
3328 AliasGroupTy &AliasGroup, DenseSet<const ScopArrayInfo *> HasWriteAccess) {
3329 AliasGroupTy ReadOnlyAccesses;
3330 AliasGroupTy ReadWriteAccesses;
3331 SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
3332 SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;
3333
3334 if (AliasGroup.size() < 2)
3335 return true;
3336
3337 for (MemoryAccess *Access : AliasGroup) {
3338 ORE.emit(OptDiag&: OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
3339 Access->getAccessInstruction())
3340 << "Possibly aliasing pointer, use restrict keyword.");
3341 const ScopArrayInfo *Array = Access->getScopArrayInfo();
3342 if (HasWriteAccess.count(V: Array)) {
3343 ReadWriteArrays.insert(Ptr: Array);
3344 ReadWriteAccesses.push_back(Elt: Access);
3345 } else {
3346 ReadOnlyArrays.insert(Ptr: Array);
3347 ReadOnlyAccesses.push_back(Elt: Access);
3348 }
3349 }
3350
3351 // If there are no read-only pointers and fewer than two read-write pointers,
3352 // no alias check is needed.
3353 if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
3354 return true;
3355
3356 // If there is no read-write pointer, no alias check is needed.
3357 if (ReadWriteArrays.empty())
3358 return true;
3359
3360 // For non-affine accesses, no alias check can be generated as we cannot
3361 // compute a sufficiently tight lower and upper bound: bail out.
3362 for (MemoryAccess *MA : AliasGroup) {
3363 if (!MA->isAffine()) {
3364 scop->invalidate(Kind: ALIASING, Loc: MA->getAccessInstruction()->getDebugLoc(),
3365 BB: MA->getAccessInstruction()->getParent());
3366 return false;
3367 }
3368 }
3369
3370 // Ensure that for all memory accesses for which we generate alias checks,
3371 // their base pointers are available.
3372 for (MemoryAccess *MA : AliasGroup) {
3373 if (MemoryAccess *BasePtrMA = scop->lookupBasePtrAccess(MA))
3374 scop->addRequiredInvariantLoad(
3375 LI: cast<LoadInst>(Val: BasePtrMA->getAccessInstruction()));
3376 }
3377
3378 // scop->getAliasGroups().emplace_back();
3379 // Scop::MinMaxVectorPairTy &pair = scop->getAliasGroups().back();
3380 Scop::MinMaxVectorTy MinMaxAccessesReadWrite;
3381 Scop::MinMaxVectorTy MinMaxAccessesReadOnly;
3382
3383 bool Valid;
3384
3385 Valid = calculateMinMaxAccess(AliasGroup: ReadWriteAccesses, MinMaxAccesses&: MinMaxAccessesReadWrite);
3386
3387 if (!Valid)
3388 return false;
3389
3390 // Bail out if the number of values we need to compare is too large.
3391 // This is important as the number of comparisons grows quadratically with
3392 // the number of values we need to compare.
3393 if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
3394 RunTimeChecksMaxArraysPerGroup)
3395 return false;
3396
3397 Valid = calculateMinMaxAccess(AliasGroup: ReadOnlyAccesses, MinMaxAccesses&: MinMaxAccessesReadOnly);
3398
3399 scop->addAliasGroup(MinMaxAccessesReadWrite, MinMaxAccessesReadOnly);
3400 if (!Valid)
3401 return false;
3402
3403 return true;
3404}
3405
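// Accesses whose statement domains are disjoint can never be executed
// together and thus need no pairwise alias check. Move such accesses out of
// their group into a new group, which is appended to the work list and
// processed again by the enclosing loop.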
3406void ScopBuilder::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
3407 for (unsigned u = 0; u < AliasGroups.size(); u++) {
3408 AliasGroupTy NewAG;
3409 AliasGroupTy &AG = AliasGroups[u];
3410 AliasGroupTy::iterator AGI = AG.begin();
3411 isl::set AGDomain = getAccessDomain(MA: *AGI);
3412 while (AGI != AG.end()) {
3413 MemoryAccess *MA = *AGI;
3414 isl::set MADomain = getAccessDomain(MA);
3415 if (AGDomain.is_disjoint(set2: MADomain)) {
3416 NewAG.push_back(Elt: MA);
3417 AGI = AG.erase(CI: AGI);
3418 } else {
3419 AGDomain = AGDomain.unite(set2: MADomain);
3420 AGI++;
3421 }
3422 }
3423 if (NewAG.size() > 1)
3424 AliasGroups.push_back(Elt: std::move(NewAG));
3425 }
3426}
3427
3428#ifndef NDEBUG
3429static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) {
3430 auto PhysUse = VirtualUse::create(S, U: Op, LI: &LI, Virtual: false);
3431 auto VirtUse = VirtualUse::create(S, U: Op, LI: &LI, Virtual: true);
3432 assert(PhysUse.getKind() == VirtUse.getKind());
3433}
3434
3435/// Check the consistency of every statement's MemoryAccesses.
3436///
3437/// The check is carried out by expecting the "physical" kind of use (derived
3438/// from the BasicBlock the instruction resides in) to be the same as the "virtual"
3439/// kind of use (derived from a statement's MemoryAccess).
3440///
3441/// The "physical" uses are taken by ensureValueRead to determine whether to
3442/// create MemoryAccesses. When done, the kind of scalar access should be the
3443/// same no matter which way it was derived.
3444///
3445/// The MemoryAccesses might be changed by later SCoP-modifying passes and hence
3446/// can intentionally change the kind of a use (no longer corresponding to the
3447/// "physical" use, hence called "virtual"). The CodeGenerator therefore has to
3448/// pick up the virtual uses. Here in the ScopBuilder, however, no such pass has
3449/// run yet, so virtual and physical uses are still equivalent.
3450static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) {
3451 for (auto *BB : S->getRegion().blocks()) {
3452 for (auto &Inst : *BB) {
3453 auto *Stmt = S->getStmtFor(Inst: &Inst);
3454 if (!Stmt)
3455 continue;
3456
3457 if (isIgnoredIntrinsic(V: &Inst))
3458 continue;
3459
3460 // Branch conditions are encoded in the statement domains.
3461 if (Inst.isTerminator() && Stmt->isBlockStmt())
3462 continue;
3463
3464 // Verify all uses.
3465 for (auto &Op : Inst.operands())
3466 verifyUse(S, Op, LI);
3467
3468 // Stores do not produce values used by other statements.
3469 if (isa<StoreInst>(Val: Inst))
3470 continue;
3471
3472 // For every value defined in the block, also check that a use of that
3473 // value in the same statement would not be an inter-statement use. It can
3474 // still be synthesizable or load-hoisted, but such uses are not directly
3475 // copied during code generation.
3476 auto VirtDef =
3477 VirtualUse::create(S, UserStmt: Stmt, UserScope: Stmt->getSurroundingLoop(), Val: &Inst, Virtual: true);
3478 assert(VirtDef.getKind() == VirtualUse::Synthesizable ||
3479 VirtDef.getKind() == VirtualUse::Intra ||
3480 VirtDef.getKind() == VirtualUse::Hoisted);
3481 }
3482 }
3483
3484 if (S->hasSingleExitEdge())
3485 return;
3486
3487 // PHINodes in the SCoP region's exit block are also uses to be checked.
3488 if (!S->getRegion().isTopLevelRegion()) {
3489 for (auto &Inst : *S->getRegion().getExit()) {
3490 if (!isa<PHINode>(Val: Inst))
3491 break;
3492
3493 for (auto &Op : Inst.operands())
3494 verifyUse(S, Op, LI);
3495 }
3496 }
3497}
3498#endif
3499
3500void ScopBuilder::buildScop(Region &R, AssumptionCache &AC) {
3501 scop.reset(p: new Scop(R, SE, LI, DT, *SD.getDetectionContext(R: &R), ORE,
3502 SD.getNextID()));
3503
3504 buildStmts(SR&: R);
3505
3506 // Create memory accesses for all required invariant load instructions first.
3507 // These loads are categorized as 'synthesizable' and therefore are not part
3508 // of any ScopStmt, but their accesses still need to be created somewhere.
3509 const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads();
3510 for (BasicBlock *BB : scop->getRegion().blocks()) {
3511 if (SD.isErrorBlock(BB&: *BB, R: scop->getRegion()))
3512 continue;
3513
3514 for (Instruction &Inst : *BB) {
3515 LoadInst *Load = dyn_cast<LoadInst>(Val: &Inst);
3516 if (!Load)
3517 continue;
3518
3519 if (!RIL.count(key: Load))
3520 continue;
3521
3522 // Invariant loads require a MemoryAccess to be created in some statement.
3523 // It is not important to which statement the MemoryAccess is added
3524 // because it will later be removed from the ScopStmt again. We choose the
3525 // first statement of the basic block the LoadInst is in.
3526 ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB);
3527 assert(!List.empty());
3528 ScopStmt *RILStmt = List.front();
3529 buildMemoryAccess(Inst: Load, Stmt: RILStmt);
3530 }
3531 }
3532 buildAccessFunctions();
3533
3534 // In case the region does not have an exiting block we will later (during
3535 // code generation) split the exit block. This will move potential PHI nodes
3536 // from the current exit block into the new region exiting block. Hence, PHI
3537 // nodes that are at this point not part of the region will become part of it.
3538 // To handle these PHI nodes later we will now model their operands as scalar
3539 // accesses. Note that we do not model anything in the exit block if the
3540 // region has a single exit edge, as there will not be any splitting later.
3541 if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) {
3542 for (Instruction &Inst : *R.getExit()) {
3543 PHINode *PHI = dyn_cast<PHINode>(Val: &Inst);
3544 if (!PHI)
3545 break;
3546
3547 buildPHIAccesses(PHIStmt: nullptr, PHI, NonAffineSubRegion: nullptr, IsExitBlock: true);
3548 }
3549 }
3550
3551 // Create memory accesses for global reads since all arrays are now known.
3552 auto *AF = SE.getConstant(Ty: IntegerType::getInt64Ty(C&: SE.getContext()), V: 0);
3553 for (auto GlobalReadPair : GlobalReads) {
3554 ScopStmt *GlobalReadStmt = GlobalReadPair.first;
3555 Instruction *GlobalRead = GlobalReadPair.second;
3556 for (auto *BP : ArrayBasePointers)
3557 addArrayAccess(Stmt: GlobalReadStmt, MemAccInst: MemAccInst(GlobalRead), AccType: MemoryAccess::READ,
3558 BaseAddress: BP, ElementType: BP->getType(), IsAffine: false, Subscripts: {AF}, Sizes: {nullptr}, AccessValue: GlobalRead);
3559 }
3560
3561 buildInvariantEquivalenceClasses();
3562
3563 /// A map from basic blocks to their invalid domains.
3564 DenseMap<BasicBlock *, isl::set> InvalidDomainMap;
3565
3566 if (!buildDomains(R: &R, InvalidDomainMap)) {
3567 POLLY_DEBUG(
3568 dbgs() << "Bailing-out because buildDomains encountered problems\n");
3569 return;
3570 }
3571
3572 addUserAssumptions(AC, InvalidDomainMap);
3573
3574 // Initialize the invalid domain.
3575 for (ScopStmt &Stmt : scop->Stmts)
3576 if (Stmt.isBlockStmt())
3577 Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]);
3578 else
3579 Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock(
3580 RN: Stmt.getRegion()->getNode())]);
3581
3582 // Remove empty statements.
3583 // Exit early in case there are no executable statements left in this scop.
3584 scop->removeStmtNotInDomainMap();
3585 scop->simplifySCoP(AfterHoisting: false);
3586 if (scop->isEmpty()) {
3587 POLLY_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n");
3588 return;
3589 }
3590
3591 // The ScopStmts now have enough information to initialize themselves.
3592 for (ScopStmt &Stmt : *scop) {
3593 collectSurroundingLoops(Stmt);
3594
3595 buildDomain(Stmt);
3596 buildAccessRelations(Stmt);
3597
3598 if (DetectReductions)
3599 checkForReductions(Stmt);
3600 }
3601
3602 // Check early for a feasible runtime context.
3603 if (!scop->hasFeasibleRuntimeContext()) {
3604 POLLY_DEBUG(
3605 dbgs() << "Bailing-out because of unfeasible context (early)\n");
3606 return;
3607 }
3608
3609 // Check early for profitability. Afterwards it cannot change anymore,
3610 // only the runtime context could become infeasible.
3611 if (!scop->isProfitable(ScalarsAreUnprofitable: UnprofitableScalarAccs)) {
3612 scop->invalidate(Kind: PROFITABLE, Loc: DebugLoc());
3613 POLLY_DEBUG(
3614 dbgs() << "Bailing-out because SCoP is not considered profitable\n");
3615 return;
3616 }
3617
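// Domains and access relations are now in place; build the schedule tree
// that defines the execution order of the statements.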
3618 buildSchedule();
3619
3620 finalizeAccesses();
3621
3622 scop->realignParams();
3623 addUserContext();
3624
3625 // After the context was fully constructed, thus all our knowledge about
3626 // the parameters is in there, we add all recorded assumptions to the
3627 // assumed/invalid context.
3628 addRecordedAssumptions();
3629
3630 scop->simplifyContexts();
3631 if (!buildAliasChecks()) {
3632 POLLY_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n");
3633 return;
3634 }
3635
3636 hoistInvariantLoads();
3637 canonicalizeDynamicBasePtrs();
3638 verifyInvariantLoads();
3639 scop->simplifySCoP(AfterHoisting: true);
3640
3641 // Check late for a feasible runtime context because profitability did not
3642 // change.
3643 if (!scop->hasFeasibleRuntimeContext()) {
3644 POLLY_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n");
3645 return;
3646 }
3647
3648#ifndef NDEBUG
3649 verifyUses(S: scop.get(), LI, DT);
3650#endif
3651}
3652
3653ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AAResults &AA,
3654 const DataLayout &DL, DominatorTree &DT, LoopInfo &LI,
3655 ScopDetection &SD, ScalarEvolution &SE,
3656 OptimizationRemarkEmitter &ORE)
3657 : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE), ORE(ORE) {
3658 DebugLoc Beg, End;
3659 auto P = getBBPairForRegion(R);
3660 getDebugLocations(P, Begin&: Beg, End);
3661
3662 std::string Msg = "SCoP begins here.";
3663 ORE.emit(OptDiag&: OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first)
3664 << Msg);
3665
3666 buildScop(R&: *R, AC);
3667
3668 POLLY_DEBUG(dbgs() << *scop);
3669
3670 if (!scop->hasFeasibleRuntimeContext()) {
3671 InfeasibleScops++;
3672 Msg = "SCoP ends here but was dismissed.";
3673 POLLY_DEBUG(dbgs() << "SCoP detected but dismissed\n");
3674 RecordedAssumptions.clear();
3675 scop.reset();
3676 } else {
3677 Msg = "SCoP ends here.";
3678 ++ScopFound;
3679 if (scop->getMaxLoopDepth() > 0)
3680 ++RichScopFound;
3681 }
3682
3683 if (R->isTopLevelRegion())
3684 ORE.emit(OptDiag&: OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first)
3685 << Msg);
3686 else
3687 ORE.emit(OptDiag&: OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second)
3688 << Msg);
3689}
3690
