1//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Detect the maximal Scops of a function.
10//
11// A static control part (Scop) is a subgraph of the control flow graph (CFG)
12// that only has statically known control flow and can therefore be described
13// within the polyhedral model.
14//
15// Every Scop fulfills these restrictions:
16//
17// * It is a single entry single exit region
18//
19// * Only affine linear bounds in the loops
20//
21// Every natural loop in a Scop must have a number of loop iterations that can
22// be described as an affine linear function in surrounding loop iterators or
23// parameters. (A parameter is a scalar that does not change its value during
24// execution of the Scop).
25//
26// * Only comparisons of affine linear expressions in conditions
27//
28// * All loops and conditions perfectly nested
29//
30// The control flow needs to be structured such that it could be written using
31// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32// 'continue'.
33//
// * Side effect free function calls
35//
36// Function calls and intrinsics that do not have side effects (readnone)
37// or memory intrinsics (memset, memcpy, memmove) are allowed.
38//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
43//
44//===----------------------------------------------------------------------===//
45
46#include "polly/ScopDetection.h"
47#include "polly/LinkAllPasses.h"
48#include "polly/Options.h"
49#include "polly/ScopDetectionDiagnostic.h"
50#include "polly/Support/SCEVValidator.h"
51#include "polly/Support/ScopHelper.h"
52#include "polly/Support/ScopLocation.h"
53#include "llvm/ADT/SmallPtrSet.h"
54#include "llvm/ADT/Statistic.h"
55#include "llvm/Analysis/AliasAnalysis.h"
56#include "llvm/Analysis/Delinearization.h"
57#include "llvm/Analysis/Loads.h"
58#include "llvm/Analysis/LoopInfo.h"
59#include "llvm/Analysis/OptimizationRemarkEmitter.h"
60#include "llvm/Analysis/RegionInfo.h"
61#include "llvm/Analysis/ScalarEvolution.h"
62#include "llvm/Analysis/ScalarEvolutionExpressions.h"
63#include "llvm/IR/BasicBlock.h"
64#include "llvm/IR/DebugLoc.h"
65#include "llvm/IR/DerivedTypes.h"
66#include "llvm/IR/DiagnosticInfo.h"
67#include "llvm/IR/DiagnosticPrinter.h"
68#include "llvm/IR/Dominators.h"
69#include "llvm/IR/Function.h"
70#include "llvm/IR/InstrTypes.h"
71#include "llvm/IR/Instruction.h"
72#include "llvm/IR/Instructions.h"
73#include "llvm/IR/IntrinsicInst.h"
74#include "llvm/IR/Metadata.h"
75#include "llvm/IR/Module.h"
76#include "llvm/IR/PassManager.h"
77#include "llvm/IR/Value.h"
78#include "llvm/InitializePasses.h"
79#include "llvm/Pass.h"
80#include "llvm/Support/Debug.h"
81#include "llvm/Support/Regex.h"
82#include "llvm/Support/raw_ostream.h"
83#include <algorithm>
84#include <cassert>
85#include <memory>
86#include <stack>
87#include <string>
88#include <utility>
89#include <vector>
90
91using namespace llvm;
92using namespace polly;
93
94#include "polly/Support/PollyDebug.h"
95#define DEBUG_TYPE "polly-detect"
96
// Command-line options controlling scop detection. Options declared as
// cl::opt<bool, true> write through (via cl::location) to namespace-scope
// polly:: flags so other translation units can read them directly.

// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(Val: 100000000), cl::cat(PollyCategory));

// Externally visible flag: also process scops deemed unprofitable.
bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(L&: PollyProcessUnprofitable), cl::cat(PollyCategory));

// Restrict detection to functions whose name matches one of these regexes.
static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::CommaSeparated, cl::cat(PollyCategory));

// Skip functions whose name matches one of these regexes.
static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::CommaSeparated, cl::cat(PollyCategory));

// Externally visible flag: allow a whole function body to form a scop.
bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(L&: polly::PollyAllowFullFunction),
                       cl::init(Val: false), cl::cat(PollyCategory));

static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(Val: ""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::cat(PollyCategory));

// Externally visible flag: permit unsigned comparisons/zero-extends.
bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(L&: PollyAllowUnsignedOperations), cl::Hidden, cl::init(Val: true),
    cl::cat(PollyCategory));

// Externally visible flag: emit runtime alias checks instead of rejecting.
bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(L&: PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(Val: true),
    cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(Val: true), cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(Val: true), cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(L&: PollyTrackFailures), cl::Hidden, cl::init(Val: true),
                  cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(L&: PollyDelinearize), cl::Hidden,
                      cl::init(Val: true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::cat(PollyCategory));

// Externally visible flag: enable hoisting of invariant loads.
bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true>
    XPollyInvariantLoadHoisting("polly-invariant-load-hoisting",
                                cl::desc("Hoist invariant loads."),
                                cl::location(L&: PollyInvariantLoadHoisting),
                                cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowErrorBlocks(
    "polly-allow-error-blocks",
    cl::desc("Allow to speculate on the execution of 'error blocks'."),
    cl::Hidden, cl::init(Val: true), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Namespace-scope flags written through by the cl::opt<bool, true> options
// above; constant-initialized so the dynamic option initializers can safely
// store into them in any order.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
// Function attribute that marks a function as to-be-skipped by Polly.
StringRef polly::PollySkipFnAttr = "polly.skip.fn";
237
//===----------------------------------------------------------------------===//
// Statistics.

// Detection statistics, bucketed by the maximal loop depth of a scop. The
// "Prof" variants only count scops that also passed the profitability check.
STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthZero,
          "Number of scops with maximal loop depth 0 (profitable scops only)");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger "
          "(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
          "Maximal number of loops in scops (profitable scops only)");

// Defined later in this file; updates the depth-bucketed statistics above.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable);
276
namespace {

/// Diagnostic used to report a detected scop (and, when debug info is
/// available, its source range) through LLVM's diagnostic system.
class DiagnosticScopFound final : public DiagnosticInfo {
private:
  // Dynamically allocated diagnostic kind identifying this plugin diagnostic.
  static int PluginDiagnosticKind;

  // Function containing the scop, and the source file/line range it spans.
  // FileName is empty when no debug info was available.
  Function &F;
  std::string FileName;
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
        EntryLine(EntryLine), ExitLine(ExitLine) {}

  void print(DiagnosticPrinter &DP) const override;

  // Support for LLVM-style RTTI (isa<>/dyn_cast<>).
  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};
} // namespace
300
// Allocate a unique diagnostic kind for this plugin once at startup.
int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();
303
304void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
305 DP << "Polly detected an optimizable loop region (scop) in function '" << F
306 << "'\n";
307
308 if (FileName.empty()) {
309 DP << "Scop location is unknown. Compile with debug info "
310 "(-g) to get more precise information. ";
311 return;
312 }
313
314 DP << FileName << ":" << EntryLine << ": Start of scop\n";
315 DP << FileName << ":" << ExitLine << ": End of scop";
316}
317
318/// Check if a string matches any regex in a list of regexes.
319/// @param Str the input string to match against.
320/// @param RegexList a list of strings that are regular expressions.
321static bool doesStringMatchAnyRegex(StringRef Str,
322 const cl::list<std::string> &RegexList) {
323 for (auto RegexStr : RegexList) {
324 Regex R(RegexStr);
325
326 std::string Err;
327 if (!R.isValid(Error&: Err))
328 report_fatal_error(reason: Twine("invalid regex given as input to polly: ") + Err,
329 gen_crash_diag: true);
330
331 if (R.match(String: Str))
332 return true;
333 }
334 return false;
335}
336
337//===----------------------------------------------------------------------===//
338// ScopDetection.
339
// Wire up the analyses scop detection relies on; the actual detection work
// happens in detect().
ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
                             LoopInfo &LI, RegionInfo &RI, AAResults &AA,
                             OptimizationRemarkEmitter &ORE)
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}
344
// Run scop detection on \p F: find all maximal valid regions, prune the
// unprofitable ones, and emit statistics and missed-optimization remarks.
void ScopDetection::detect(Function &F) {
  assert(ValidRegions.empty() && "Detection must run only once");

  // Without any loop there is nothing profitable to detect unless
  // -polly-process-unprofitable was given.
  if (!PollyProcessUnprofitable && LI.empty())
    return;

  Region *TopRegion = RI.getTopLevelRegion();

  // Honor the -polly-only-func / -polly-ignore-func name filters.
  if (!OnlyFunctions.empty() &&
      !doesStringMatchAnyRegex(Str: F.getName(), RegexList: OnlyFunctions))
    return;

  if (doesStringMatchAnyRegex(Str: F.getName(), RegexList: IgnoredFunctions))
    return;

  if (!isValidFunction(F))
    return;

  // Search for maximal valid regions starting from the top-level region;
  // results accumulate in ValidRegions/DetectionContextMap.
  findScops(R&: *TopRegion);

  NumScopRegions += ValidRegions.size();

  // Prune non-profitable regions.
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond().get();
    // Skip contexts that were rejected or whose region is not (or no longer)
    // in the valid set.
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(key: &DC.CurRegion))
      continue;
    LoopStats Stats = countBeneficialLoops(R: &DC.CurRegion, SE, LI, MinProfitableTrips: 0);
    updateLoopCountStatistic(Stats, OnlyProfitable: false /* OnlyProfitable */);
    if (isProfitableRegion(Context&: DC)) {
      updateLoopCountStatistic(Stats, OnlyProfitable: true /* OnlyProfitable */);
      continue;
    }

    // Unprofitable: drop the region from the valid set.
    ValidRegions.remove(X: &DC.CurRegion);
  }

  NumProfScopRegions += ValidRegions.size();
  NumLoopsOverall += countBeneficialLoops(R: TopRegion, SE, LI, MinProfitableTrips: 0).NumLoops;

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  // -polly-report: print the source locations of all detected scops.
  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
}
397
398template <class RR, typename... Args>
399inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
400 Args &&...Arguments) const {
401 if (!Context.Verifying) {
402 RejectLog &Log = Context.Log;
403 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
404 Context.IsInvalid = true;
405
406 // Log even if PollyTrackFailures is false, the log entries are also used in
407 // canUseISLTripCount().
408 Log.report(Reject: RejectReason);
409
410 POLLY_DEBUG(dbgs() << RejectReason->getMessage());
411 POLLY_DEBUG(dbgs() << "\n");
412 } else {
413 assert(!Assert && "Verification of detected scop failed");
414 }
415
416 return false;
417}
418
// Return true if \p R is a maximal valid region. With \p Verify set, the
// cached detection result is discarded and the region is re-analyzed.
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
  if (!ValidRegions.count(key: &R))
    return false;

  if (Verify) {
    BBPair P = getBBPairForRegion(R: &R);
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];

    // Free previous DetectionContext for the region and create and verify a new
    // one. Be sure that the DetectionContext is not still used by a ScopInfo.
    // Due to changes by CodeGeneration of another Scop, the Region object and
    // the BBPair might not match anymore.
    Entry = std::make_unique<DetectionContext>(args&: const_cast<Region &>(R), args&: AA,
                                               /*Verifying=*/args: false);

    return isValidRegion(Context&: *Entry.get());
  }

  return true;
}
439
440std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
441 // Get the first error we found. Even in keep-going mode, this is the first
442 // reason that caused the candidate to be rejected.
443 auto *Log = lookupRejectionLog(R);
444
445 // This can happen when we marked a region invalid, but didn't track
446 // an error for it.
447 if (!Log || !Log->hasErrors())
448 return "";
449
450 RejectReasonPtr RR = *Log->begin();
451 return RR->getMessage();
452}
453
// Record the non-affine subregion \p AR as overapproximated and collect all
// loops fully contained in it as "boxed" loops. Returns true if the
// overapproximation is allowed under the current options.
bool ScopDetection::addOverApproximatedRegion(Region *AR,
                                              DetectionContext &Context) const {
  // If we already know about AR we can exit.
  if (!Context.NonAffineSubRegionSet.insert(X: AR))
    return true;

  // All loops in the region have to be overapproximated too if there
  // are accesses that depend on the iteration count.

  for (BasicBlock *BB : AR->blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (AR->contains(L))
      Context.BoxedLoopsSet.insert(X: L);
  }

  // Boxed loops are only tolerated when non-affine loops are allowed.
  return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
}
471
472bool ScopDetection::onlyValidRequiredInvariantLoads(
473 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
474 Region &CurRegion = Context.CurRegion;
475 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
476
477 if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
478 return false;
479
480 for (LoadInst *Load : RequiredILS) {
481 // If we already know a load has been accepted as required invariant, we
482 // already run the validation below once and consequently don't need to
483 // run it again. Hence, we return early. For certain test cases (e.g.,
484 // COSMO this avoids us spending 50% of scop-detection time in this
485 // very function (and its children).
486 if (Context.RequiredILS.count(key: Load))
487 continue;
488 if (!isHoistableLoad(LInst: Load, R&: CurRegion, LI, SE, DT, KnownInvariantLoads: Context.RequiredILS))
489 return false;
490
491 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
492 if (isSafeToLoadUnconditionally(V: Load->getPointerOperand(),
493 Ty: Load->getType(), Alignment: Load->getAlign(), DL))
494 continue;
495
496 if (NonAffineRegion->contains(Inst: Load) &&
497 Load->getParent() != NonAffineRegion->getEntry())
498 return false;
499 }
500 }
501
502 Context.RequiredILS.insert(Start: RequiredILS.begin(), End: RequiredILS.end());
503
504 return true;
505}
506
// Return true if \p S0 (and \p S1 if non-null) reference multiple base
// pointers that may alias each other when evaluated in loop \p Scope, or a
// pointer whose base cannot be identified.
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  // Collect all values referenced by the expression(s).
  SetVector<Value *> Values;
  findValues(Expr: S0, SE, Values);
  if (S1)
    findValues(Expr: S1, SE, Values);

  SmallPtrSet<Value *, 8> PtrVals;
  for (auto *V : Values) {
    // Look through ptrtoint casts to the underlying pointer value.
    if (auto *P2I = dyn_cast<PtrToIntInst>(Val: V))
      V = P2I->getOperand(i_nocapture: 0);

    if (!V->getType()->isPointerTy())
      continue;

    // Constant pointers cannot introduce aliasing problems here.
    auto *PtrSCEV = SE.getSCEVAtScope(V, L: Scope);
    if (isa<SCEVConstant>(Val: PtrSCEV))
      continue;

    // Conservatively report multiple pointers if no base can be identified.
    auto *BasePtr = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: PtrSCEV));
    if (!BasePtr)
      return true;

    // Check each newly seen base pointer against all previously seen ones.
    auto *BasePtrVal = BasePtr->getValue();
    if (PtrVals.insert(Ptr: BasePtrVal).second) {
      for (auto *PtrVal : PtrVals)
        if (PtrVal != BasePtrVal && !AA.isNoAlias(V1: PtrVal, V2: BasePtrVal))
          return true;
    }
  }

  return false;
}
540
541bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
542 DetectionContext &Context) const {
543 InvariantLoadsSetTy AccessILS;
544 if (!isAffineExpr(R: &Context.CurRegion, Scope, Expression: S, SE, ILS: &AccessILS))
545 return false;
546
547 if (!onlyValidRequiredInvariantLoads(RequiredILS&: AccessILS, Context))
548 return false;
549
550 return true;
551}
552
// Check whether the switch \p SI in block \p BB has an affine condition or
// can be overapproximated. \p IsLoopBranch indicates the block controls a
// loop, which forbids overapproximation.
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI.getLoopFor(BB: &BB);
  const SCEV *ConditionSCEV = SE.getSCEVAtScope(V: Condition, L);

  // A switch acting as a loop latch is not supported.
  if (IsLoopBranch && L->isLoopLatch(BB: &BB))
    return false;

  // Check for invalid usage of different pointers in one expression.
  if (involvesMultiplePtrs(S0: ConditionSCEV, S1: nullptr, Scope: L))
    return false;

  if (isAffine(S: ConditionSCEV, Scope: L, Context))
    return true;

  // Non-affine switch conditions may be handled by overapproximating the
  // surrounding region.
  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(AR: RI.getRegionFor(BB: &BB), Context))
    return true;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, Arguments: &BB,
                                     Arguments&: ConditionSCEV, Arguments&: ConditionSCEV, Arguments&: SI);
}
576
// Check whether \p Condition of branch \p BI in block \p BB is affine or can
// be handled by overapproximation. \p IsLoopBranch indicates the branch
// controls a loop, which forbids overapproximation.
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Val: Condition))
    return true;

  // For logical and/or conditions, both operands have to be valid conditions
  // on their own; recurse into them.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val: Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(i_nocapture: 0);
      Value *Op1 = BinOp->getOperand(i_nocapture: 1);
      return isValidBranch(BB, BI, Condition: Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Condition: Op1, IsLoopBranch, Context);
    }
  }

  // A PHI whose unique non-error incoming value is the constant 0 or 1 acts
  // like a constant condition.
  if (auto PHI = dyn_cast<PHINode>(Val: Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        Val: getUniqueNonErrorValue(PHI, R: &Context.CurRegion, SD: this));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A load used directly as a branch condition is accepted by recording it as
  // a required invariant load (only for non-loop branches).
  if (auto Load = dyn_cast<LoadInst>(Val: Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Inst: Load)) {
      Context.RequiredILS.insert(X: Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Val: Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(AR: RI.getRegionFor(BB: &BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, Arguments&: BI, Arguments: &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Val: Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(Val: ICmp->getOperand(i_nocapture: 0)) ||
      isa<UndefValue>(Val: ICmp->getOperand(i_nocapture: 1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, Arguments: &BB, Arguments&: ICmp);

  Loop *L = LI.getLoopFor(BB: &BB);
  const SCEV *LHS = SE.getSCEVAtScope(V: ICmp->getOperand(i_nocapture: 0), L);
  const SCEV *RHS = SE.getSCEVAtScope(V: ICmp->getOperand(i_nocapture: 1), L);

  // Try to look through PHIs with a unique non-error incoming value.
  LHS = tryForwardThroughPHI(Expr: LHS, R&: Context.CurRegion, SE, SD: this);
  RHS = tryForwardThroughPHI(Expr: RHS, R&: Context.CurRegion, SE, SD: this);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(AR: RI.getRegionFor(BB: &BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(S0: LHS, S1: nullptr, Scope: L) &&
      involvesMultiplePtrs(S0: RHS, S1: nullptr, Scope: L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(S0: LHS, S1: RHS, Scope: L))
    return false;

  if (isAffine(S: LHS, Scope: L, Context) && isAffine(S: RHS, Scope: L, Context))
    return true;

  // Non-affine compares can still be overapproximated for non-loop branches.
  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(AR: RI.getRegionFor(BB: &BB), Context))
    return true;

  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, Arguments: &BB, Arguments&: LHS, Arguments&: RHS,
                                     Arguments&: ICmp);
}
656
// Check the terminator of \p BB for valid, statically analyzable control
// flow, dispatching to the branch/switch specific checks.
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  Instruction *TI = BB.getTerminator();

  // Unreachable terminators may be tolerated by the caller (error blocks).
  if (AllowUnreachable && isa<UnreachableInst>(Val: TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(Val: TI) && CurRegion.isTopLevelRegion())
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  // Terminators without an extractable condition cannot be modeled.
  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, Arguments: &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Val: Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, Arguments&: TI, Arguments: &BB);

  // Dispatch on the terminator kind: only branches and switches remain.
  if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}
688
// Check whether the call \p CI may appear inside a scop: side-effect free
// calls, supported intrinsics, debug calls, and (with -polly-allow-modref-
// calls) calls with known memory effects.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  // Calls that never return break structured control flow.
  if (CI.doesNotReturn())
    return false;

  // readnone calls are side-effect free and always valid.
  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(Val: &CI))
    if (isValidIntrinsicInst(II&: *II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (isDebugCall(Inst: &CI)) {
    POLLY_DEBUG(dbgs() << "Allow call to debug function: "
                       << CalledFunction->getName() << '\n');
    return true;
  }

  if (AllowModrefCall) {
    MemoryEffects ME = AA.getMemoryEffects(F: CalledFunction);
    if (ME.onlyAccessesArgPointees()) {
      // Only argument pointees are touched: verify each pointer argument.
      for (const auto &Arg : CI.args()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        auto *ArgSCEV = SE.getSCEVAtScope(V: Arg, L: LI.getLoopFor(BB: CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // accesses with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(I: &CI);
      return true;
    }

    if (ME.onlyReadsMemory()) {
      // Implicitly disable delinearization since we have an unknown
      // accesses with an unknown access function.
      Context.HasUnknownAccess = true;
      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(I: &CI);
      return true;
    }
    // Any other memory effects cannot be modeled.
    return false;
  }

  return false;
}
755
// Check whether the intrinsic \p II can be modeled: ignored intrinsics pass,
// memory intrinsics (memset/memcpy/memmove) are checked for representable
// pointers and an affine length; everything else is rejected.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(V: &II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(BB: II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    AF = SE.getSCEVAtScope(V: cast<MemTransferInst>(Val&: II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(Inst: &II, AF, BP, Context))
        return false;
    }
    // memmove/memcpy also write a destination; fall through to validate it
    // (and the length) the same way as for memset.
    [[fallthrough]];
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(V: cast<MemIntrinsic>(Val&: II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(Inst: &II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(S: SE.getSCEVAtScope(V: cast<MemIntrinsic>(Val&: II).getLength(), L), Scope: L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  // All other intrinsics cannot be represented.
  return false;
}
801
802bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
803 DetectionContext &Ctx) const {
804 // A reference to function argument or constant value is invariant.
805 if (isa<Argument>(Val) || isa<Constant>(Val))
806 return true;
807
808 Instruction *I = dyn_cast<Instruction>(Val: &Val);
809 if (!I)
810 return false;
811
812 if (!Reg.contains(Inst: I))
813 return true;
814
815 // Loads within the SCoP may read arbitrary values, need to hoist them. If it
816 // is not hoistable, it will be rejected later, but here we assume it is and
817 // that makes the value invariant.
818 if (auto LI = dyn_cast<LoadInst>(Val: I)) {
819 Ctx.RequiredILS.insert(X: LI);
820 return true;
821 }
822
823 return false;
824}
825
namespace {

/// Remove smax of smax(0, size) expressions from a SCEV expression and
/// register the '...' components.
///
/// Array access expressions as they are generated by GFortran contain smax(0,
/// size) expressions that confuse the 'normal' delinearization algorithm.
/// However, if we extract such expressions before the normal delinearization
/// takes place they can actually help to identify array size expressions in
/// Fortran accesses. For the subsequently following delinearization the smax(0,
/// size) component can be replaced by just 'size'. This is correct as we will
/// always add and verify the assumption that for all subscript expressions
/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
/// that 0 <= size, which means smax(0, size) == size.
class SCEVRemoveMax final : public SCEVRewriteVisitor<SCEVRemoveMax> {
public:
  SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
      : SCEVRewriteVisitor(SE), Terms(Terms) {}

  /// Rewrite \p Scev, optionally recording each stripped 'size' operand in
  /// \p Terms.
  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
                             std::vector<const SCEV *> *Terms = nullptr) {
    SCEVRemoveMax Rewriter(SE, Terms);
    return Rewriter.visit(S: Scev);
  }

  const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
    // Only the binary smax(0, size) pattern is rewritten; replace it by the
    // (recursively rewritten) 'size' operand.
    if ((Expr->getNumOperands() == 2) && Expr->getOperand(i: 0)->isZero()) {
      auto Res = visit(S: Expr->getOperand(i: 1));
      if (Terms)
        (*Terms).push_back(x: Res);
      return Res;
    }

    return Expr;
  }

private:
  // Optional collector for the 'size' expressions stripped from smax(0, size);
  // may be null when callers are not interested in them.
  std::vector<const SCEV *> *Terms;
};
} // namespace
866
// Collect SCEV terms that are candidate array-size expressions for all
// accesses based on \p BasePointer; these feed the delinearization of
// multi-dimensional array accesses.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    // Prefer the 'size' operands stripped from smax(0, size) expressions
    // (GFortran-style accesses, see SCEVRemoveMax) when present.
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Scev: Pair.second, SE, Terms: &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(I: Terms.begin(), From: MaxTerms.begin(), To: MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Val: Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Val: Op))
          collectParametricTerms(SE, Expr: AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Val: Op)) {
          SmallVector<const SCEV *, 0> Operands;

          // Keep constants and unknowns defined outside the region; drop
          // unknowns defined inside (likely induction-variable dependent).
          for (auto *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(Val: MulOp))
              Operands.push_back(Elt: Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(Val: MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Val: Unknown->getValue())) {
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(Elt: MulOp);

              } else {
                Operands.push_back(Elt: MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(Elt: SE.getMulExpr(Ops&: Operands));
        }
      }
    }
    // Fall back to generic parametric-term collection if nothing was found.
    if (Terms.empty())
      collectParametricTerms(SE, Expr: Pair.second, Terms);
  }
  return Terms;
}
916
/// Check that the array sizes derived by delinearization for @p BasePointer
/// are affine, region-invariant expressions.
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  // If no sizes were found, all sizes are trivially valid. We allow this case
  // to make it possible to pass known-affine accesses to the delinearization to
  // try to recover some interesting multi-dimensional accesses, but to still
  // allow the already known to be affine access in case the delinearization
  // fails. In such situations, the delinearization will just return a Sizes
  // array of size zero.
  if (Sizes.size() == 0)
    return true;

  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // Don't pass down the scope to isAfffine; array dimensions must be
    // invariant across the entire scop.
    if (!isAffine(S: DelinearizedSize, Scope: nullptr, Context)) {
      // Discard the whole derived shape; the "No array shape derived"
      // handling below decides whether that is acceptable.
      Sizes.clear();
      break;
    }
    // A size that is a load from within the region is acceptable if the load
    // can be hoisted in front of the region (invariant load).
    if (auto *Unknown = dyn_cast<SCEVUnknown>(Val: DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Val: Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(Val: V)) {
        if (Context.CurRegion.contains(Inst: Load) &&
            isHoistableLoad(LInst: Load, R&: CurRegion, LI, SE, DT, KnownInvariantLoads: Context.RequiredILS))
          Context.RequiredILS.insert(X: Load);
        continue;
      }
    }
    // Sizes that depend on scalars computed inside the region cannot act as
    // SCoP parameters.
    if (hasScalarDepsInsideRegion(Expr: DelinearizedSize, R: &CurRegion, Scope, AllowLoops: false,
                                  ILS: Context.RequiredILS))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, Arguments&: DelinearizedSize,
          Arguments&: Context.Accesses[BasePointer].front().first, Arguments&: BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    // Without a shape, each access must be affine on its own; report every
    // offending access (all of them when KeepGoing is set).
    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(S: AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments&: AF, Arguments&: Insn,
                                       Arguments&: BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}
975
// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip smax(0, x) wrappers before delinearizing (see SCEVRemoveMax).
    AF = SCEVRemoveMax::rewrite(Scev: AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(x: std::make_pair(x&: Insn, y: MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(x: Insn)->second;
    auto *Scope = LI.getLoopFor(BB: Insn->getParent());

    if (!AF) {
      // No rewritten access function; fall back to the raw expression.
      if (isAffine(S: Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Elt: Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No sizes derived: treat the entire access function as the single
        // subscript.
        Acc->DelinearizedSubscripts.push_back(Elt: AF);
      } else {
        // Split AF into per-dimension subscripts according to the derived
        // sizes; an empty result means delinearization failed.
        llvm::computeAccessFunctions(SE, Expr: AF, Subscripts&: Acc->DelinearizedSubscripts,
                                     Sizes&: Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Every individual subscript must itself be affine.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments: Pair.second,
                                       Arguments&: Insn, Arguments&: BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
  }

  // Commit the delinearized accesses only if every access for this base
  // pointer turned out affine.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(first: TempMemoryAccesses.begin(),
                                last: TempMemoryAccesses.end());

  return true;
}
1034
1035bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
1036 const SCEVUnknown *BasePointer,
1037 Loop *Scope) const {
1038 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1039
1040 auto Terms = getDelinearizationTerms(Context, BasePointer);
1041
1042 findArrayDimensions(SE, Terms, Sizes&: Shape->DelinearizedSizes,
1043 ElementSize: Context.ElementSize[BasePointer]);
1044
1045 if (!hasValidArraySizes(Context, Sizes&: Shape->DelinearizedSizes, BasePointer,
1046 Scope))
1047 return false;
1048
1049 return computeAccessFunctions(Context, BasePointer, Shape);
1050}
1051
/// Try to delinearize the accesses of every base pointer that has non-affine
/// accesses; mark the context invalid when that fails and non-affine
/// accesses are not allowed.
bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  // not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      Context.IsInvalid = true;
      if (!KeepGoing)
        return false;
    }
  }
  return true;
}
1069
/// Check a single memory access (instruction @p Inst, access function @p AF,
/// base pointer @p BP) for validity inside the current region.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Arguments&: Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(Val: BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Arguments&: Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(Val: BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Arguments&: Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(Val&: *BV, Reg: Context.CurRegion, Ctx&: Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, Arguments&: BV, Arguments&: Inst);

  // From here on, work with the offset relative to the base pointer.
  AF = SE.getMinusSCEV(LHS: AF, RHS: BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Val: Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    // Memory intrinsics carry no element type; assume a fixed size of 8.
    auto *SizeTy =
        SE.getEffectiveSCEVType(Ty: PointerType::getUnqual(C&: SE.getContext()));
    Size = SE.getConstant(Ty: SizeTy, V: 8);
  }

  // Track the smallest element size seen per base pointer; differing sizes
  // are only accepted when AllowDifferentTypes is set.
  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Arguments&: Inst, Arguments&: BV);

    Context.ElementSize[BP] = SE.getSMinExpr(LHS: Size, RHS: Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access function that depends on a boxed (overapproximated) loop's
  // induction variable cannot be modeled precisely.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(Expr: AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(key: L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(BB: Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(S: AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Val: Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments&: AF, Arguments&: Inst,
                                          Arguments&: BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Record the access; non-affine ones are revisited later by the
    // delinearization in hasAffineMemoryAccesses().
    Context.Accesses[BP].push_back(x: {Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          X: std::make_pair(x&: BP, y: LI.getLoopFor(BB: Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments&: AF, Arguments&: Inst,
                                          Arguments&: BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags = Inst->getAAMetadata();
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemLoc: MemoryLocation::getBeforeOrAfter(Ptr: BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      auto ASPointers = AS.getPointers();

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (true) {
        // Snapshot set sizes to detect whether this pass changed anything.
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const Value *Ptr : ASPointers) {
          Instruction *Inst = dyn_cast<Instruction>(Val: const_cast<Value *>(Ptr));
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Val: Inst);
            if (Load && InvariantLS.count(key: Load))
              continue;
            if (Load && isHoistableLoad(LInst: Load, R&: Context.CurRegion, LI, SE, DT,
                                        KnownInvariantLoads: InvariantLS)) {
              // A load previously classified variant may become invariant
              // once the loads it depends on are known invariant.
              if (VariantLS.count(key: Load))
                VariantLS.remove(X: Load);
              Context.RequiredILS.insert(X: Load);
              InvariantLS.insert(X: Load);
            } else {
              // NOTE(review): for an in-region pointer that is not a load,
              // Load is null here and a null pointer is inserted into
              // VariantLS -- presumably harmless since only size/membership
              // are queried; confirm.
              CanBuildRunTimeCheck = false;
              VariantLS.insert(X: Load);
            }
          }
        }

        // Fixed point reached: neither set changed during this iteration.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Arguments&: Inst, Arguments&: AS);
  }

  return true;
}
1196
1197bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1198 DetectionContext &Context) const {
1199 Value *Ptr = Inst.getPointerOperand();
1200 Loop *L = LI.getLoopFor(BB: Inst->getParent());
1201 const SCEV *AccessFunction = SE.getSCEVAtScope(V: Ptr, L);
1202 const SCEVUnknown *BasePointer;
1203
1204 BasePointer = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AccessFunction));
1205
1206 return isValidAccess(Inst, AF: AccessFunction, BP: BasePointer, Context);
1207}
1208
/// Check whether the single instruction @p Inst can be modeled inside the
/// current region.
bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) {
  // Operands defined in error blocks are only acceptable when the defining
  // instruction is a PHI whose every user is a terminator (the value then
  // only steers control flow).
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(Val: &Op);

    if (!OpInst)
      continue;

    if (isErrorBlock(BB&: *OpInst->getParent(), R: Context.CurRegion)) {
      auto *PHI = dyn_cast<PHINode>(Val: OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(Val: U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  // Exception handling constructs are not supported.
  if (isa<LandingPadInst>(Val: &Inst) || isa<ResumeInst>(Val: &Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(Val: &Inst)) {
    if (isValidCallInst(CI&: *CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, Arguments: &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    // Non-memory instructions are fine -- except allocas, which Polly does
    // not model.
    if (!isa<AllocaInst>(Val: Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, Arguments: &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(V&: Inst)) {
    Context.hasStores |= isa<StoreInst>(Val: MemInst);
    Context.hasLoads |= isa<LoadInst>(Val: MemInst);
    // Non-simple (e.g. volatile or atomic) accesses are rejected.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  Arguments: &Inst);

    return isValidMemoryAccess(Inst: MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, Arguments: &Inst);
}
1263
1264/// Check whether @p L has exiting blocks.
1265///
1266/// @param L The loop of interest
1267///
1268/// @return True if the loop has exiting blocks, false otherwise.
1269static bool hasExitingBlocks(Loop *L) {
1270 SmallVector<BasicBlock *, 4> ExitingBlocks;
1271 L->getExitingBlocks(ExitingBlocks);
1272 return !ExitingBlocks.empty();
1273}
1274
/// Check whether the trip count of @p L can be modeled by ISL, i.e. whether
/// all loop-control blocks (exiting blocks and latches) of @p L have
/// SCoP-compatible control flow.
bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
  // FIXME: Yes, this is bad. isValidCFG() may call invalid<Reason>() which
  // causes the SCoP to be rejected regardless on whether non-ISL trip counts
  // could be used. We currently preserve the legacy behaviour of rejecting
  // based on Context.Log.size() added by isValidCFG() or before, regardless on
  // whether the ISL trip count can be used or can be used as a non-affine
  // region. However, we allow rejections by isValidCFG() that do not result in
  // an error log entry.
  bool OldIsInvalid = Context.IsInvalid;

  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(ExitingBlocks&: LoopControlBlocks);
  L->getLoopLatches(LoopLatches&: LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(BB&: *ControlBB, IsLoopBranch: true, AllowUnreachable: false, Context)) {
      Context.IsInvalid = OldIsInvalid || Context.Log.size();
      return false;
    }
  }

  // We can use ISL to compute the trip count of L.
  Context.IsInvalid = OldIsInvalid || Context.Log.size();
  return true;
}
1301
/// Check whether loop @p L is valid inside the SCoP, possibly
/// overapproximating it as a boxed non-affine region.
bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //        |           |
  //       \------------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, Arguments&: L);

  // The algorithm for domain construction assumes that loops has only a single
  // exit block (and hence corresponds to a subregion). Note that we cannot use
  // L->getExitBlock() because it does not check whether all exiting edges point
  // to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  // ExitBlocks is non-empty here: the hasExitingBlocks() check above
  // guarantees at least one exit edge.
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, Arguments&: L);
  }

  if (canUseISLTripCount(L, Context))
    return true;

  // ISL cannot model the trip count; try to overapproximate the smallest
  // region enclosing the loop as a non-affine (boxed) region.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(BB: L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(AR: R, Context))
      return true;
  }

  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, Arguments&: L, Arguments&: LoopCount);
}
1362
/// Return the number of loops in @p L (incl. @p L) that have a trip
/// count that is not known to be less than @MinProfitableTrips.
ScopDetection::LoopStats
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
                                       unsigned MinProfitableTrips) {
  auto *TripCount = SE.getBackedgeTakenCount(L);

  int NumLoops = 1;
  int MaxLoopDepth = 1;
  // Discount this loop if its backedge-taken count is a known constant
  // of at most MinProfitableTrips.
  if (MinProfitableTrips > 0)
    if (auto *TripCountC = dyn_cast<SCEVConstant>(Val: TripCount))
      if (TripCountC->getType()->getScalarSizeInBits() <= 64)
        if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
          NumLoops -= 1;

  // Recurse into sub-loops, accumulating counts and tracking the deepest
  // nesting below this loop.
  for (auto &SubLoop : *L) {
    LoopStats Stats = countBeneficialSubLoops(L: SubLoop, SE, MinProfitableTrips);
    NumLoops += Stats.NumLoops;
    MaxLoopDepth = std::max(a: MaxLoopDepth, b: Stats.MaxDepth + 1);
  }

  return {.NumLoops: NumLoops, .MaxDepth: MaxLoopDepth};
}
1386
/// Count beneficial loops fully contained in region @p R (see
/// countBeneficialSubLoops for the notion of "beneficial").
ScopDetection::LoopStats
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
                                    LoopInfo &LI, unsigned MinProfitableTrips) {
  int LoopNum = 0;
  int MaxLoopDepth = 0;

  auto L = LI.getLoopFor(BB: R->getEntry());

  // If L is fully contained in R, move to first loop surrounding R. Otherwise,
  // L is either nullptr or already surrounding R.
  if (L && R->contains(L)) {
    L = R->outermostLoopInRegion(L);
    L = L->getParentLoop();
  }

  // Candidate loops are the children of the surrounding loop, or all
  // top-level loops if there is no surrounding loop.
  auto SubLoops =
      L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());

  for (auto &SubLoop : SubLoops)
    if (R->contains(L: SubLoop)) {
      LoopStats Stats =
          countBeneficialSubLoops(L: SubLoop, SE, MinProfitableTrips);
      LoopNum += Stats.NumLoops;
      MaxLoopDepth = std::max(a: MaxLoopDepth, b: Stats.MaxDepth);
    }

  return {.NumLoops: LoopNum, .MaxDepth: MaxLoopDepth};
}
1415
/// Decide whether @p BB is an "error block": a block that is not always
/// executed and contains a call with unknown side effects or that does not
/// return, which Polly may then exclude from modeling.
static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
                             const DominatorTree &DT) {
  if (isa<UnreachableInst>(Val: BB.getTerminator()))
    return true;

  // Loop headers are regular control flow, never error blocks.
  if (LI.isLoopHeader(BB: &BB))
    return false;

  // Don't consider something outside the SCoP as error block. It will precede
  // the code versioning runtime check.
  if (!R.contains(BB: &BB))
    return false;

  // Basic blocks that are always executed are not considered error blocks,
  // as their execution can not be a rare event.
  bool DominatesAllPredecessors = true;
  if (R.isTopLevelRegion()) {
    // For the whole function, "always executed" means dominating every
    // returning block.
    for (BasicBlock &I : *R.getEntry()->getParent()) {
      if (isa<ReturnInst>(Val: I.getTerminator()) && !DT.dominates(A: &BB, B: &I)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  } else {
    // Otherwise it means dominating every in-region predecessor of the
    // region's exit block.
    for (auto Pred : predecessors(BB: R.getExit())) {
      if (R.contains(BB: Pred) && !DT.dominates(A: &BB, B: Pred)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  }

  if (DominatesAllPredecessors)
    return false;

  // A conditionally executed block counts as an error block if it contains a
  // call that may access memory (beyond the modeled intrinsics) or that does
  // not return.
  for (Instruction &Inst : BB)
    if (CallInst *CI = dyn_cast<CallInst>(Val: &Inst)) {
      if (isDebugCall(Inst: CI))
        continue;

      if (isIgnoredIntrinsic(V: CI))
        continue;

      // memset, memcpy and memmove are modeled intrinsics.
      if (isa<MemSetInst>(Val: CI) || isa<MemTransferInst>(Val: CI))
        continue;

      if (!CI->doesNotAccessMemory())
        return true;
      if (CI->doesNotReturn())
        return true;
    }

  return false;
}
1471
1472bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
1473 if (!PollyAllowErrorBlocks)
1474 return false;
1475
1476 auto It = ErrorBlockCache.insert(KV: {std::make_pair(x: &BB, y: &R), false});
1477 if (!It.second)
1478 return It.first->getSecond();
1479
1480 bool Result = isErrorBlockImpl(BB, R, LI, DT);
1481 It.first->second = Result;
1482 return Result;
1483}
1484
/// Try to enlarge the valid region @p R by repeatedly testing its expanded
/// (non-canonical) region; returns the largest valid expansion found, or
/// nullptr if no expansion is valid. Ownership of the returned region passes
/// to the caller.
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  POLLY_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    // Create a fresh detection context for the candidate expansion.
    BBPair P = getBBPairForRegion(R: ExpandedRegion.get());
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
    Entry = std::make_unique<DetectionContext>(args&: *ExpandedRegion, args&: AA,
                                               /*Verifying=*/args: false);
    DetectionContext &Context = *Entry.get();

    POLLY_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr()
                       << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      // - if true, a valid region was found => store it + keep expanding
      // - if false, .tbd. => stop (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(R: *ExpandedRegion);
        DetectionContextMap.erase(Val: P);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      if (LastValidRegion) {
        removeCachedResults(R: *LastValidRegion);
        // NOTE(review): this erases the entry keyed by the *new* region's
        // BB pair (P), not the superseded LastValidRegion's pair -- confirm
        // this is intended.
        DetectionContextMap.erase(Val: P);
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(R: *ExpandedRegion);
      DetectionContextMap.erase(Val: P);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  POLLY_DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  return LastValidRegion.release();
}
1543
1544static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1545 for (const BasicBlock *BB : R.blocks())
1546 if (R.contains(L: LI.getLoopFor(BB)))
1547 return false;
1548
1549 return true;
1550}
1551
1552void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1553 for (auto &SubRegion : R) {
1554 if (ValidRegions.count(key: SubRegion.get())) {
1555 removeCachedResults(R: *SubRegion.get());
1556 } else
1557 removeCachedResultsRecursively(R: *SubRegion);
1558 }
1559}
1560
/// Drop @p R from the set of detected valid SCoPs.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(X: &R);
}
1564
/// Detect maximal SCoPs in region @p R and its subregions, recording results
/// in ValidRegions and DetectionContextMap.
void ScopDetection::findScops(Region &R) {
  std::unique_ptr<DetectionContext> &Entry =
      DetectionContextMap[getBBPairForRegion(R: &R)];
  Entry = std::make_unique<DetectionContext>(args&: R, args&: AA, /*Verifying=*/args: false);
  DetectionContext &Context = *Entry.get();

  // Loop-free regions are only considered when unprofitable regions are
  // processed as well.
  bool DidBailout = true;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, Arguments: &R);
  else
    DidBailout = !isValidRegion(Context);

  (void)DidBailout;
  if (KeepGoing) {
    assert((!DidBailout || Context.IsInvalid) &&
           "With -polly-detect-keep-going, it is sufficient that if "
           "isValidRegion short-circuited, that SCoP is invalid");
  } else {
    assert(DidBailout == Context.IsInvalid &&
           "isValidRegion must short-circuit iff the ScoP is invalid");
  }

  if (Context.IsInvalid) {
    removeCachedResults(R);
  } else {
    // R itself is a SCoP; do not look for smaller SCoPs inside it.
    ValidRegions.insert(X: &R);
    return;
  }

  // R is invalid: recurse to find maximal SCoPs among its subregions.
  for (auto &SubRegion : R)
    findScops(R&: *SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(x: SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(key: CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(R: CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(R&: *CurrentRegion);

    if (!ExpandedR)
      continue;

    // Adopt the expanded region into the tree and invalidate the cached
    // results of everything it subsumes.
    R.addSubRegion(SubRegion: ExpandedR, moveChildren: true);
    ValidRegions.insert(X: ExpandedR);
    removeCachedResults(R: *CurrentRegion);
    removeCachedResultsRecursively(R: *ExpandedR);
  }
}
1630
/// Check all basic blocks (and the loops they form) of the current region
/// for validity.
bool ScopDetection::allBlocksValid(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  // First pass: validate every loop whose header lies in the region.
  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context)) {
          Context.IsInvalid = true;
          if (!KeepGoing)
            return false;
        }
      } else {
        // The loop is only partially inside the region: reject if any of its
        // latches lies within the region.
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(LoopLatches&: Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(BB: Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      Arguments&: L);
      }
    }
  }

  // Second pass: validate the CFG and the individual instructions.
  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(BB&: *BB, R: CurRegion);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(BB&: *BB, IsLoopBranch: false, AllowUnreachable: IsErrorBlock, Context) && !KeepGoing)
      return false;

    if (IsErrorBlock)
      continue;

    // Iterate up to but excluding the block's last instruction (its
    // terminator), which isValidCFG above already inspected.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(Inst&: *I, Context)) {
        Context.IsInvalid = true;
        if (!KeepGoing)
          return false;
      }
  }

  // Finally, attempt to delinearize the remaining non-affine accesses.
  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}
1679
1680bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1681 int NumLoops) const {
1682 int InstCount = 0;
1683
1684 if (NumLoops == 0)
1685 return false;
1686
1687 for (auto *BB : Context.CurRegion.blocks())
1688 if (Context.CurRegion.contains(L: LI.getLoopFor(BB)))
1689 InstCount += BB->size();
1690
1691 InstCount = InstCount / NumLoops;
1692
1693 return InstCount >= ProfitabilityMinPerLoopInstructions;
1694}
1695
/// Check whether the region contains a loop that might profit from loop
/// distribution.
///
/// Note: only the first non-boxed, region-contained loop encountered is
/// inspected -- the function returns directly with that loop's result.
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(key: L))
      continue;
    // Count the loop's blocks that perform at least one store; more than one
    // such block suggests distributable statements.
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(Val: &I);
      StmtsWithStoresInLoops += MemStore;
    }
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}
1715
/// Heuristically decide whether optimizing this SCoP is likely to pay off.
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, Arguments: &CurRegion);

  // Boxed (overapproximated) loops do not count as optimizable loops.
  int NumLoops =
      countBeneficialLoops(R: &CurRegion, SE, LI, MinProfitableTrips: MIN_LOOP_TRIP_COUNT).NumLoops;
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, Arguments: &CurRegion);
}
1751
/// Check whether the region of @p Context qualifies as a SCoP as a whole.
bool ScopDetection::isValidRegion(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  POLLY_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr()
                     << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    POLLY_DEBUG(dbgs() << "Top level region is invalid\n");
    Context.IsInvalid = true;
    return false;
  }

  DebugLoc DbgLoc;
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(Val: CurRegion.getExit()->getTerminator())) {
    POLLY_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            Arguments: CurRegion.getExit(), Arguments&: DbgLoc);
  }

  // Honor -polly-only-region=<substring> to restrict detection to matching
  // entry blocks.
  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(Str: OnlyRegion)) {
    POLLY_DEBUG({
      dbgs() << "Region entry does not match -polly-only-region";
      dbgs() << "\n";
    });
    Context.IsInvalid = true;
    return false;
  }

  // Entering the SCoP via an indirect branch or callbr cannot be modeled.
  for (BasicBlock *Pred : predecessors(BB: CurRegion.getEntry())) {
    Instruction *PredTerm = Pred->getTerminator();
    if (isa<IndirectBrInst>(Val: PredTerm) || isa<CallBrInst>(Val: PredTerm))
      return invalid<ReportIndirectPredecessor>(
          Context, /*Assert=*/true, Arguments&: PredTerm, Arguments: PredTerm->getDebugLoc());
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, Arguments: CurRegion.getEntry());

  if (!allBlocksValid(Context)) {
    // TODO: Every failure condition within allBlocksValid should call
    // invalid<Reason>(). Otherwise we reject SCoPs without giving feedback to
    // the user.
    Context.IsInvalid = true;
    return false;
  }

  if (!isReducibleRegion(R&: CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            Arguments: &CurRegion, Arguments&: DbgLoc);

  POLLY_DEBUG(dbgs() << "OK\n");
  return true;
}
1811
1812void ScopDetection::markFunctionAsInvalid(Function *F) {
1813 F->addFnAttr(Kind: PollySkipFnAttr);
1814}
1815
1816bool ScopDetection::isValidFunction(Function &F) {
1817 return !F.hasFnAttribute(Kind: PollySkipFnAttr);
1818}
1819
1820void ScopDetection::printLocations(Function &F) {
1821 for (const Region *R : *this) {
1822 unsigned LineEntry, LineExit;
1823 std::string FileName;
1824
1825 getDebugLocation(R, LineBegin&: LineEntry, LineEnd&: LineExit, FileName);
1826 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1827 F.getContext().diagnose(DI: Diagnostic);
1828 }
1829}
1830
1831void ScopDetection::emitMissedRemarks(const Function &F) {
1832 for (auto &DIt : DetectionContextMap) {
1833 DetectionContext &DC = *DIt.getSecond().get();
1834 if (DC.Log.hasErrors())
1835 emitRejectionRemarks(P: DIt.getFirst(), Log: DC.Log, ORE);
1836 }
1837}
1838
1839bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
1840 /// Enum for coloring BBs in Region.
1841 ///
1842 /// WHITE - Unvisited BB in DFS walk.
1843 /// GREY - BBs which are currently on the DFS stack for processing.
1844 /// BLACK - Visited and completely processed BB.
1845 enum Color { WHITE, GREY, BLACK };
1846
1847 BasicBlock *REntry = R.getEntry();
1848 BasicBlock *RExit = R.getExit();
1849 // Map to match the color of a BasicBlock during the DFS walk.
1850 DenseMap<const BasicBlock *, Color> BBColorMap;
1851 // Stack keeping track of current BB and index of next child to be processed.
1852 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;
1853
1854 unsigned AdjacentBlockIndex = 0;
1855 BasicBlock *CurrBB, *SuccBB;
1856 CurrBB = REntry;
1857
1858 // Initialize the map for all BB with WHITE color.
1859 for (auto *BB : R.blocks())
1860 BBColorMap[BB] = WHITE;
1861
1862 // Process the entry block of the Region.
1863 BBColorMap[CurrBB] = GREY;
1864 DFSStack.push(x: std::make_pair(x&: CurrBB, y: 0));
1865
1866 while (!DFSStack.empty()) {
1867 // Get next BB on stack to be processed.
1868 CurrBB = DFSStack.top().first;
1869 AdjacentBlockIndex = DFSStack.top().second;
1870 DFSStack.pop();
1871
1872 // Loop to iterate over the successors of current BB.
1873 const Instruction *TInst = CurrBB->getTerminator();
1874 unsigned NSucc = TInst->getNumSuccessors();
1875 for (unsigned I = AdjacentBlockIndex; I < NSucc;
1876 ++I, ++AdjacentBlockIndex) {
1877 SuccBB = TInst->getSuccessor(Idx: I);
1878
1879 // Checks for region exit block and self-loops in BB.
1880 if (SuccBB == RExit || SuccBB == CurrBB)
1881 continue;
1882
1883 // WHITE indicates an unvisited BB in DFS walk.
1884 if (BBColorMap[SuccBB] == WHITE) {
1885 // Push the current BB and the index of the next child to be visited.
1886 DFSStack.push(x: std::make_pair(x&: CurrBB, y: I + 1));
1887 // Push the next BB to be processed.
1888 DFSStack.push(x: std::make_pair(x&: SuccBB, y: 0));
1889 // First time the BB is being processed.
1890 BBColorMap[SuccBB] = GREY;
1891 break;
1892 } else if (BBColorMap[SuccBB] == GREY) {
1893 // GREY indicates a loop in the control flow.
1894 // If the destination dominates the source, it is a natural loop
1895 // else, an irreducible control flow in the region is detected.
1896 if (!DT.dominates(A: SuccBB, B: CurrBB)) {
1897 // Get debug info of instruction which causes irregular control flow.
1898 DbgLoc = TInst->getDebugLoc();
1899 return false;
1900 }
1901 }
1902 }
1903
1904 // If all children of current BB have been processed,
1905 // then mark that BB as fully processed.
1906 if (AdjacentBlockIndex == NSucc)
1907 BBColorMap[CurrBB] = BLACK;
1908 }
1909
1910 return true;
1911}
1912
1913static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1914 bool OnlyProfitable) {
1915 if (!OnlyProfitable) {
1916 NumLoopsInScop += Stats.NumLoops;
1917 MaxNumLoopsInScop =
1918 std::max(a: MaxNumLoopsInScop.getValue(), b: (uint64_t)Stats.NumLoops);
1919 if (Stats.MaxDepth == 0)
1920 NumScopsDepthZero++;
1921 else if (Stats.MaxDepth == 1)
1922 NumScopsDepthOne++;
1923 else if (Stats.MaxDepth == 2)
1924 NumScopsDepthTwo++;
1925 else if (Stats.MaxDepth == 3)
1926 NumScopsDepthThree++;
1927 else if (Stats.MaxDepth == 4)
1928 NumScopsDepthFour++;
1929 else if (Stats.MaxDepth == 5)
1930 NumScopsDepthFive++;
1931 else
1932 NumScopsDepthLarger++;
1933 } else {
1934 NumLoopsInProfScop += Stats.NumLoops;
1935 MaxNumLoopsInProfScop =
1936 std::max(a: MaxNumLoopsInProfScop.getValue(), b: (uint64_t)Stats.NumLoops);
1937 if (Stats.MaxDepth == 0)
1938 NumProfScopsDepthZero++;
1939 else if (Stats.MaxDepth == 1)
1940 NumProfScopsDepthOne++;
1941 else if (Stats.MaxDepth == 2)
1942 NumProfScopsDepthTwo++;
1943 else if (Stats.MaxDepth == 3)
1944 NumProfScopsDepthThree++;
1945 else if (Stats.MaxDepth == 4)
1946 NumProfScopsDepthFour++;
1947 else if (Stats.MaxDepth == 5)
1948 NumProfScopsDepthFive++;
1949 else
1950 NumProfScopsDepthLarger++;
1951 }
1952}
1953
1954ScopDetection::DetectionContext *
1955ScopDetection::getDetectionContext(const Region *R) const {
1956 auto DCMIt = DetectionContextMap.find(Val: getBBPairForRegion(R));
1957 if (DCMIt == DetectionContextMap.end())
1958 return nullptr;
1959 return DCMIt->second.get();
1960}
1961
1962const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1963 const DetectionContext *DC = getDetectionContext(R);
1964 return DC ? &DC->Log : nullptr;
1965}
1966
1967void ScopDetection::verifyRegion(const Region &R) {
1968 assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1969
1970 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
1971 isValidRegion(Context);
1972}
1973
1974void ScopDetection::verifyAnalysis() {
1975 if (!VerifyScops)
1976 return;
1977
1978 for (const Region *R : ValidRegions)
1979 verifyRegion(R: *R);
1980}
1981
1982bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
1983 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1984 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1985 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1986 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1987 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1988 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1989
1990 Result = std::make_unique<ScopDetection>(args&: DT, args&: SE, args&: LI, args&: RI, args&: AA, args&: ORE);
1991 Result->detect(F);
1992 return false;
1993}
1994
1995void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1996 AU.addRequired<LoopInfoWrapperPass>();
1997 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
1998 AU.addRequired<DominatorTreeWrapperPass>();
1999 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
2000 // We also need AA and RegionInfo when we are verifying analysis.
2001 AU.addRequiredTransitive<AAResultsWrapperPass>();
2002 AU.addRequiredTransitive<RegionInfoPass>();
2003 AU.setPreservesAll();
2004}
2005
2006void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
2007 for (const Region *R : Result->ValidRegions)
2008 OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2009
2010 OS << "\n";
2011}
2012
2013ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
2014 // Disable runtime alias checks if we ignore aliasing all together.
2015 if (IgnoreAliasing)
2016 PollyUseRuntimeAliasChecks = false;
2017}
2018
2019ScopAnalysis::ScopAnalysis() {
2020 // Disable runtime alias checks if we ignore aliasing all together.
2021 if (IgnoreAliasing)
2022 PollyUseRuntimeAliasChecks = false;
2023}
2024
2025void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }
2026
// Pass-identification token used by the legacy pass manager.
char ScopDetectionWrapperPass::ID;

// Unique key identifying ScopAnalysis in the new pass manager.
AnalysisKey ScopAnalysis::Key;
2030
2031ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
2032 auto &LI = FAM.getResult<LoopAnalysis>(IR&: F);
2033 auto &RI = FAM.getResult<RegionInfoAnalysis>(IR&: F);
2034 auto &AA = FAM.getResult<AAManager>(IR&: F);
2035 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(IR&: F);
2036 auto &DT = FAM.getResult<DominatorTreeAnalysis>(IR&: F);
2037 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
2038
2039 ScopDetection Result(DT, SE, LI, RI, AA, ORE);
2040 Result.detect(F);
2041 return Result;
2042}
2043
2044PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
2045 FunctionAnalysisManager &FAM) {
2046 OS << "Detected Scops in Function " << F.getName() << "\n";
2047 auto &SD = FAM.getResult<ScopAnalysis>(IR&: F);
2048 for (const Region *R : SD.ValidRegions)
2049 OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2050
2051 OS << "\n";
2052 return PreservedAnalyses::all();
2053}
2054
2055Pass *polly::createScopDetectionWrapperPassPass() {
2056 return new ScopDetectionWrapperPass();
2057}
2058
// Register the legacy detection pass (-polly-detect) together with the
// analyses it depends on, so the legacy pass manager can schedule it.
INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)
2070
2071//===----------------------------------------------------------------------===//
2072
2073namespace {
2074/// Print result from ScopDetectionWrapperPass.
2075class ScopDetectionPrinterLegacyPass final : public FunctionPass {
2076public:
2077 static char ID;
2078
2079 ScopDetectionPrinterLegacyPass() : ScopDetectionPrinterLegacyPass(outs()) {}
2080
2081 explicit ScopDetectionPrinterLegacyPass(llvm::raw_ostream &OS)
2082 : FunctionPass(ID), OS(OS) {}
2083
2084 bool runOnFunction(Function &F) override {
2085 ScopDetectionWrapperPass &P = getAnalysis<ScopDetectionWrapperPass>();
2086
2087 OS << "Printing analysis '" << P.getPassName() << "' for function '"
2088 << F.getName() << "':\n";
2089 P.print(OS);
2090
2091 return false;
2092 }
2093
2094 void getAnalysisUsage(AnalysisUsage &AU) const override {
2095 FunctionPass::getAnalysisUsage(AU);
2096 AU.addRequired<ScopDetectionWrapperPass>();
2097 AU.setPreservesAll();
2098 }
2099
2100private:
2101 llvm::raw_ostream &OS;
2102};
2103
2104char ScopDetectionPrinterLegacyPass::ID = 0;
2105} // namespace
2106
2107Pass *polly::createScopDetectionPrinterLegacyPass(raw_ostream &OS) {
2108 return new ScopDetectionPrinterLegacyPass(OS);
2109}
2110
// Register the legacy printer pass (-polly-print-detect), which requires the
// detection pass to have run first.
INITIALIZE_PASS_BEGIN(ScopDetectionPrinterLegacyPass, "polly-print-detect",
                      "Polly - Print static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_END(ScopDetectionPrinterLegacyPass, "polly-print-detect",
                    "Polly - Print static control parts (SCoPs)", false, false)
2117

// source code of polly/lib/Analysis/ScopDetection.cpp