1//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Detect the maximal Scops of a function.
10//
11// A static control part (Scop) is a subgraph of the control flow graph (CFG)
12// that only has statically known control flow and can therefore be described
13// within the polyhedral model.
14//
15// Every Scop fulfills these restrictions:
16//
17// * It is a single entry single exit region
18//
19// * Only affine linear bounds in the loops
20//
21// Every natural loop in a Scop must have a number of loop iterations that can
22// be described as an affine linear function in surrounding loop iterators or
23// parameters. (A parameter is a scalar that does not change its value during
24// execution of the Scop).
25//
26// * Only comparisons of affine linear expressions in conditions
27//
28// * All loops and conditions perfectly nested
29//
30// The control flow needs to be structured such that it could be written using
31// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32// 'continue'.
33//
34// * Side effect free functions call
35//
36// Function calls and intrinsics that do not have side effects (readnone)
37// or memory intrinsics (memset, memcpy, memmove) are allowed.
38//
39// The Scop detection finds the largest Scops by checking if the largest
40// region is a Scop. If this is not the case, its canonical subregions are
41// checked until a region is a Scop. It is now tried to extend this Scop by
42// creating a larger non canonical region.
43//
44//===----------------------------------------------------------------------===//
45
46#include "polly/ScopDetection.h"
47#include "polly/LinkAllPasses.h"
48#include "polly/Options.h"
49#include "polly/ScopDetectionDiagnostic.h"
50#include "polly/Support/SCEVValidator.h"
51#include "polly/Support/ScopHelper.h"
52#include "polly/Support/ScopLocation.h"
53#include "llvm/ADT/SmallPtrSet.h"
54#include "llvm/ADT/Statistic.h"
55#include "llvm/Analysis/AliasAnalysis.h"
56#include "llvm/Analysis/Delinearization.h"
57#include "llvm/Analysis/Loads.h"
58#include "llvm/Analysis/LoopInfo.h"
59#include "llvm/Analysis/OptimizationRemarkEmitter.h"
60#include "llvm/Analysis/RegionInfo.h"
61#include "llvm/Analysis/ScalarEvolution.h"
62#include "llvm/Analysis/ScalarEvolutionExpressions.h"
63#include "llvm/IR/BasicBlock.h"
64#include "llvm/IR/DebugLoc.h"
65#include "llvm/IR/DerivedTypes.h"
66#include "llvm/IR/DiagnosticInfo.h"
67#include "llvm/IR/DiagnosticPrinter.h"
68#include "llvm/IR/Dominators.h"
69#include "llvm/IR/Function.h"
70#include "llvm/IR/InstrTypes.h"
71#include "llvm/IR/Instruction.h"
72#include "llvm/IR/Instructions.h"
73#include "llvm/IR/IntrinsicInst.h"
74#include "llvm/IR/Metadata.h"
75#include "llvm/IR/Module.h"
76#include "llvm/IR/PassManager.h"
77#include "llvm/IR/Value.h"
78#include "llvm/InitializePasses.h"
79#include "llvm/Pass.h"
80#include "llvm/Support/Debug.h"
81#include "llvm/Support/Regex.h"
82#include "llvm/Support/raw_ostream.h"
83#include <algorithm>
84#include <cassert>
85#include <memory>
86#include <stack>
87#include <string>
88#include <utility>
89#include <vector>
90
91using namespace llvm;
92using namespace polly;
93
94#define DEBUG_TYPE "polly-detect"
95
96// This option is set to a very high value, as analyzing such loops increases
97// compile time on several cases. For experiments that enable this option,
98// a value of around 40 has been working to avoid run-time regressions with
99// Polly while still exposing interesting optimization opportunities.
100static cl::opt<int> ProfitabilityMinPerLoopInstructions(
101 "polly-detect-profitability-min-per-loop-insts",
102 cl::desc("The minimal number of per-loop instructions before a single loop "
103 "region is considered profitable"),
104 cl::Hidden, cl::ValueRequired, cl::init(Val: 100000000), cl::cat(PollyCategory));
105
106bool polly::PollyProcessUnprofitable;
107
108static cl::opt<bool, true> XPollyProcessUnprofitable(
109 "polly-process-unprofitable",
110 cl::desc(
111 "Process scops that are unlikely to benefit from Polly optimizations."),
112 cl::location(L&: PollyProcessUnprofitable), cl::cat(PollyCategory));
113
114static cl::list<std::string> OnlyFunctions(
115 "polly-only-func",
116 cl::desc("Only run on functions that match a regex. "
117 "Multiple regexes can be comma separated. "
118 "Scop detection will run on all functions that match "
119 "ANY of the regexes provided."),
120 cl::CommaSeparated, cl::cat(PollyCategory));
121
122static cl::list<std::string> IgnoredFunctions(
123 "polly-ignore-func",
124 cl::desc("Ignore functions that match a regex. "
125 "Multiple regexes can be comma separated. "
126 "Scop detection will ignore all functions that match "
127 "ANY of the regexes provided."),
128 cl::CommaSeparated, cl::cat(PollyCategory));
129
130bool polly::PollyAllowFullFunction;
131
132static cl::opt<bool, true>
133 XAllowFullFunction("polly-detect-full-functions",
134 cl::desc("Allow the detection of full functions"),
135 cl::location(L&: polly::PollyAllowFullFunction),
136 cl::init(Val: false), cl::cat(PollyCategory));
137
138static cl::opt<std::string> OnlyRegion(
139 "polly-only-region",
140 cl::desc("Only run on certain regions (The provided identifier must "
141 "appear in the name of the region's entry block"),
142 cl::value_desc("identifier"), cl::ValueRequired, cl::init(Val: ""),
143 cl::cat(PollyCategory));
144
145static cl::opt<bool>
146 IgnoreAliasing("polly-ignore-aliasing",
147 cl::desc("Ignore possible aliasing of the array bases"),
148 cl::Hidden, cl::cat(PollyCategory));
149
150bool polly::PollyAllowUnsignedOperations;
151
152static cl::opt<bool, true> XPollyAllowUnsignedOperations(
153 "polly-allow-unsigned-operations",
154 cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
155 cl::location(L&: PollyAllowUnsignedOperations), cl::Hidden, cl::init(Val: true),
156 cl::cat(PollyCategory));
157
158bool polly::PollyUseRuntimeAliasChecks;
159
160static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
161 "polly-use-runtime-alias-checks",
162 cl::desc("Use runtime alias checks to resolve possible aliasing."),
163 cl::location(L&: PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(Val: true),
164 cl::cat(PollyCategory));
165
166static cl::opt<bool>
167 ReportLevel("polly-report",
168 cl::desc("Print information about the activities of Polly"),
169 cl::cat(PollyCategory));
170
171static cl::opt<bool> AllowDifferentTypes(
172 "polly-allow-differing-element-types",
173 cl::desc("Allow different element types for array accesses"), cl::Hidden,
174 cl::init(Val: true), cl::cat(PollyCategory));
175
176static cl::opt<bool>
177 AllowNonAffine("polly-allow-nonaffine",
178 cl::desc("Allow non affine access functions in arrays"),
179 cl::Hidden, cl::cat(PollyCategory));
180
181static cl::opt<bool>
182 AllowModrefCall("polly-allow-modref-calls",
183 cl::desc("Allow functions with known modref behavior"),
184 cl::Hidden, cl::cat(PollyCategory));
185
186static cl::opt<bool> AllowNonAffineSubRegions(
187 "polly-allow-nonaffine-branches",
188 cl::desc("Allow non affine conditions for branches"), cl::Hidden,
189 cl::init(Val: true), cl::cat(PollyCategory));
190
191static cl::opt<bool>
192 AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
193 cl::desc("Allow non affine conditions for loops"),
194 cl::Hidden, cl::cat(PollyCategory));
195
196static cl::opt<bool, true>
197 TrackFailures("polly-detect-track-failures",
198 cl::desc("Track failure strings in detecting scop regions"),
199 cl::location(L&: PollyTrackFailures), cl::Hidden, cl::init(Val: true),
200 cl::cat(PollyCategory));
201
202static cl::opt<bool> KeepGoing("polly-detect-keep-going",
203 cl::desc("Do not fail on the first error."),
204 cl::Hidden, cl::cat(PollyCategory));
205
206static cl::opt<bool, true>
207 PollyDelinearizeX("polly-delinearize",
208 cl::desc("Delinearize array access functions"),
209 cl::location(L&: PollyDelinearize), cl::Hidden,
210 cl::init(Val: true), cl::cat(PollyCategory));
211
212static cl::opt<bool>
213 VerifyScops("polly-detect-verify",
214 cl::desc("Verify the detected SCoPs after each transformation"),
215 cl::Hidden, cl::cat(PollyCategory));
216
217bool polly::PollyInvariantLoadHoisting;
218
219static cl::opt<bool, true>
220 XPollyInvariantLoadHoisting("polly-invariant-load-hoisting",
221 cl::desc("Hoist invariant loads."),
222 cl::location(L&: PollyInvariantLoadHoisting),
223 cl::Hidden, cl::cat(PollyCategory));
224
225static cl::opt<bool> PollyAllowErrorBlocks(
226 "polly-allow-error-blocks",
227 cl::desc("Allow to speculate on the execution of 'error blocks'."),
228 cl::Hidden, cl::init(Val: true), cl::cat(PollyCategory));
229
230/// The minimal trip count under which loops are considered unprofitable.
231static const unsigned MIN_LOOP_TRIP_COUNT = 8;
232
233bool polly::PollyTrackFailures = false;
234bool polly::PollyDelinearize = false;
235StringRef polly::PollySkipFnAttr = "polly.skip.fn";
236
237//===----------------------------------------------------------------------===//
238// Statistics.
239
240STATISTIC(NumScopRegions, "Number of scops");
241STATISTIC(NumLoopsInScop, "Number of loops in scops");
242STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
243STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
244STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
245STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
246STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
247STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
248STATISTIC(NumScopsDepthLarger,
249 "Number of scops with maximal loop depth 6 and larger");
250STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
251STATISTIC(NumLoopsInProfScop,
252 "Number of loops in scops (profitable scops only)");
253STATISTIC(NumLoopsOverall, "Number of total loops");
254STATISTIC(NumProfScopsDepthZero,
255 "Number of scops with maximal loop depth 0 (profitable scops only)");
256STATISTIC(NumProfScopsDepthOne,
257 "Number of scops with maximal loop depth 1 (profitable scops only)");
258STATISTIC(NumProfScopsDepthTwo,
259 "Number of scops with maximal loop depth 2 (profitable scops only)");
260STATISTIC(NumProfScopsDepthThree,
261 "Number of scops with maximal loop depth 3 (profitable scops only)");
262STATISTIC(NumProfScopsDepthFour,
263 "Number of scops with maximal loop depth 4 (profitable scops only)");
264STATISTIC(NumProfScopsDepthFive,
265 "Number of scops with maximal loop depth 5 (profitable scops only)");
266STATISTIC(NumProfScopsDepthLarger,
267 "Number of scops with maximal loop depth 6 and larger "
268 "(profitable scops only)");
269STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
270STATISTIC(MaxNumLoopsInProfScop,
271 "Maximal number of loops in scops (profitable scops only)");
272
273static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
274 bool OnlyProfitable);
275
276namespace {
277
278class DiagnosticScopFound final : public DiagnosticInfo {
279private:
280 static int PluginDiagnosticKind;
281
282 Function &F;
283 std::string FileName;
284 unsigned EntryLine, ExitLine;
285
286public:
287 DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
288 unsigned ExitLine)
289 : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
290 EntryLine(EntryLine), ExitLine(ExitLine) {}
291
292 void print(DiagnosticPrinter &DP) const override;
293
294 static bool classof(const DiagnosticInfo *DI) {
295 return DI->getKind() == PluginDiagnosticKind;
296 }
297};
298} // namespace
299
300int DiagnosticScopFound::PluginDiagnosticKind =
301 getNextAvailablePluginDiagnosticKind();
302
303void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
304 DP << "Polly detected an optimizable loop region (scop) in function '" << F
305 << "'\n";
306
307 if (FileName.empty()) {
308 DP << "Scop location is unknown. Compile with debug info "
309 "(-g) to get more precise information. ";
310 return;
311 }
312
313 DP << FileName << ":" << EntryLine << ": Start of scop\n";
314 DP << FileName << ":" << ExitLine << ": End of scop";
315}
316
317/// Check if a string matches any regex in a list of regexes.
318/// @param Str the input string to match against.
319/// @param RegexList a list of strings that are regular expressions.
320static bool doesStringMatchAnyRegex(StringRef Str,
321 const cl::list<std::string> &RegexList) {
322 for (auto RegexStr : RegexList) {
323 Regex R(RegexStr);
324
325 std::string Err;
326 if (!R.isValid(Error&: Err))
327 report_fatal_error(reason: Twine("invalid regex given as input to polly: ") + Err,
328 gen_crash_diag: true);
329
330 if (R.match(String: Str))
331 return true;
332 }
333 return false;
334}
335
336//===----------------------------------------------------------------------===//
337// ScopDetection.
338
339ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
340 LoopInfo &LI, RegionInfo &RI, AAResults &AA,
341 OptimizationRemarkEmitter &ORE)
342 : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}
343
344void ScopDetection::detect(Function &F) {
345 assert(ValidRegions.empty() && "Detection must run only once");
346
347 if (!PollyProcessUnprofitable && LI.empty())
348 return;
349
350 Region *TopRegion = RI.getTopLevelRegion();
351
352 if (!OnlyFunctions.empty() &&
353 !doesStringMatchAnyRegex(Str: F.getName(), RegexList: OnlyFunctions))
354 return;
355
356 if (doesStringMatchAnyRegex(Str: F.getName(), RegexList: IgnoredFunctions))
357 return;
358
359 if (!isValidFunction(F))
360 return;
361
362 findScops(R&: *TopRegion);
363
364 NumScopRegions += ValidRegions.size();
365
366 // Prune non-profitable regions.
367 for (auto &DIt : DetectionContextMap) {
368 DetectionContext &DC = *DIt.getSecond().get();
369 if (DC.Log.hasErrors())
370 continue;
371 if (!ValidRegions.count(key: &DC.CurRegion))
372 continue;
373 LoopStats Stats = countBeneficialLoops(R: &DC.CurRegion, SE, LI, MinProfitableTrips: 0);
374 updateLoopCountStatistic(Stats, OnlyProfitable: false /* OnlyProfitable */);
375 if (isProfitableRegion(Context&: DC)) {
376 updateLoopCountStatistic(Stats, OnlyProfitable: true /* OnlyProfitable */);
377 continue;
378 }
379
380 ValidRegions.remove(X: &DC.CurRegion);
381 }
382
383 NumProfScopRegions += ValidRegions.size();
384 NumLoopsOverall += countBeneficialLoops(R: TopRegion, SE, LI, MinProfitableTrips: 0).NumLoops;
385
386 // Only makes sense when we tracked errors.
387 if (PollyTrackFailures)
388 emitMissedRemarks(F);
389
390 if (ReportLevel)
391 printLocations(F);
392
393 assert(ValidRegions.size() <= DetectionContextMap.size() &&
394 "Cached more results than valid regions");
395}
396
397template <class RR, typename... Args>
398inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
399 Args &&...Arguments) const {
400 if (!Context.Verifying) {
401 RejectLog &Log = Context.Log;
402 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
403 Context.IsInvalid = true;
404
405 // Log even if PollyTrackFailures is false, the log entries are also used in
406 // canUseISLTripCount().
407 Log.report(Reject: RejectReason);
408
409 LLVM_DEBUG(dbgs() << RejectReason->getMessage());
410 LLVM_DEBUG(dbgs() << "\n");
411 } else {
412 assert(!Assert && "Verification of detected scop failed");
413 }
414
415 return false;
416}
417
418bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
419 if (!ValidRegions.count(key: &R))
420 return false;
421
422 if (Verify) {
423 BBPair P = getBBPairForRegion(R: &R);
424 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
425
426 // Free previous DetectionContext for the region and create and verify a new
427 // one. Be sure that the DetectionContext is not still used by a ScopInfop.
428 // Due to changes but CodeGeneration of another Scop, the Region object and
429 // the BBPair might not match anymore.
430 Entry = std::make_unique<DetectionContext>(args&: const_cast<Region &>(R), args&: AA,
431 /*Verifying=*/args: false);
432
433 return isValidRegion(Context&: *Entry.get());
434 }
435
436 return true;
437}
438
439std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
440 // Get the first error we found. Even in keep-going mode, this is the first
441 // reason that caused the candidate to be rejected.
442 auto *Log = lookupRejectionLog(R);
443
444 // This can happen when we marked a region invalid, but didn't track
445 // an error for it.
446 if (!Log || !Log->hasErrors())
447 return "";
448
449 RejectReasonPtr RR = *Log->begin();
450 return RR->getMessage();
451}
452
453bool ScopDetection::addOverApproximatedRegion(Region *AR,
454 DetectionContext &Context) const {
455 // If we already know about Ar we can exit.
456 if (!Context.NonAffineSubRegionSet.insert(X: AR))
457 return true;
458
459 // All loops in the region have to be overapproximated too if there
460 // are accesses that depend on the iteration count.
461
462 for (BasicBlock *BB : AR->blocks()) {
463 Loop *L = LI.getLoopFor(BB);
464 if (AR->contains(L))
465 Context.BoxedLoopsSet.insert(X: L);
466 }
467
468 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
469}
470
471bool ScopDetection::onlyValidRequiredInvariantLoads(
472 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
473 Region &CurRegion = Context.CurRegion;
474 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
475
476 if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
477 return false;
478
479 for (LoadInst *Load : RequiredILS) {
480 // If we already know a load has been accepted as required invariant, we
481 // already run the validation below once and consequently don't need to
482 // run it again. Hence, we return early. For certain test cases (e.g.,
483 // COSMO this avoids us spending 50% of scop-detection time in this
484 // very function (and its children).
485 if (Context.RequiredILS.count(key: Load))
486 continue;
487 if (!isHoistableLoad(LInst: Load, R&: CurRegion, LI, SE, DT, KnownInvariantLoads: Context.RequiredILS))
488 return false;
489
490 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
491 if (isSafeToLoadUnconditionally(V: Load->getPointerOperand(),
492 Ty: Load->getType(), Alignment: Load->getAlign(), DL))
493 continue;
494
495 if (NonAffineRegion->contains(Inst: Load) &&
496 Load->getParent() != NonAffineRegion->getEntry())
497 return false;
498 }
499 }
500
501 Context.RequiredILS.insert(Start: RequiredILS.begin(), End: RequiredILS.end());
502
503 return true;
504}
505
506bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
507 Loop *Scope) const {
508 SetVector<Value *> Values;
509 findValues(Expr: S0, SE, Values);
510 if (S1)
511 findValues(Expr: S1, SE, Values);
512
513 SmallPtrSet<Value *, 8> PtrVals;
514 for (auto *V : Values) {
515 if (auto *P2I = dyn_cast<PtrToIntInst>(Val: V))
516 V = P2I->getOperand(i_nocapture: 0);
517
518 if (!V->getType()->isPointerTy())
519 continue;
520
521 auto *PtrSCEV = SE.getSCEVAtScope(V, L: Scope);
522 if (isa<SCEVConstant>(Val: PtrSCEV))
523 continue;
524
525 auto *BasePtr = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: PtrSCEV));
526 if (!BasePtr)
527 return true;
528
529 auto *BasePtrVal = BasePtr->getValue();
530 if (PtrVals.insert(Ptr: BasePtrVal).second) {
531 for (auto *PtrVal : PtrVals)
532 if (PtrVal != BasePtrVal && !AA.isNoAlias(V1: PtrVal, V2: BasePtrVal))
533 return true;
534 }
535 }
536
537 return false;
538}
539
540bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
541 DetectionContext &Context) const {
542 InvariantLoadsSetTy AccessILS;
543 if (!isAffineExpr(R: &Context.CurRegion, Scope, Expression: S, SE, ILS: &AccessILS))
544 return false;
545
546 if (!onlyValidRequiredInvariantLoads(RequiredILS&: AccessILS, Context))
547 return false;
548
549 return true;
550}
551
552bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
553 Value *Condition, bool IsLoopBranch,
554 DetectionContext &Context) const {
555 Loop *L = LI.getLoopFor(BB: &BB);
556 const SCEV *ConditionSCEV = SE.getSCEVAtScope(V: Condition, L);
557
558 if (IsLoopBranch && L->isLoopLatch(BB: &BB))
559 return false;
560
561 // Check for invalid usage of different pointers in one expression.
562 if (involvesMultiplePtrs(S0: ConditionSCEV, S1: nullptr, Scope: L))
563 return false;
564
565 if (isAffine(S: ConditionSCEV, Scope: L, Context))
566 return true;
567
568 if (AllowNonAffineSubRegions &&
569 addOverApproximatedRegion(AR: RI.getRegionFor(BB: &BB), Context))
570 return true;
571
572 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, Arguments: &BB,
573 Arguments&: ConditionSCEV, Arguments&: ConditionSCEV, Arguments&: SI);
574}
575
576bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
577 Value *Condition, bool IsLoopBranch,
578 DetectionContext &Context) {
579 // Constant integer conditions are always affine.
580 if (isa<ConstantInt>(Val: Condition))
581 return true;
582
583 if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val: Condition)) {
584 auto Opcode = BinOp->getOpcode();
585 if (Opcode == Instruction::And || Opcode == Instruction::Or) {
586 Value *Op0 = BinOp->getOperand(i_nocapture: 0);
587 Value *Op1 = BinOp->getOperand(i_nocapture: 1);
588 return isValidBranch(BB, BI, Condition: Op0, IsLoopBranch, Context) &&
589 isValidBranch(BB, BI, Condition: Op1, IsLoopBranch, Context);
590 }
591 }
592
593 if (auto PHI = dyn_cast<PHINode>(Val: Condition)) {
594 auto *Unique = dyn_cast_or_null<ConstantInt>(
595 Val: getUniqueNonErrorValue(PHI, R: &Context.CurRegion, SD: this));
596 if (Unique && (Unique->isZero() || Unique->isOne()))
597 return true;
598 }
599
600 if (auto Load = dyn_cast<LoadInst>(Val: Condition))
601 if (!IsLoopBranch && Context.CurRegion.contains(Inst: Load)) {
602 Context.RequiredILS.insert(X: Load);
603 return true;
604 }
605
606 // Non constant conditions of branches need to be ICmpInst.
607 if (!isa<ICmpInst>(Val: Condition)) {
608 if (!IsLoopBranch && AllowNonAffineSubRegions &&
609 addOverApproximatedRegion(AR: RI.getRegionFor(BB: &BB), Context))
610 return true;
611 return invalid<ReportInvalidCond>(Context, /*Assert=*/true, Arguments&: BI, Arguments: &BB);
612 }
613
614 ICmpInst *ICmp = cast<ICmpInst>(Val: Condition);
615
616 // Are both operands of the ICmp affine?
617 if (isa<UndefValue>(Val: ICmp->getOperand(i_nocapture: 0)) ||
618 isa<UndefValue>(Val: ICmp->getOperand(i_nocapture: 1)))
619 return invalid<ReportUndefOperand>(Context, /*Assert=*/true, Arguments: &BB, Arguments&: ICmp);
620
621 Loop *L = LI.getLoopFor(BB: &BB);
622 const SCEV *LHS = SE.getSCEVAtScope(V: ICmp->getOperand(i_nocapture: 0), L);
623 const SCEV *RHS = SE.getSCEVAtScope(V: ICmp->getOperand(i_nocapture: 1), L);
624
625 LHS = tryForwardThroughPHI(Expr: LHS, R&: Context.CurRegion, SE, SD: this);
626 RHS = tryForwardThroughPHI(Expr: RHS, R&: Context.CurRegion, SE, SD: this);
627
628 // If unsigned operations are not allowed try to approximate the region.
629 if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
630 return !IsLoopBranch && AllowNonAffineSubRegions &&
631 addOverApproximatedRegion(AR: RI.getRegionFor(BB: &BB), Context);
632
633 // Check for invalid usage of different pointers in one expression.
634 if (ICmp->isEquality() && involvesMultiplePtrs(S0: LHS, S1: nullptr, Scope: L) &&
635 involvesMultiplePtrs(S0: RHS, S1: nullptr, Scope: L))
636 return false;
637
638 // Check for invalid usage of different pointers in a relational comparison.
639 if (ICmp->isRelational() && involvesMultiplePtrs(S0: LHS, S1: RHS, Scope: L))
640 return false;
641
642 if (isAffine(S: LHS, Scope: L, Context) && isAffine(S: RHS, Scope: L, Context))
643 return true;
644
645 if (!IsLoopBranch && AllowNonAffineSubRegions &&
646 addOverApproximatedRegion(AR: RI.getRegionFor(BB: &BB), Context))
647 return true;
648
649 if (IsLoopBranch)
650 return false;
651
652 return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, Arguments: &BB, Arguments&: LHS, Arguments&: RHS,
653 Arguments&: ICmp);
654}
655
656bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
657 bool AllowUnreachable,
658 DetectionContext &Context) {
659 Region &CurRegion = Context.CurRegion;
660
661 Instruction *TI = BB.getTerminator();
662
663 if (AllowUnreachable && isa<UnreachableInst>(Val: TI))
664 return true;
665
666 // Return instructions are only valid if the region is the top level region.
667 if (isa<ReturnInst>(Val: TI) && CurRegion.isTopLevelRegion())
668 return true;
669
670 Value *Condition = getConditionFromTerminator(TI);
671
672 if (!Condition)
673 return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, Arguments: &BB);
674
675 // UndefValue is not allowed as condition.
676 if (isa<UndefValue>(Val: Condition))
677 return invalid<ReportUndefCond>(Context, /*Assert=*/true, Arguments&: TI, Arguments: &BB);
678
679 if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI))
680 return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);
681
682 SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI);
683 assert(SI && "Terminator was neither branch nor switch");
684
685 return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
686}
687
688bool ScopDetection::isValidCallInst(CallInst &CI,
689 DetectionContext &Context) const {
690 if (CI.doesNotReturn())
691 return false;
692
693 if (CI.doesNotAccessMemory())
694 return true;
695
696 if (auto *II = dyn_cast<IntrinsicInst>(Val: &CI))
697 if (isValidIntrinsicInst(II&: *II, Context))
698 return true;
699
700 Function *CalledFunction = CI.getCalledFunction();
701
702 // Indirect calls are not supported.
703 if (CalledFunction == nullptr)
704 return false;
705
706 if (isDebugCall(Inst: &CI)) {
707 LLVM_DEBUG(dbgs() << "Allow call to debug function: "
708 << CalledFunction->getName() << '\n');
709 return true;
710 }
711
712 if (AllowModrefCall) {
713 MemoryEffects ME = AA.getMemoryEffects(F: CalledFunction);
714 if (ME.onlyAccessesArgPointees()) {
715 for (const auto &Arg : CI.args()) {
716 if (!Arg->getType()->isPointerTy())
717 continue;
718
719 // Bail if a pointer argument has a base address not known to
720 // ScalarEvolution. Note that a zero pointer is acceptable.
721 auto *ArgSCEV = SE.getSCEVAtScope(V: Arg, L: LI.getLoopFor(BB: CI.getParent()));
722 if (ArgSCEV->isZero())
723 continue;
724
725 auto *BP = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: ArgSCEV));
726 if (!BP)
727 return false;
728
729 // Implicitly disable delinearization since we have an unknown
730 // accesses with an unknown access function.
731 Context.HasUnknownAccess = true;
732 }
733
734 // Explicitly use addUnknown so we don't put a loop-variant
735 // pointer into the alias set.
736 Context.AST.addUnknown(I: &CI);
737 return true;
738 }
739
740 if (ME.onlyReadsMemory()) {
741 // Implicitly disable delinearization since we have an unknown
742 // accesses with an unknown access function.
743 Context.HasUnknownAccess = true;
744 // Explicitly use addUnknown so we don't put a loop-variant
745 // pointer into the alias set.
746 Context.AST.addUnknown(I: &CI);
747 return true;
748 }
749 return false;
750 }
751
752 return false;
753}
754
755bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
756 DetectionContext &Context) const {
757 if (isIgnoredIntrinsic(V: &II))
758 return true;
759
760 // The closest loop surrounding the call instruction.
761 Loop *L = LI.getLoopFor(BB: II.getParent());
762
763 // The access function and base pointer for memory intrinsics.
764 const SCEV *AF;
765 const SCEVUnknown *BP;
766
767 switch (II.getIntrinsicID()) {
768 // Memory intrinsics that can be represented are supported.
769 case Intrinsic::memmove:
770 case Intrinsic::memcpy:
771 AF = SE.getSCEVAtScope(V: cast<MemTransferInst>(Val&: II).getSource(), L);
772 if (!AF->isZero()) {
773 BP = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AF));
774 // Bail if the source pointer is not valid.
775 if (!isValidAccess(Inst: &II, AF, BP, Context))
776 return false;
777 }
778 [[fallthrough]];
779 case Intrinsic::memset:
780 AF = SE.getSCEVAtScope(V: cast<MemIntrinsic>(Val&: II).getDest(), L);
781 if (!AF->isZero()) {
782 BP = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AF));
783 // Bail if the destination pointer is not valid.
784 if (!isValidAccess(Inst: &II, AF, BP, Context))
785 return false;
786 }
787
788 // Bail if the length is not affine.
789 if (!isAffine(S: SE.getSCEVAtScope(V: cast<MemIntrinsic>(Val&: II).getLength(), L), Scope: L,
790 Context))
791 return false;
792
793 return true;
794 default:
795 break;
796 }
797
798 return false;
799}
800
801bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
802 DetectionContext &Ctx) const {
803 // A reference to function argument or constant value is invariant.
804 if (isa<Argument>(Val) || isa<Constant>(Val))
805 return true;
806
807 Instruction *I = dyn_cast<Instruction>(Val: &Val);
808 if (!I)
809 return false;
810
811 if (!Reg.contains(Inst: I))
812 return true;
813
814 // Loads within the SCoP may read arbitrary values, need to hoist them. If it
815 // is not hoistable, it will be rejected later, but here we assume it is and
816 // that makes the value invariant.
817 if (auto LI = dyn_cast<LoadInst>(Val: I)) {
818 Ctx.RequiredILS.insert(X: LI);
819 return true;
820 }
821
822 return false;
823}
824
825namespace {
826
827/// Remove smax of smax(0, size) expressions from a SCEV expression and
828/// register the '...' components.
829///
830/// Array access expressions as they are generated by GFortran contain smax(0,
831/// size) expressions that confuse the 'normal' delinearization algorithm.
832/// However, if we extract such expressions before the normal delinearization
833/// takes place they can actually help to identify array size expressions in
834/// Fortran accesses. For the subsequently following delinearization the smax(0,
835/// size) component can be replaced by just 'size'. This is correct as we will
836/// always add and verify the assumption that for all subscript expressions
837/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
838/// that 0 <= size, which means smax(0, size) == size.
839class SCEVRemoveMax final : public SCEVRewriteVisitor<SCEVRemoveMax> {
840public:
841 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
842 : SCEVRewriteVisitor(SE), Terms(Terms) {}
843
844 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
845 std::vector<const SCEV *> *Terms = nullptr) {
846 SCEVRemoveMax Rewriter(SE, Terms);
847 return Rewriter.visit(S: Scev);
848 }
849
850 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
851 if ((Expr->getNumOperands() == 2) && Expr->getOperand(i: 0)->isZero()) {
852 auto Res = visit(S: Expr->getOperand(i: 1));
853 if (Terms)
854 (*Terms).push_back(x: Res);
855 return Res;
856 }
857
858 return Expr;
859 }
860
861private:
862 std::vector<const SCEV *> *Terms;
863};
864} // namespace
865
866SmallVector<const SCEV *, 4>
867ScopDetection::getDelinearizationTerms(DetectionContext &Context,
868 const SCEVUnknown *BasePointer) const {
869 SmallVector<const SCEV *, 4> Terms;
870 for (const auto &Pair : Context.Accesses[BasePointer]) {
871 std::vector<const SCEV *> MaxTerms;
872 SCEVRemoveMax::rewrite(Scev: Pair.second, SE, Terms: &MaxTerms);
873 if (!MaxTerms.empty()) {
874 Terms.insert(I: Terms.begin(), From: MaxTerms.begin(), To: MaxTerms.end());
875 continue;
876 }
877 // In case the outermost expression is a plain add, we check if any of its
878 // terms has the form 4 * %inst * %param * %param ..., aka a term that
879 // contains a product between a parameter and an instruction that is
880 // inside the scop. Such instructions, if allowed at all, are instructions
881 // SCEV can not represent, but Polly is still looking through. As a
882 // result, these instructions can depend on induction variables and are
883 // most likely no array sizes. However, terms that are multiplied with
884 // them are likely candidates for array sizes.
885 if (auto *AF = dyn_cast<SCEVAddExpr>(Val: Pair.second)) {
886 for (auto Op : AF->operands()) {
887 if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Val: Op))
888 collectParametricTerms(SE, Expr: AF2, Terms);
889 if (auto *AF2 = dyn_cast<SCEVMulExpr>(Val: Op)) {
890 SmallVector<const SCEV *, 0> Operands;
891
892 for (auto *MulOp : AF2->operands()) {
893 if (auto *Const = dyn_cast<SCEVConstant>(Val: MulOp))
894 Operands.push_back(Elt: Const);
895 if (auto *Unknown = dyn_cast<SCEVUnknown>(Val: MulOp)) {
896 if (auto *Inst = dyn_cast<Instruction>(Val: Unknown->getValue())) {
897 if (!Context.CurRegion.contains(Inst))
898 Operands.push_back(Elt: MulOp);
899
900 } else {
901 Operands.push_back(Elt: MulOp);
902 }
903 }
904 }
905 if (Operands.size())
906 Terms.push_back(Elt: SE.getMulExpr(Ops&: Operands));
907 }
908 }
909 }
910 if (Terms.empty())
911 collectParametricTerms(SE, Expr: Pair.second, Terms);
912 }
913 return Terms;
914}
915
916bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
917 SmallVectorImpl<const SCEV *> &Sizes,
918 const SCEVUnknown *BasePointer,
919 Loop *Scope) const {
920 // If no sizes were found, all sizes are trivially valid. We allow this case
921 // to make it possible to pass known-affine accesses to the delinearization to
922 // try to recover some interesting multi-dimensional accesses, but to still
923 // allow the already known to be affine access in case the delinearization
924 // fails. In such situations, the delinearization will just return a Sizes
925 // array of size zero.
926 if (Sizes.size() == 0)
927 return true;
928
929 Value *BaseValue = BasePointer->getValue();
930 Region &CurRegion = Context.CurRegion;
931 for (const SCEV *DelinearizedSize : Sizes) {
932 // Don't pass down the scope to isAfffine; array dimensions must be
933 // invariant across the entire scop.
934 if (!isAffine(S: DelinearizedSize, Scope: nullptr, Context)) {
935 Sizes.clear();
936 break;
937 }
938 if (auto *Unknown = dyn_cast<SCEVUnknown>(Val: DelinearizedSize)) {
939 auto *V = dyn_cast<Value>(Val: Unknown->getValue());
940 if (auto *Load = dyn_cast<LoadInst>(Val: V)) {
941 if (Context.CurRegion.contains(Inst: Load) &&
942 isHoistableLoad(LInst: Load, R&: CurRegion, LI, SE, DT, KnownInvariantLoads: Context.RequiredILS))
943 Context.RequiredILS.insert(X: Load);
944 continue;
945 }
946 }
947 if (hasScalarDepsInsideRegion(Expr: DelinearizedSize, R: &CurRegion, Scope, AllowLoops: false,
948 ILS: Context.RequiredILS))
949 return invalid<ReportNonAffineAccess>(
950 Context, /*Assert=*/true, Arguments&: DelinearizedSize,
951 Arguments&: Context.Accesses[BasePointer].front().first, Arguments&: BaseValue);
952 }
953
954 // No array shape derived.
955 if (Sizes.empty()) {
956 if (AllowNonAffine)
957 return true;
958
959 for (const auto &Pair : Context.Accesses[BasePointer]) {
960 const Instruction *Insn = Pair.first;
961 const SCEV *AF = Pair.second;
962
963 if (!isAffine(S: AF, Scope, Context)) {
964 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments&: AF, Arguments&: Insn,
965 Arguments&: BaseValue);
966 if (!KeepGoing)
967 return false;
968 }
969 }
970 return false;
971 }
972 return true;
973}
974
975// We first store the resulting memory accesses in TempMemoryAccesses. Only
976// if the access functions for all memory accesses have been successfully
977// delinearized we continue. Otherwise, we either report a failure or, if
978// non-affine accesses are allowed, we drop the information. In case the
979// information is dropped the memory accesses need to be overapproximated
980// when translated to a polyhedral representation.
981bool ScopDetection::computeAccessFunctions(
982 DetectionContext &Context, const SCEVUnknown *BasePointer,
983 std::shared_ptr<ArrayShape> Shape) const {
984 Value *BaseValue = BasePointer->getValue();
985 bool BasePtrHasNonAffine = false;
986 MapInsnToMemAcc TempMemoryAccesses;
987 for (const auto &Pair : Context.Accesses[BasePointer]) {
988 const Instruction *Insn = Pair.first;
989 auto *AF = Pair.second;
990 AF = SCEVRemoveMax::rewrite(Scev: AF, SE);
991 bool IsNonAffine = false;
992 TempMemoryAccesses.insert(x: std::make_pair(x&: Insn, y: MemAcc(Insn, Shape)));
993 MemAcc *Acc = &TempMemoryAccesses.find(x: Insn)->second;
994 auto *Scope = LI.getLoopFor(BB: Insn->getParent());
995
996 if (!AF) {
997 if (isAffine(S: Pair.second, Scope, Context))
998 Acc->DelinearizedSubscripts.push_back(Elt: Pair.second);
999 else
1000 IsNonAffine = true;
1001 } else {
1002 if (Shape->DelinearizedSizes.size() == 0) {
1003 Acc->DelinearizedSubscripts.push_back(Elt: AF);
1004 } else {
1005 llvm::computeAccessFunctions(SE, Expr: AF, Subscripts&: Acc->DelinearizedSubscripts,
1006 Sizes&: Shape->DelinearizedSizes);
1007 if (Acc->DelinearizedSubscripts.size() == 0)
1008 IsNonAffine = true;
1009 }
1010 for (const SCEV *S : Acc->DelinearizedSubscripts)
1011 if (!isAffine(S, Scope, Context))
1012 IsNonAffine = true;
1013 }
1014
1015 // (Possibly) report non affine access
1016 if (IsNonAffine) {
1017 BasePtrHasNonAffine = true;
1018 if (!AllowNonAffine) {
1019 invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments: Pair.second,
1020 Arguments&: Insn, Arguments&: BaseValue);
1021 if (!KeepGoing)
1022 return false;
1023 }
1024 }
1025 }
1026
1027 if (!BasePtrHasNonAffine)
1028 Context.InsnToMemAcc.insert(first: TempMemoryAccesses.begin(),
1029 last: TempMemoryAccesses.end());
1030
1031 return true;
1032}
1033
1034bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
1035 const SCEVUnknown *BasePointer,
1036 Loop *Scope) const {
1037 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1038
1039 auto Terms = getDelinearizationTerms(Context, BasePointer);
1040
1041 findArrayDimensions(SE, Terms, Sizes&: Shape->DelinearizedSizes,
1042 ElementSize: Context.ElementSize[BasePointer]);
1043
1044 if (!hasValidArraySizes(Context, Sizes&: Shape->DelinearizedSizes, BasePointer,
1045 Scope))
1046 return false;
1047
1048 return computeAccessFunctions(Context, BasePointer, Shape);
1049}
1050
1051bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
1052 // TODO: If we have an unknown access and other non-affine accesses we do
1053 // not try to delinearize them for now.
1054 if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
1055 return AllowNonAffine;
1056
1057 for (auto &Pair : Context.NonAffineAccesses) {
1058 auto *BasePointer = Pair.first;
1059 auto *Scope = Pair.second;
1060 if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
1061 Context.IsInvalid = true;
1062 if (!KeepGoing)
1063 return false;
1064 }
1065 }
1066 return true;
1067}
1068
1069bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
1070 const SCEVUnknown *BP,
1071 DetectionContext &Context) const {
1072
1073 if (!BP)
1074 return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Arguments&: Inst);
1075
1076 auto *BV = BP->getValue();
1077 if (isa<UndefValue>(Val: BV))
1078 return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Arguments&: Inst);
1079
1080 // FIXME: Think about allowing IntToPtrInst
1081 if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(Val: BV))
1082 return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Arguments&: Inst);
1083
1084 // Check that the base address of the access is invariant in the current
1085 // region.
1086 if (!isInvariant(Val&: *BV, Reg: Context.CurRegion, Ctx&: Context))
1087 return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, Arguments&: BV, Arguments&: Inst);
1088
1089 AF = SE.getMinusSCEV(LHS: AF, RHS: BP);
1090
1091 const SCEV *Size;
1092 if (!isa<MemIntrinsic>(Val: Inst)) {
1093 Size = SE.getElementSize(Inst);
1094 } else {
1095 auto *SizeTy =
1096 SE.getEffectiveSCEVType(Ty: PointerType::getUnqual(C&: SE.getContext()));
1097 Size = SE.getConstant(Ty: SizeTy, V: 8);
1098 }
1099
1100 if (Context.ElementSize[BP]) {
1101 if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
1102 return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
1103 Arguments&: Inst, Arguments&: BV);
1104
1105 Context.ElementSize[BP] = SE.getSMinExpr(LHS: Size, RHS: Context.ElementSize[BP]);
1106 } else {
1107 Context.ElementSize[BP] = Size;
1108 }
1109
1110 bool IsVariantInNonAffineLoop = false;
1111 SetVector<const Loop *> Loops;
1112 findLoops(Expr: AF, Loops);
1113 for (const Loop *L : Loops)
1114 if (Context.BoxedLoopsSet.count(key: L))
1115 IsVariantInNonAffineLoop = true;
1116
1117 auto *Scope = LI.getLoopFor(BB: Inst->getParent());
1118 bool IsAffine = !IsVariantInNonAffineLoop && isAffine(S: AF, Scope, Context);
1119 // Do not try to delinearize memory intrinsics and force them to be affine.
1120 if (isa<MemIntrinsic>(Val: Inst) && !IsAffine) {
1121 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments&: AF, Arguments&: Inst,
1122 Arguments&: BV);
1123 } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
1124 Context.Accesses[BP].push_back(x: {Inst, AF});
1125
1126 if (!IsAffine)
1127 Context.NonAffineAccesses.insert(
1128 X: std::make_pair(x&: BP, y: LI.getLoopFor(BB: Inst->getParent())));
1129 } else if (!AllowNonAffine && !IsAffine) {
1130 return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments&: AF, Arguments&: Inst,
1131 Arguments&: BV);
1132 }
1133
1134 if (IgnoreAliasing)
1135 return true;
1136
1137 // Check if the base pointer of the memory access does alias with
1138 // any other pointer. This cannot be handled at the moment.
1139 AAMDNodes AATags = Inst->getAAMetadata();
1140 AliasSet &AS = Context.AST.getAliasSetFor(
1141 MemLoc: MemoryLocation::getBeforeOrAfter(Ptr: BP->getValue(), AATags));
1142
1143 if (!AS.isMustAlias()) {
1144 if (PollyUseRuntimeAliasChecks) {
1145 bool CanBuildRunTimeCheck = true;
1146 // The run-time alias check places code that involves the base pointer at
1147 // the beginning of the SCoP. This breaks if the base pointer is defined
1148 // inside the scop. Hence, we can only create a run-time check if we are
1149 // sure the base pointer is not an instruction defined inside the scop.
1150 // However, we can ignore loads that will be hoisted.
1151
1152 auto ASPointers = AS.getPointers();
1153
1154 InvariantLoadsSetTy VariantLS, InvariantLS;
1155 // In order to detect loads which are dependent on other invariant loads
1156 // as invariant, we use fixed-point iteration method here i.e we iterate
1157 // over the alias set for arbitrary number of times until it is safe to
1158 // assume that all the invariant loads have been detected
1159 while (true) {
1160 const unsigned int VariantSize = VariantLS.size(),
1161 InvariantSize = InvariantLS.size();
1162
1163 for (const Value *Ptr : ASPointers) {
1164 Instruction *Inst = dyn_cast<Instruction>(Val: const_cast<Value *>(Ptr));
1165 if (Inst && Context.CurRegion.contains(Inst)) {
1166 auto *Load = dyn_cast<LoadInst>(Val: Inst);
1167 if (Load && InvariantLS.count(key: Load))
1168 continue;
1169 if (Load && isHoistableLoad(LInst: Load, R&: Context.CurRegion, LI, SE, DT,
1170 KnownInvariantLoads: InvariantLS)) {
1171 if (VariantLS.count(key: Load))
1172 VariantLS.remove(X: Load);
1173 Context.RequiredILS.insert(X: Load);
1174 InvariantLS.insert(X: Load);
1175 } else {
1176 CanBuildRunTimeCheck = false;
1177 VariantLS.insert(X: Load);
1178 }
1179 }
1180 }
1181
1182 if (InvariantSize == InvariantLS.size() &&
1183 VariantSize == VariantLS.size())
1184 break;
1185 }
1186
1187 if (CanBuildRunTimeCheck)
1188 return true;
1189 }
1190 return invalid<ReportAlias>(Context, /*Assert=*/true, Arguments&: Inst, Arguments&: AS);
1191 }
1192
1193 return true;
1194}
1195
1196bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1197 DetectionContext &Context) const {
1198 Value *Ptr = Inst.getPointerOperand();
1199 Loop *L = LI.getLoopFor(BB: Inst->getParent());
1200 const SCEV *AccessFunction = SE.getSCEVAtScope(V: Ptr, L);
1201 const SCEVUnknown *BasePointer;
1202
1203 BasePointer = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AccessFunction));
1204
1205 return isValidAccess(Inst, AF: AccessFunction, BP: BasePointer, Context);
1206}
1207
1208bool ScopDetection::isValidInstruction(Instruction &Inst,
1209 DetectionContext &Context) {
1210 for (auto &Op : Inst.operands()) {
1211 auto *OpInst = dyn_cast<Instruction>(Val: &Op);
1212
1213 if (!OpInst)
1214 continue;
1215
1216 if (isErrorBlock(BB&: *OpInst->getParent(), R: Context.CurRegion)) {
1217 auto *PHI = dyn_cast<PHINode>(Val: OpInst);
1218 if (PHI) {
1219 for (User *U : PHI->users()) {
1220 auto *UI = dyn_cast<Instruction>(Val: U);
1221 if (!UI || !UI->isTerminator())
1222 return false;
1223 }
1224 } else {
1225 return false;
1226 }
1227 }
1228 }
1229
1230 if (isa<LandingPadInst>(Val: &Inst) || isa<ResumeInst>(Val: &Inst))
1231 return false;
1232
1233 // We only check the call instruction but not invoke instruction.
1234 if (CallInst *CI = dyn_cast<CallInst>(Val: &Inst)) {
1235 if (isValidCallInst(CI&: *CI, Context))
1236 return true;
1237
1238 return invalid<ReportFuncCall>(Context, /*Assert=*/true, Arguments: &Inst);
1239 }
1240
1241 if (!Inst.mayReadOrWriteMemory()) {
1242 if (!isa<AllocaInst>(Val: Inst))
1243 return true;
1244
1245 return invalid<ReportAlloca>(Context, /*Assert=*/true, Arguments: &Inst);
1246 }
1247
1248 // Check the access function.
1249 if (auto MemInst = MemAccInst::dyn_cast(V&: Inst)) {
1250 Context.hasStores |= isa<StoreInst>(Val: MemInst);
1251 Context.hasLoads |= isa<LoadInst>(Val: MemInst);
1252 if (!MemInst.isSimple())
1253 return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
1254 Arguments: &Inst);
1255
1256 return isValidMemoryAccess(Inst: MemInst, Context);
1257 }
1258
1259 // We do not know this instruction, therefore we assume it is invalid.
1260 return invalid<ReportUnknownInst>(Context, /*Assert=*/true, Arguments: &Inst);
1261}
1262
1263/// Check whether @p L has exiting blocks.
1264///
1265/// @param L The loop of interest
1266///
1267/// @return True if the loop has exiting blocks, false otherwise.
1268static bool hasExitingBlocks(Loop *L) {
1269 SmallVector<BasicBlock *, 4> ExitingBlocks;
1270 L->getExitingBlocks(ExitingBlocks);
1271 return !ExitingBlocks.empty();
1272}
1273
1274bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
1275 // FIXME: Yes, this is bad. isValidCFG() may call invalid<Reason>() which
1276 // causes the SCoP to be rejected regardless on whether non-ISL trip counts
1277 // could be used. We currently preserve the legacy behaviour of rejecting
1278 // based on Context.Log.size() added by isValidCFG() or before, regardless on
1279 // whether the ISL trip count can be used or can be used as a non-affine
1280 // region. However, we allow rejections by isValidCFG() that do not result in
1281 // an error log entry.
1282 bool OldIsInvalid = Context.IsInvalid;
1283
1284 // Ensure the loop has valid exiting blocks as well as latches, otherwise we
1285 // need to overapproximate it as a boxed loop.
1286 SmallVector<BasicBlock *, 4> LoopControlBlocks;
1287 L->getExitingBlocks(ExitingBlocks&: LoopControlBlocks);
1288 L->getLoopLatches(LoopLatches&: LoopControlBlocks);
1289 for (BasicBlock *ControlBB : LoopControlBlocks) {
1290 if (!isValidCFG(BB&: *ControlBB, IsLoopBranch: true, AllowUnreachable: false, Context)) {
1291 Context.IsInvalid = OldIsInvalid || Context.Log.size();
1292 return false;
1293 }
1294 }
1295
1296 // We can use ISL to compute the trip count of L.
1297 Context.IsInvalid = OldIsInvalid || Context.Log.size();
1298 return true;
1299}
1300
1301bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
1302 // Loops that contain part but not all of the blocks of a region cannot be
1303 // handled by the schedule generation. Such loop constructs can happen
1304 // because a region can contain BBs that have no path to the exit block
1305 // (Infinite loops, UnreachableInst), but such blocks are never part of a
1306 // loop.
1307 //
1308 // _______________
1309 // | Loop Header | <-----------.
1310 // --------------- |
1311 // | |
1312 // _______________ ______________
1313 // | RegionEntry |-----> | RegionExit |----->
1314 // --------------- --------------
1315 // |
1316 // _______________
1317 // | EndlessLoop | <--.
1318 // --------------- |
1319 // | |
1320 // \------------/
1321 //
1322 // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
1323 // neither entirely contained in the region RegionEntry->RegionExit
1324 // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
1325 // in the loop.
1326 // The block EndlessLoop is contained in the region because Region::contains
1327 // tests whether it is not dominated by RegionExit. This is probably to not
1328 // having to query the PostdominatorTree. Instead of an endless loop, a dead
1329 // end can also be formed by an UnreachableInst. This case is already caught
1330 // by isErrorBlock(). We hence only have to reject endless loops here.
1331 if (!hasExitingBlocks(L))
1332 return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, Arguments&: L);
1333
1334 // The algorithm for domain construction assumes that loops has only a single
1335 // exit block (and hence corresponds to a subregion). Note that we cannot use
1336 // L->getExitBlock() because it does not check whether all exiting edges point
1337 // to the same BB.
1338 SmallVector<BasicBlock *, 4> ExitBlocks;
1339 L->getExitBlocks(ExitBlocks);
1340 BasicBlock *TheExitBlock = ExitBlocks[0];
1341 for (BasicBlock *ExitBB : ExitBlocks) {
1342 if (TheExitBlock != ExitBB)
1343 return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, Arguments&: L);
1344 }
1345
1346 if (canUseISLTripCount(L, Context))
1347 return true;
1348
1349 if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
1350 Region *R = RI.getRegionFor(BB: L->getHeader());
1351 while (R != &Context.CurRegion && !R->contains(L))
1352 R = R->getParent();
1353
1354 if (addOverApproximatedRegion(AR: R, Context))
1355 return true;
1356 }
1357
1358 const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
1359 return invalid<ReportLoopBound>(Context, /*Assert=*/true, Arguments&: L, Arguments&: LoopCount);
1360}
1361
1362/// Return the number of loops in @p L (incl. @p L) that have a trip
1363/// count that is not known to be less than @MinProfitableTrips.
1364ScopDetection::LoopStats
1365ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
1366 unsigned MinProfitableTrips) {
1367 auto *TripCount = SE.getBackedgeTakenCount(L);
1368
1369 int NumLoops = 1;
1370 int MaxLoopDepth = 1;
1371 if (MinProfitableTrips > 0)
1372 if (auto *TripCountC = dyn_cast<SCEVConstant>(Val: TripCount))
1373 if (TripCountC->getType()->getScalarSizeInBits() <= 64)
1374 if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
1375 NumLoops -= 1;
1376
1377 for (auto &SubLoop : *L) {
1378 LoopStats Stats = countBeneficialSubLoops(L: SubLoop, SE, MinProfitableTrips);
1379 NumLoops += Stats.NumLoops;
1380 MaxLoopDepth = std::max(a: MaxLoopDepth, b: Stats.MaxDepth + 1);
1381 }
1382
1383 return {.NumLoops: NumLoops, .MaxDepth: MaxLoopDepth};
1384}
1385
1386ScopDetection::LoopStats
1387ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
1388 LoopInfo &LI, unsigned MinProfitableTrips) {
1389 int LoopNum = 0;
1390 int MaxLoopDepth = 0;
1391
1392 auto L = LI.getLoopFor(BB: R->getEntry());
1393
1394 // If L is fully contained in R, move to first loop surrounding R. Otherwise,
1395 // L is either nullptr or already surrounding R.
1396 if (L && R->contains(L)) {
1397 L = R->outermostLoopInRegion(L);
1398 L = L->getParentLoop();
1399 }
1400
1401 auto SubLoops =
1402 L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());
1403
1404 for (auto &SubLoop : SubLoops)
1405 if (R->contains(L: SubLoop)) {
1406 LoopStats Stats =
1407 countBeneficialSubLoops(L: SubLoop, SE, MinProfitableTrips);
1408 LoopNum += Stats.NumLoops;
1409 MaxLoopDepth = std::max(a: MaxLoopDepth, b: Stats.MaxDepth);
1410 }
1411
1412 return {.NumLoops: LoopNum, .MaxDepth: MaxLoopDepth};
1413}
1414
1415static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
1416 const DominatorTree &DT) {
1417 if (isa<UnreachableInst>(Val: BB.getTerminator()))
1418 return true;
1419
1420 if (LI.isLoopHeader(BB: &BB))
1421 return false;
1422
1423 // Don't consider something outside the SCoP as error block. It will precede
1424 // the code versioning runtime check.
1425 if (!R.contains(BB: &BB))
1426 return false;
1427
1428 // Basic blocks that are always executed are not considered error blocks,
1429 // as their execution can not be a rare event.
1430 bool DominatesAllPredecessors = true;
1431 if (R.isTopLevelRegion()) {
1432 for (BasicBlock &I : *R.getEntry()->getParent()) {
1433 if (isa<ReturnInst>(Val: I.getTerminator()) && !DT.dominates(A: &BB, B: &I)) {
1434 DominatesAllPredecessors = false;
1435 break;
1436 }
1437 }
1438 } else {
1439 for (auto Pred : predecessors(BB: R.getExit())) {
1440 if (R.contains(BB: Pred) && !DT.dominates(A: &BB, B: Pred)) {
1441 DominatesAllPredecessors = false;
1442 break;
1443 }
1444 }
1445 }
1446
1447 if (DominatesAllPredecessors)
1448 return false;
1449
1450 for (Instruction &Inst : BB)
1451 if (CallInst *CI = dyn_cast<CallInst>(Val: &Inst)) {
1452 if (isDebugCall(Inst: CI))
1453 continue;
1454
1455 if (isIgnoredIntrinsic(V: CI))
1456 continue;
1457
1458 // memset, memcpy and memmove are modeled intrinsics.
1459 if (isa<MemSetInst>(Val: CI) || isa<MemTransferInst>(Val: CI))
1460 continue;
1461
1462 if (!CI->doesNotAccessMemory())
1463 return true;
1464 if (CI->doesNotReturn())
1465 return true;
1466 }
1467
1468 return false;
1469}
1470
1471bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
1472 if (!PollyAllowErrorBlocks)
1473 return false;
1474
1475 auto It = ErrorBlockCache.insert(KV: {std::make_pair(x: &BB, y: &R), false});
1476 if (!It.second)
1477 return It.first->getSecond();
1478
1479 bool Result = isErrorBlockImpl(BB, R, LI, DT);
1480 It.first->second = Result;
1481 return Result;
1482}
1483
1484Region *ScopDetection::expandRegion(Region &R) {
1485 // Initial no valid region was found (greater than R)
1486 std::unique_ptr<Region> LastValidRegion;
1487 auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());
1488
1489 LLVM_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");
1490
1491 while (ExpandedRegion) {
1492 BBPair P = getBBPairForRegion(R: ExpandedRegion.get());
1493 std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
1494 Entry = std::make_unique<DetectionContext>(args&: *ExpandedRegion, args&: AA,
1495 /*Verifying=*/args: false);
1496 DetectionContext &Context = *Entry.get();
1497
1498 LLVM_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n");
1499 // Only expand when we did not collect errors.
1500
1501 if (!Context.Log.hasErrors()) {
1502 // If the exit is valid check all blocks
1503 // - if true, a valid region was found => store it + keep expanding
1504 // - if false, .tbd. => stop (should this really end the loop?)
1505 if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
1506 removeCachedResults(R: *ExpandedRegion);
1507 DetectionContextMap.erase(Val: P);
1508 break;
1509 }
1510
1511 // Store this region, because it is the greatest valid (encountered so
1512 // far).
1513 if (LastValidRegion) {
1514 removeCachedResults(R: *LastValidRegion);
1515 DetectionContextMap.erase(Val: P);
1516 }
1517 LastValidRegion = std::move(ExpandedRegion);
1518
1519 // Create and test the next greater region (if any)
1520 ExpandedRegion =
1521 std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());
1522
1523 } else {
1524 // Create and test the next greater region (if any)
1525 removeCachedResults(R: *ExpandedRegion);
1526 DetectionContextMap.erase(Val: P);
1527 ExpandedRegion =
1528 std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
1529 }
1530 }
1531
1532 LLVM_DEBUG({
1533 if (LastValidRegion)
1534 dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
1535 else
1536 dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
1537 });
1538
1539 return LastValidRegion.release();
1540}
1541
1542static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1543 for (const BasicBlock *BB : R.blocks())
1544 if (R.contains(L: LI.getLoopFor(BB)))
1545 return false;
1546
1547 return true;
1548}
1549
1550void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1551 for (auto &SubRegion : R) {
1552 if (ValidRegions.count(key: SubRegion.get())) {
1553 removeCachedResults(R: *SubRegion.get());
1554 } else
1555 removeCachedResultsRecursively(R: *SubRegion);
1556 }
1557}
1558
1559void ScopDetection::removeCachedResults(const Region &R) {
1560 ValidRegions.remove(X: &R);
1561}
1562
1563void ScopDetection::findScops(Region &R) {
1564 std::unique_ptr<DetectionContext> &Entry =
1565 DetectionContextMap[getBBPairForRegion(R: &R)];
1566 Entry = std::make_unique<DetectionContext>(args&: R, args&: AA, /*Verifying=*/args: false);
1567 DetectionContext &Context = *Entry.get();
1568
1569 bool DidBailout = true;
1570 if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
1571 invalid<ReportUnprofitable>(Context, /*Assert=*/true, Arguments: &R);
1572 else
1573 DidBailout = !isValidRegion(Context);
1574
1575 (void)DidBailout;
1576 if (KeepGoing) {
1577 assert((!DidBailout || Context.IsInvalid) &&
1578 "With -polly-detect-keep-going, it is sufficient that if "
1579 "isValidRegion short-circuited, that SCoP is invalid");
1580 } else {
1581 assert(DidBailout == Context.IsInvalid &&
1582 "isValidRegion must short-circuit iff the ScoP is invalid");
1583 }
1584
1585 if (Context.IsInvalid) {
1586 removeCachedResults(R);
1587 } else {
1588 ValidRegions.insert(X: &R);
1589 return;
1590 }
1591
1592 for (auto &SubRegion : R)
1593 findScops(R&: *SubRegion);
1594
1595 // Try to expand regions.
1596 //
1597 // As the region tree normally only contains canonical regions, non canonical
1598 // regions that form a Scop are not found. Therefore, those non canonical
1599 // regions are checked by expanding the canonical ones.
1600
1601 std::vector<Region *> ToExpand;
1602
1603 for (auto &SubRegion : R)
1604 ToExpand.push_back(x: SubRegion.get());
1605
1606 for (Region *CurrentRegion : ToExpand) {
1607 // Skip invalid regions. Regions may become invalid, if they are element of
1608 // an already expanded region.
1609 if (!ValidRegions.count(key: CurrentRegion))
1610 continue;
1611
1612 // Skip regions that had errors.
1613 bool HadErrors = lookupRejectionLog(R: CurrentRegion)->hasErrors();
1614 if (HadErrors)
1615 continue;
1616
1617 Region *ExpandedR = expandRegion(R&: *CurrentRegion);
1618
1619 if (!ExpandedR)
1620 continue;
1621
1622 R.addSubRegion(SubRegion: ExpandedR, moveChildren: true);
1623 ValidRegions.insert(X: ExpandedR);
1624 removeCachedResults(R: *CurrentRegion);
1625 removeCachedResultsRecursively(R: *ExpandedR);
1626 }
1627}
1628
1629bool ScopDetection::allBlocksValid(DetectionContext &Context) {
1630 Region &CurRegion = Context.CurRegion;
1631
1632 for (const BasicBlock *BB : CurRegion.blocks()) {
1633 Loop *L = LI.getLoopFor(BB);
1634 if (L && L->getHeader() == BB) {
1635 if (CurRegion.contains(L)) {
1636 if (!isValidLoop(L, Context)) {
1637 Context.IsInvalid = true;
1638 if (!KeepGoing)
1639 return false;
1640 }
1641 } else {
1642 SmallVector<BasicBlock *, 1> Latches;
1643 L->getLoopLatches(LoopLatches&: Latches);
1644 for (BasicBlock *Latch : Latches)
1645 if (CurRegion.contains(BB: Latch))
1646 return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
1647 Arguments&: L);
1648 }
1649 }
1650 }
1651
1652 for (BasicBlock *BB : CurRegion.blocks()) {
1653 bool IsErrorBlock = isErrorBlock(BB&: *BB, R: CurRegion);
1654
1655 // Also check exception blocks (and possibly register them as non-affine
1656 // regions). Even though exception blocks are not modeled, we use them
1657 // to forward-propagate domain constraints during ScopInfo construction.
1658 if (!isValidCFG(BB&: *BB, IsLoopBranch: false, AllowUnreachable: IsErrorBlock, Context) && !KeepGoing)
1659 return false;
1660
1661 if (IsErrorBlock)
1662 continue;
1663
1664 for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
1665 if (!isValidInstruction(Inst&: *I, Context)) {
1666 Context.IsInvalid = true;
1667 if (!KeepGoing)
1668 return false;
1669 }
1670 }
1671
1672 if (!hasAffineMemoryAccesses(Context))
1673 return false;
1674
1675 return true;
1676}
1677
1678bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1679 int NumLoops) const {
1680 int InstCount = 0;
1681
1682 if (NumLoops == 0)
1683 return false;
1684
1685 for (auto *BB : Context.CurRegion.blocks())
1686 if (Context.CurRegion.contains(L: LI.getLoopFor(BB)))
1687 InstCount += BB->size();
1688
1689 InstCount = InstCount / NumLoops;
1690
1691 return InstCount >= ProfitabilityMinPerLoopInstructions;
1692}
1693
1694bool ScopDetection::hasPossiblyDistributableLoop(
1695 DetectionContext &Context) const {
1696 for (auto *BB : Context.CurRegion.blocks()) {
1697 auto *L = LI.getLoopFor(BB);
1698 if (!Context.CurRegion.contains(L))
1699 continue;
1700 if (Context.BoxedLoopsSet.count(key: L))
1701 continue;
1702 unsigned StmtsWithStoresInLoops = 0;
1703 for (auto *LBB : L->blocks()) {
1704 bool MemStore = false;
1705 for (auto &I : *LBB)
1706 MemStore |= isa<StoreInst>(Val: &I);
1707 StmtsWithStoresInLoops += MemStore;
1708 }
1709 return (StmtsWithStoresInLoops > 1);
1710 }
1711 return false;
1712}
1713
1714bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
1715 Region &CurRegion = Context.CurRegion;
1716
1717 if (PollyProcessUnprofitable)
1718 return true;
1719
1720 // We can probably not do a lot on scops that only write or only read
1721 // data.
1722 if (!Context.hasStores || !Context.hasLoads)
1723 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, Arguments: &CurRegion);
1724
1725 int NumLoops =
1726 countBeneficialLoops(R: &CurRegion, SE, LI, MinProfitableTrips: MIN_LOOP_TRIP_COUNT).NumLoops;
1727 int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();
1728
1729 // Scops with at least two loops may allow either loop fusion or tiling and
1730 // are consequently interesting to look at.
1731 if (NumAffineLoops >= 2)
1732 return true;
1733
1734 // A loop with multiple non-trivial blocks might be amendable to distribution.
1735 if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
1736 return true;
1737
1738 // Scops that contain a loop with a non-trivial amount of computation per
1739 // loop-iteration are interesting as we may be able to parallelize such
1740 // loops. Individual loops that have only a small amount of computation
1741 // per-iteration are performance-wise very fragile as any change to the
1742 // loop induction variables may affect performance. To not cause spurious
1743 // performance regressions, we do not consider such loops.
1744 if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
1745 return true;
1746
1747 return invalid<ReportUnprofitable>(Context, /*Assert=*/true, Arguments: &CurRegion);
1748}
1749
1750bool ScopDetection::isValidRegion(DetectionContext &Context) {
1751 Region &CurRegion = Context.CurRegion;
1752
1753 LLVM_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr() << "\n\t");
1754
1755 if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
1756 LLVM_DEBUG(dbgs() << "Top level region is invalid\n");
1757 Context.IsInvalid = true;
1758 return false;
1759 }
1760
1761 DebugLoc DbgLoc;
1762 if (CurRegion.getExit() &&
1763 isa<UnreachableInst>(Val: CurRegion.getExit()->getTerminator())) {
1764 LLVM_DEBUG(dbgs() << "Unreachable in exit\n");
1765 return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
1766 Arguments: CurRegion.getExit(), Arguments&: DbgLoc);
1767 }
1768
1769 if (!OnlyRegion.empty() &&
1770 !CurRegion.getEntry()->getName().count(Str: OnlyRegion)) {
1771 LLVM_DEBUG({
1772 dbgs() << "Region entry does not match -polly-only-region";
1773 dbgs() << "\n";
1774 });
1775 Context.IsInvalid = true;
1776 return false;
1777 }
1778
1779 for (BasicBlock *Pred : predecessors(BB: CurRegion.getEntry())) {
1780 Instruction *PredTerm = Pred->getTerminator();
1781 if (isa<IndirectBrInst>(Val: PredTerm) || isa<CallBrInst>(Val: PredTerm))
1782 return invalid<ReportIndirectPredecessor>(
1783 Context, /*Assert=*/true, Arguments&: PredTerm, Arguments: PredTerm->getDebugLoc());
1784 }
1785
1786 // SCoP cannot contain the entry block of the function, because we need
1787 // to insert alloca instruction there when translate scalar to array.
1788 if (!PollyAllowFullFunction &&
1789 CurRegion.getEntry() ==
1790 &(CurRegion.getEntry()->getParent()->getEntryBlock()))
1791 return invalid<ReportEntry>(Context, /*Assert=*/true, Arguments: CurRegion.getEntry());
1792
1793 if (!allBlocksValid(Context)) {
1794 // TODO: Every failure condition within allBlocksValid should call
1795 // invalid<Reason>(). Otherwise we reject SCoPs without giving feedback to
1796 // the user.
1797 Context.IsInvalid = true;
1798 return false;
1799 }
1800
1801 if (!isReducibleRegion(R&: CurRegion, DbgLoc))
1802 return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
1803 Arguments: &CurRegion, Arguments&: DbgLoc);
1804
1805 LLVM_DEBUG(dbgs() << "OK\n");
1806 return true;
1807}
1808
1809void ScopDetection::markFunctionAsInvalid(Function *F) {
1810 F->addFnAttr(Kind: PollySkipFnAttr);
1811}
1812
1813bool ScopDetection::isValidFunction(Function &F) {
1814 return !F.hasFnAttribute(Kind: PollySkipFnAttr);
1815}
1816
1817void ScopDetection::printLocations(Function &F) {
1818 for (const Region *R : *this) {
1819 unsigned LineEntry, LineExit;
1820 std::string FileName;
1821
1822 getDebugLocation(R, LineBegin&: LineEntry, LineEnd&: LineExit, FileName);
1823 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1824 F.getContext().diagnose(DI: Diagnostic);
1825 }
1826}
1827
1828void ScopDetection::emitMissedRemarks(const Function &F) {
1829 for (auto &DIt : DetectionContextMap) {
1830 DetectionContext &DC = *DIt.getSecond().get();
1831 if (DC.Log.hasErrors())
1832 emitRejectionRemarks(P: DIt.getFirst(), Log: DC.Log, ORE);
1833 }
1834}
1835
1836bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
1837 /// Enum for coloring BBs in Region.
1838 ///
1839 /// WHITE - Unvisited BB in DFS walk.
1840 /// GREY - BBs which are currently on the DFS stack for processing.
1841 /// BLACK - Visited and completely processed BB.
1842 enum Color { WHITE, GREY, BLACK };
1843
1844 BasicBlock *REntry = R.getEntry();
1845 BasicBlock *RExit = R.getExit();
1846 // Map to match the color of a BasicBlock during the DFS walk.
1847 DenseMap<const BasicBlock *, Color> BBColorMap;
1848 // Stack keeping track of current BB and index of next child to be processed.
1849 std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;
1850
1851 unsigned AdjacentBlockIndex = 0;
1852 BasicBlock *CurrBB, *SuccBB;
1853 CurrBB = REntry;
1854
1855 // Initialize the map for all BB with WHITE color.
1856 for (auto *BB : R.blocks())
1857 BBColorMap[BB] = WHITE;
1858
1859 // Process the entry block of the Region.
1860 BBColorMap[CurrBB] = GREY;
1861 DFSStack.push(x: std::make_pair(x&: CurrBB, y: 0));
1862
1863 while (!DFSStack.empty()) {
1864 // Get next BB on stack to be processed.
1865 CurrBB = DFSStack.top().first;
1866 AdjacentBlockIndex = DFSStack.top().second;
1867 DFSStack.pop();
1868
1869 // Loop to iterate over the successors of current BB.
1870 const Instruction *TInst = CurrBB->getTerminator();
1871 unsigned NSucc = TInst->getNumSuccessors();
1872 for (unsigned I = AdjacentBlockIndex; I < NSucc;
1873 ++I, ++AdjacentBlockIndex) {
1874 SuccBB = TInst->getSuccessor(Idx: I);
1875
1876 // Checks for region exit block and self-loops in BB.
1877 if (SuccBB == RExit || SuccBB == CurrBB)
1878 continue;
1879
1880 // WHITE indicates an unvisited BB in DFS walk.
1881 if (BBColorMap[SuccBB] == WHITE) {
1882 // Push the current BB and the index of the next child to be visited.
1883 DFSStack.push(x: std::make_pair(x&: CurrBB, y: I + 1));
1884 // Push the next BB to be processed.
1885 DFSStack.push(x: std::make_pair(x&: SuccBB, y: 0));
1886 // First time the BB is being processed.
1887 BBColorMap[SuccBB] = GREY;
1888 break;
1889 } else if (BBColorMap[SuccBB] == GREY) {
1890 // GREY indicates a loop in the control flow.
1891 // If the destination dominates the source, it is a natural loop
1892 // else, an irreducible control flow in the region is detected.
1893 if (!DT.dominates(A: SuccBB, B: CurrBB)) {
1894 // Get debug info of instruction which causes irregular control flow.
1895 DbgLoc = TInst->getDebugLoc();
1896 return false;
1897 }
1898 }
1899 }
1900
1901 // If all children of current BB have been processed,
1902 // then mark that BB as fully processed.
1903 if (AdjacentBlockIndex == NSucc)
1904 BBColorMap[CurrBB] = BLACK;
1905 }
1906
1907 return true;
1908}
1909
1910static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1911 bool OnlyProfitable) {
1912 if (!OnlyProfitable) {
1913 NumLoopsInScop += Stats.NumLoops;
1914 MaxNumLoopsInScop =
1915 std::max(a: MaxNumLoopsInScop.getValue(), b: (uint64_t)Stats.NumLoops);
1916 if (Stats.MaxDepth == 0)
1917 NumScopsDepthZero++;
1918 else if (Stats.MaxDepth == 1)
1919 NumScopsDepthOne++;
1920 else if (Stats.MaxDepth == 2)
1921 NumScopsDepthTwo++;
1922 else if (Stats.MaxDepth == 3)
1923 NumScopsDepthThree++;
1924 else if (Stats.MaxDepth == 4)
1925 NumScopsDepthFour++;
1926 else if (Stats.MaxDepth == 5)
1927 NumScopsDepthFive++;
1928 else
1929 NumScopsDepthLarger++;
1930 } else {
1931 NumLoopsInProfScop += Stats.NumLoops;
1932 MaxNumLoopsInProfScop =
1933 std::max(a: MaxNumLoopsInProfScop.getValue(), b: (uint64_t)Stats.NumLoops);
1934 if (Stats.MaxDepth == 0)
1935 NumProfScopsDepthZero++;
1936 else if (Stats.MaxDepth == 1)
1937 NumProfScopsDepthOne++;
1938 else if (Stats.MaxDepth == 2)
1939 NumProfScopsDepthTwo++;
1940 else if (Stats.MaxDepth == 3)
1941 NumProfScopsDepthThree++;
1942 else if (Stats.MaxDepth == 4)
1943 NumProfScopsDepthFour++;
1944 else if (Stats.MaxDepth == 5)
1945 NumProfScopsDepthFive++;
1946 else
1947 NumProfScopsDepthLarger++;
1948 }
1949}
1950
1951ScopDetection::DetectionContext *
1952ScopDetection::getDetectionContext(const Region *R) const {
1953 auto DCMIt = DetectionContextMap.find(Val: getBBPairForRegion(R));
1954 if (DCMIt == DetectionContextMap.end())
1955 return nullptr;
1956 return DCMIt->second.get();
1957}
1958
1959const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1960 const DetectionContext *DC = getDetectionContext(R);
1961 return DC ? &DC->Log : nullptr;
1962}
1963
1964void ScopDetection::verifyRegion(const Region &R) {
1965 assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1966
1967 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
1968 isValidRegion(Context);
1969}
1970
1971void ScopDetection::verifyAnalysis() {
1972 if (!VerifyScops)
1973 return;
1974
1975 for (const Region *R : ValidRegions)
1976 verifyRegion(R: *R);
1977}
1978
1979bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
1980 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1981 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1982 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1983 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1984 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1985 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1986
1987 Result = std::make_unique<ScopDetection>(args&: DT, args&: SE, args&: LI, args&: RI, args&: AA, args&: ORE);
1988 Result->detect(F);
1989 return false;
1990}
1991
1992void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1993 AU.addRequired<LoopInfoWrapperPass>();
1994 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
1995 AU.addRequired<DominatorTreeWrapperPass>();
1996 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
1997 // We also need AA and RegionInfo when we are verifying analysis.
1998 AU.addRequiredTransitive<AAResultsWrapperPass>();
1999 AU.addRequiredTransitive<RegionInfoPass>();
2000 AU.setPreservesAll();
2001}
2002
2003void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
2004 for (const Region *R : Result->ValidRegions)
2005 OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2006
2007 OS << "\n";
2008}
2009
2010ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
2011 // Disable runtime alias checks if we ignore aliasing all together.
2012 if (IgnoreAliasing)
2013 PollyUseRuntimeAliasChecks = false;
2014}
2015
2016ScopAnalysis::ScopAnalysis() {
2017 // Disable runtime alias checks if we ignore aliasing all together.
2018 if (IgnoreAliasing)
2019 PollyUseRuntimeAliasChecks = false;
2020}
2021
2022void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }
2023
2024char ScopDetectionWrapperPass::ID;
2025
2026AnalysisKey ScopAnalysis::Key;
2027
2028ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
2029 auto &LI = FAM.getResult<LoopAnalysis>(IR&: F);
2030 auto &RI = FAM.getResult<RegionInfoAnalysis>(IR&: F);
2031 auto &AA = FAM.getResult<AAManager>(IR&: F);
2032 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(IR&: F);
2033 auto &DT = FAM.getResult<DominatorTreeAnalysis>(IR&: F);
2034 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
2035
2036 ScopDetection Result(DT, SE, LI, RI, AA, ORE);
2037 Result.detect(F);
2038 return Result;
2039}
2040
2041PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
2042 FunctionAnalysisManager &FAM) {
2043 OS << "Detected Scops in Function " << F.getName() << "\n";
2044 auto &SD = FAM.getResult<ScopAnalysis>(IR&: F);
2045 for (const Region *R : SD.ValidRegions)
2046 OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2047
2048 OS << "\n";
2049 return PreservedAnalyses::all();
2050}
2051
2052Pass *polly::createScopDetectionWrapperPassPass() {
2053 return new ScopDetectionWrapperPass();
2054}
2055
2056INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
2057 "Polly - Detect static control parts (SCoPs)", false,
2058 false);
2059INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
2060INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
2061INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
2062INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
2063INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
2064INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
2065INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
2066 "Polly - Detect static control parts (SCoPs)", false, false)
2067
2068//===----------------------------------------------------------------------===//
2069
2070namespace {
2071/// Print result from ScopDetectionWrapperPass.
2072class ScopDetectionPrinterLegacyPass final : public FunctionPass {
2073public:
2074 static char ID;
2075
2076 ScopDetectionPrinterLegacyPass() : ScopDetectionPrinterLegacyPass(outs()) {}
2077
2078 explicit ScopDetectionPrinterLegacyPass(llvm::raw_ostream &OS)
2079 : FunctionPass(ID), OS(OS) {}
2080
2081 bool runOnFunction(Function &F) override {
2082 ScopDetectionWrapperPass &P = getAnalysis<ScopDetectionWrapperPass>();
2083
2084 OS << "Printing analysis '" << P.getPassName() << "' for function '"
2085 << F.getName() << "':\n";
2086 P.print(OS);
2087
2088 return false;
2089 }
2090
2091 void getAnalysisUsage(AnalysisUsage &AU) const override {
2092 FunctionPass::getAnalysisUsage(AU);
2093 AU.addRequired<ScopDetectionWrapperPass>();
2094 AU.setPreservesAll();
2095 }
2096
2097private:
2098 llvm::raw_ostream &OS;
2099};
2100
2101char ScopDetectionPrinterLegacyPass::ID = 0;
2102} // namespace
2103
2104Pass *polly::createScopDetectionPrinterLegacyPass(raw_ostream &OS) {
2105 return new ScopDetectionPrinterLegacyPass(OS);
2106}
2107
2108INITIALIZE_PASS_BEGIN(ScopDetectionPrinterLegacyPass, "polly-print-detect",
2109 "Polly - Print static control parts (SCoPs)", false,
2110 false);
2111INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
2112INITIALIZE_PASS_END(ScopDetectionPrinterLegacyPass, "polly-print-detect",
2113 "Polly - Print static control parts (SCoPs)", false, false)
2114

source code of polly/lib/Analysis/ScopDetection.cpp