1//===- ScopDetection.cpp - Detect Scops -----------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Detect the maximal Scops of a function.
10//
11// A static control part (Scop) is a subgraph of the control flow graph (CFG)
12// that only has statically known control flow and can therefore be described
13// within the polyhedral model.
14//
15// Every Scop fulfills these restrictions:
16//
17// * It is a single entry single exit region
18//
19// * Only affine linear bounds in the loops
20//
21// Every natural loop in a Scop must have a number of loop iterations that can
22// be described as an affine linear function in surrounding loop iterators or
23// parameters. (A parameter is a scalar that does not change its value during
24// execution of the Scop).
25//
26// * Only comparisons of affine linear expressions in conditions
27//
28// * All loops and conditions perfectly nested
29//
30// The control flow needs to be structured such that it could be written using
31// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
32// 'continue'.
33//
// * Side-effect-free function calls
//
// Function calls and intrinsics that do not have side effects (readnone)
// or memory intrinsics (memset, memcpy, memmove) are allowed.
38//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. We then try to extend this Scop by
// creating a larger non-canonical region.
43//
44//===----------------------------------------------------------------------===//
45
46#include "polly/ScopDetection.h"
47#include "polly/LinkAllPasses.h"
48#include "polly/Options.h"
49#include "polly/ScopDetectionDiagnostic.h"
50#include "polly/Support/SCEVValidator.h"
51#include "polly/Support/ScopHelper.h"
52#include "polly/Support/ScopLocation.h"
53#include "llvm/ADT/SmallPtrSet.h"
54#include "llvm/ADT/Statistic.h"
55#include "llvm/Analysis/AliasAnalysis.h"
56#include "llvm/Analysis/Delinearization.h"
57#include "llvm/Analysis/Loads.h"
58#include "llvm/Analysis/LoopInfo.h"
59#include "llvm/Analysis/OptimizationRemarkEmitter.h"
60#include "llvm/Analysis/RegionInfo.h"
61#include "llvm/Analysis/ScalarEvolution.h"
62#include "llvm/Analysis/ScalarEvolutionExpressions.h"
63#include "llvm/IR/BasicBlock.h"
64#include "llvm/IR/DebugLoc.h"
65#include "llvm/IR/DerivedTypes.h"
66#include "llvm/IR/DiagnosticInfo.h"
67#include "llvm/IR/DiagnosticPrinter.h"
68#include "llvm/IR/Dominators.h"
69#include "llvm/IR/Function.h"
70#include "llvm/IR/InstrTypes.h"
71#include "llvm/IR/Instruction.h"
72#include "llvm/IR/Instructions.h"
73#include "llvm/IR/IntrinsicInst.h"
74#include "llvm/IR/Metadata.h"
75#include "llvm/IR/Module.h"
76#include "llvm/IR/PassManager.h"
77#include "llvm/IR/Value.h"
78#include "llvm/InitializePasses.h"
79#include "llvm/Pass.h"
80#include "llvm/Support/Debug.h"
81#include "llvm/Support/Regex.h"
82#include "llvm/Support/raw_ostream.h"
83#include <algorithm>
84#include <cassert>
85#include <memory>
86#include <stack>
87#include <string>
88#include <utility>
89#include <vector>
90
91using namespace llvm;
92using namespace polly;
93
94#include "polly/Support/PollyDebug.h"
95#define DEBUG_TYPE "polly-detect"
96
// This option is set to a very high value, as analyzing such loops increases
// compile time on several cases. For experiments that enable this option,
// a value of around 40 has been working to avoid run-time regressions with
// Polly while still exposing interesting optimization opportunities.
static cl::opt<int> ProfitabilityMinPerLoopInstructions(
    "polly-detect-profitability-min-per-loop-insts",
    cl::desc("The minimal number of per-loop instructions before a single loop "
             "region is considered profitable"),
    cl::Hidden, cl::ValueRequired, cl::init(100000000), cl::cat(PollyCategory));

// Storage for -polly-process-unprofitable; exported via polly namespace.
bool polly::PollyProcessUnprofitable;

static cl::opt<bool, true> XPollyProcessUnprofitable(
    "polly-process-unprofitable",
    cl::desc(
        "Process scops that are unlikely to benefit from Polly optimizations."),
    cl::location(PollyProcessUnprofitable), cl::cat(PollyCategory));

static cl::list<std::string> OnlyFunctions(
    "polly-only-func",
    cl::desc("Only run on functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will run on all functions that match "
             "ANY of the regexes provided."),
    cl::CommaSeparated, cl::cat(PollyCategory));

static cl::list<std::string> IgnoredFunctions(
    "polly-ignore-func",
    cl::desc("Ignore functions that match a regex. "
             "Multiple regexes can be comma separated. "
             "Scop detection will ignore all functions that match "
             "ANY of the regexes provided."),
    cl::CommaSeparated, cl::cat(PollyCategory));

// Storage for -polly-detect-full-functions; exported via polly namespace.
bool polly::PollyAllowFullFunction;

static cl::opt<bool, true>
    XAllowFullFunction("polly-detect-full-functions",
                       cl::desc("Allow the detection of full functions"),
                       cl::location(polly::PollyAllowFullFunction),
                       cl::init(false), cl::cat(PollyCategory));

static cl::opt<std::string> OnlyRegion(
    "polly-only-region",
    cl::desc("Only run on certain regions (The provided identifier must "
             "appear in the name of the region's entry block"),
    cl::value_desc("identifier"), cl::ValueRequired, cl::init(""),
    cl::cat(PollyCategory));

static cl::opt<bool>
    IgnoreAliasing("polly-ignore-aliasing",
                   cl::desc("Ignore possible aliasing of the array bases"),
                   cl::Hidden, cl::cat(PollyCategory));

// Storage for -polly-allow-unsigned-operations; exported via polly namespace.
bool polly::PollyAllowUnsignedOperations;

static cl::opt<bool, true> XPollyAllowUnsignedOperations(
    "polly-allow-unsigned-operations",
    cl::desc("Allow unsigned operations such as comparisons or zero-extends."),
    cl::location(PollyAllowUnsignedOperations), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

// Storage for -polly-use-runtime-alias-checks; exported via polly namespace.
bool polly::PollyUseRuntimeAliasChecks;

static cl::opt<bool, true> XPollyUseRuntimeAliasChecks(
    "polly-use-runtime-alias-checks",
    cl::desc("Use runtime alias checks to resolve possible aliasing."),
    cl::location(PollyUseRuntimeAliasChecks), cl::Hidden, cl::init(true),
    cl::cat(PollyCategory));

static cl::opt<bool>
    ReportLevel("polly-report",
                cl::desc("Print information about the activities of Polly"),
                cl::cat(PollyCategory));

static cl::opt<bool> AllowDifferentTypes(
    "polly-allow-differing-element-types",
    cl::desc("Allow different element types for array accesses"), cl::Hidden,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffine("polly-allow-nonaffine",
                   cl::desc("Allow non affine access functions in arrays"),
                   cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool>
    AllowModrefCall("polly-allow-modref-calls",
                    cl::desc("Allow functions with known modref behavior"),
                    cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> AllowNonAffineSubRegions(
    "polly-allow-nonaffine-branches",
    cl::desc("Allow non affine conditions for branches"), cl::Hidden,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    AllowNonAffineSubLoops("polly-allow-nonaffine-loops",
                           cl::desc("Allow non affine conditions for loops"),
                           cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool, true>
    TrackFailures("polly-detect-track-failures",
                  cl::desc("Track failure strings in detecting scop regions"),
                  cl::location(PollyTrackFailures), cl::Hidden, cl::init(true),
                  cl::cat(PollyCategory));

static cl::opt<bool> KeepGoing("polly-detect-keep-going",
                               cl::desc("Do not fail on the first error."),
                               cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool, true>
    PollyDelinearizeX("polly-delinearize",
                      cl::desc("Delinearize array access functions"),
                      cl::location(PollyDelinearize), cl::Hidden,
                      cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool>
    VerifyScops("polly-detect-verify",
                cl::desc("Verify the detected SCoPs after each transformation"),
                cl::Hidden, cl::cat(PollyCategory));

// Storage for -polly-invariant-load-hoisting; exported via polly namespace.
bool polly::PollyInvariantLoadHoisting;

static cl::opt<bool, true>
    XPollyInvariantLoadHoisting("polly-invariant-load-hoisting",
                                cl::desc("Hoist invariant loads."),
                                cl::location(PollyInvariantLoadHoisting),
                                cl::Hidden, cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowErrorBlocks(
    "polly-allow-error-blocks",
    cl::desc("Allow to speculate on the execution of 'error blocks'."),
    cl::Hidden, cl::init(true), cl::cat(PollyCategory));

/// The minimal trip count under which loops are considered unprofitable.
static const unsigned MIN_LOOP_TRIP_COUNT = 8;

// Definitions of the exported flags declared in the polly namespace; the
// cl::opt objects above write into the first two via cl::location.
bool polly::PollyTrackFailures = false;
bool polly::PollyDelinearize = false;
// Function attribute used to mark functions Polly must skip.
StringRef polly::PollySkipFnAttr = "polly.skip.fn";
237
//===----------------------------------------------------------------------===//
// Statistics.

STATISTIC(NumScopRegions, "Number of scops");
STATISTIC(NumLoopsInScop, "Number of loops in scops");
STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0");
STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1");
STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2");
STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3");
STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4");
STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5");
STATISTIC(NumScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger");
STATISTIC(NumProfScopRegions, "Number of scops (profitable scops only)");
STATISTIC(NumLoopsInProfScop,
          "Number of loops in scops (profitable scops only)");
STATISTIC(NumLoopsOverall, "Number of total loops");
STATISTIC(NumProfScopsDepthZero,
          "Number of scops with maximal loop depth 0 (profitable scops only)");
STATISTIC(NumProfScopsDepthOne,
          "Number of scops with maximal loop depth 1 (profitable scops only)");
STATISTIC(NumProfScopsDepthTwo,
          "Number of scops with maximal loop depth 2 (profitable scops only)");
STATISTIC(NumProfScopsDepthThree,
          "Number of scops with maximal loop depth 3 (profitable scops only)");
STATISTIC(NumProfScopsDepthFour,
          "Number of scops with maximal loop depth 4 (profitable scops only)");
STATISTIC(NumProfScopsDepthFive,
          "Number of scops with maximal loop depth 5 (profitable scops only)");
STATISTIC(NumProfScopsDepthLarger,
          "Number of scops with maximal loop depth 6 and larger "
          "(profitable scops only)");
STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops");
STATISTIC(MaxNumLoopsInProfScop,
          "Maximal number of loops in scops (profitable scops only)");

// Fold the per-scop loop statistics in \p Stats into the counters above;
// defined after ScopDetection::detect which is its only caller context.
static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                                     bool OnlyProfitable);
276
namespace {

/// Diagnostic (severity: note) emitted when -polly-report is active, pointing
/// the user at the source-line range of a detected scop.
class DiagnosticScopFound final : public DiagnosticInfo {
private:
  // Dynamically allocated plugin diagnostic kind, shared by all instances.
  static int PluginDiagnosticKind;

  // Function containing the detected scop.
  Function &F;
  // Source file of the scop; empty when no debug info is available.
  std::string FileName;
  // First and last source line of the scop.
  unsigned EntryLine, ExitLine;

public:
  DiagnosticScopFound(Function &F, std::string FileName, unsigned EntryLine,
                      unsigned ExitLine)
      : DiagnosticInfo(PluginDiagnosticKind, DS_Note), F(F), FileName(FileName),
        EntryLine(EntryLine), ExitLine(ExitLine) {}

  void print(DiagnosticPrinter &DP) const override;

  // LLVM-style RTTI support.
  static bool classof(const DiagnosticInfo *DI) {
    return DI->getKind() == PluginDiagnosticKind;
  }
};
} // namespace
300
// Acquire a unique diagnostic kind for DiagnosticScopFound at load time.
int DiagnosticScopFound::PluginDiagnosticKind =
    getNextAvailablePluginDiagnosticKind();
303
304void DiagnosticScopFound::print(DiagnosticPrinter &DP) const {
305 DP << "Polly detected an optimizable loop region (scop) in function '" << F
306 << "'\n";
307
308 if (FileName.empty()) {
309 DP << "Scop location is unknown. Compile with debug info "
310 "(-g) to get more precise information. ";
311 return;
312 }
313
314 DP << FileName << ":" << EntryLine << ": Start of scop\n";
315 DP << FileName << ":" << ExitLine << ": End of scop";
316}
317
318/// Check if a string matches any regex in a list of regexes.
319/// @param Str the input string to match against.
320/// @param RegexList a list of strings that are regular expressions.
321static bool doesStringMatchAnyRegex(StringRef Str,
322 const cl::list<std::string> &RegexList) {
323 for (auto RegexStr : RegexList) {
324 Regex R(RegexStr);
325
326 std::string Err;
327 if (!R.isValid(Error&: Err))
328 report_fatal_error(reason: Twine("invalid regex given as input to polly: ") + Err,
329 gen_crash_diag: true);
330
331 if (R.match(String: Str))
332 return true;
333 }
334 return false;
335}
336
337//===----------------------------------------------------------------------===//
338// ScopDetection.
339
/// Construct a ScopDetection holding references to the analyses it consults;
/// no detection happens until detect() is called.
ScopDetection::ScopDetection(const DominatorTree &DT, ScalarEvolution &SE,
                             LoopInfo &LI, RegionInfo &RI, AAResults &AA,
                             OptimizationRemarkEmitter &ORE)
    : DT(DT), SE(SE), LI(LI), RI(RI), AA(AA), ORE(ORE) {}
344
/// Run scop detection on function \p F, populating ValidRegions and the
/// per-region DetectionContextMap, then prune regions deemed unprofitable.
void ScopDetection::detect(Function &F) {
  assert(ValidRegions.empty() && "Detection must run only once");

  // Without any loop there is nothing to optimize, unless the user forces
  // processing of unprofitable scops.
  if (!PollyProcessUnprofitable && LI.empty())
    return;

  Region *TopRegion = RI.getTopLevelRegion();

  // Honor the -polly-only-func / -polly-ignore-func function filters.
  if (!OnlyFunctions.empty() &&
      !doesStringMatchAnyRegex(F.getName(), OnlyFunctions))
    return;

  if (doesStringMatchAnyRegex(F.getName(), IgnoredFunctions))
    return;

  if (!isValidFunction(F))
    return;

  findScops(*TopRegion);

  NumScopRegions += ValidRegions.size();

  // Prune non-profitable regions.
  for (auto &DIt : DetectionContextMap) {
    DetectionContext &DC = *DIt.getSecond();
    // Skip contexts of rejected candidates and of regions that findScops did
    // not keep as maximal valid regions.
    if (DC.Log.hasErrors())
      continue;
    if (!ValidRegions.count(&DC.CurRegion))
      continue;
    LoopStats Stats = countBeneficialLoops(&DC.CurRegion, SE, LI, 0);
    updateLoopCountStatistic(Stats, false /* OnlyProfitable */);
    if (isProfitableRegion(DC)) {
      updateLoopCountStatistic(Stats, true /* OnlyProfitable */);
      continue;
    }

    ValidRegions.remove(&DC.CurRegion);
  }

  NumProfScopRegions += ValidRegions.size();
  NumLoopsOverall += countBeneficialLoops(TopRegion, SE, LI, 0).NumLoops;

  // Only makes sense when we tracked errors.
  if (PollyTrackFailures)
    emitMissedRemarks(F);

  // -polly-report: print the locations of all detected scops.
  if (ReportLevel)
    printLocations(F);

  assert(ValidRegions.size() <= DetectionContextMap.size() &&
         "Cached more results than valid regions");
}
397
398template <class RR, typename... Args>
399inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert,
400 Args &&...Arguments) const {
401 if (!Context.Verifying) {
402 RejectLog &Log = Context.Log;
403 std::shared_ptr<RR> RejectReason = std::make_shared<RR>(Arguments...);
404 Context.IsInvalid = true;
405
406 // Log even if PollyTrackFailures is false, the log entries are also used in
407 // canUseISLTripCount().
408 Log.report(Reject: RejectReason);
409
410 POLLY_DEBUG(dbgs() << RejectReason->getMessage());
411 POLLY_DEBUG(dbgs() << "\n");
412 } else {
413 assert(!Assert && "Verification of detected scop failed");
414 }
415
416 return false;
417}
418
/// Return true if \p R is a cached maximal valid region; with \p Verify the
/// cached result is discarded and the region is re-validated from scratch.
bool ScopDetection::isMaxRegionInScop(const Region &R, bool Verify) {
  if (!ValidRegions.count(&R))
    return false;

  if (Verify) {
    BBPair P = getBBPairForRegion(&R);
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];

    // Free the previous DetectionContext for the region and create and verify
    // a new one. Be sure that the DetectionContext is not still used by a
    // ScopInfo. Due to changes made by CodeGeneration of another Scop, the
    // Region object and the BBPair might not match anymore.
    Entry = std::make_unique<DetectionContext>(const_cast<Region &>(R), AA,
                                               /*Verifying=*/false);

    return isValidRegion(*Entry);
  }

  return true;
}
439
440std::string ScopDetection::regionIsInvalidBecause(const Region *R) const {
441 // Get the first error we found. Even in keep-going mode, this is the first
442 // reason that caused the candidate to be rejected.
443 auto *Log = lookupRejectionLog(R);
444
445 // This can happen when we marked a region invalid, but didn't track
446 // an error for it.
447 if (!Log || !Log->hasErrors())
448 return "";
449
450 RejectReasonPtr RR = *Log->begin();
451 return RR->getMessage();
452}
453
454bool ScopDetection::addOverApproximatedRegion(Region *AR,
455 DetectionContext &Context) const {
456 // If we already know about Ar we can exit.
457 if (!Context.NonAffineSubRegionSet.insert(X: AR))
458 return true;
459
460 // All loops in the region have to be overapproximated too if there
461 // are accesses that depend on the iteration count.
462
463 for (BasicBlock *BB : AR->blocks()) {
464 Loop *L = LI.getLoopFor(BB);
465 if (AR->contains(L))
466 Context.BoxedLoopsSet.insert(X: L);
467 }
468
469 return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
470}
471
472bool ScopDetection::onlyValidRequiredInvariantLoads(
473 InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
474 Region &CurRegion = Context.CurRegion;
475 const DataLayout &DL = CurRegion.getEntry()->getModule()->getDataLayout();
476
477 if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
478 return false;
479
480 for (LoadInst *Load : RequiredILS) {
481 // If we already know a load has been accepted as required invariant, we
482 // already run the validation below once and consequently don't need to
483 // run it again. Hence, we return early. For certain test cases (e.g.,
484 // COSMO this avoids us spending 50% of scop-detection time in this
485 // very function (and its children).
486 if (Context.RequiredILS.count(key: Load))
487 continue;
488 if (!isHoistableLoad(LInst: Load, R&: CurRegion, LI, SE, DT, KnownInvariantLoads: Context.RequiredILS))
489 return false;
490
491 for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
492 if (isSafeToLoadUnconditionally(V: Load->getPointerOperand(),
493 Ty: Load->getType(), Alignment: Load->getAlign(), DL,
494 ScanFrom: nullptr))
495 continue;
496
497 if (NonAffineRegion->contains(Inst: Load) &&
498 Load->getParent() != NonAffineRegion->getEntry())
499 return false;
500 }
501 }
502
503 Context.RequiredILS.insert_range(R&: RequiredILS);
504
505 return true;
506}
507
/// Return true if the SCEV expressions \p S0 (and optionally \p S1) reference
/// multiple base pointers that may alias each other within loop \p Scope.
bool ScopDetection::involvesMultiplePtrs(const SCEV *S0, const SCEV *S1,
                                         Loop *Scope) const {
  // Collect all SSA values referenced by the expression(s).
  SetVector<Value *> Values;
  findValues(S0, SE, Values);
  if (S1)
    findValues(S1, SE, Values);

  SmallPtrSet<Value *, 8> PtrVals;
  for (auto *V : Values) {
    // Look through ptrtoint casts to the underlying pointer value.
    if (auto *P2I = dyn_cast<PtrToIntInst>(V))
      V = P2I->getOperand(0);

    if (!V->getType()->isPointerTy())
      continue;

    // Constant pointers cannot introduce aliasing problems.
    const SCEV *PtrSCEV = SE.getSCEVAtScope(V, Scope);
    if (isa<SCEVConstant>(PtrSCEV))
      continue;

    // Without an identifiable base pointer be conservative and report
    // multiple pointers.
    auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(PtrSCEV));
    if (!BasePtr)
      return true;

    // Compare each newly seen base against all previously collected ones;
    // any possible aliasing means multiple pointers are involved.
    Value *BasePtrVal = BasePtr->getValue();
    if (PtrVals.insert(BasePtrVal).second) {
      for (auto *PtrVal : PtrVals)
        if (PtrVal != BasePtrVal && !AA.isNoAlias(PtrVal, BasePtrVal))
          return true;
    }
  }

  return false;
}
541
542bool ScopDetection::isAffine(const SCEV *S, Loop *Scope,
543 DetectionContext &Context) const {
544 InvariantLoadsSetTy AccessILS;
545 if (!isAffineExpr(R: &Context.CurRegion, Scope, Expression: S, SE, ILS: &AccessILS))
546 return false;
547
548 if (!onlyValidRequiredInvariantLoads(RequiredILS&: AccessILS, Context))
549 return false;
550
551 return true;
552}
553
/// Check whether the switch \p SI in \p BB with condition \p Condition is
/// valid inside a scop; non-affine conditions may be over-approximated.
bool ScopDetection::isValidSwitch(BasicBlock &BB, SwitchInst *SI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) const {
  Loop *L = LI.getLoopFor(&BB);
  const SCEV *ConditionSCEV = SE.getSCEVAtScope(Condition, L);

  // A switch acting as loop latch would imply unstructured loop control.
  if (IsLoopBranch && L->isLoopLatch(&BB))
    return false;

  // Check for invalid usage of different pointers in one expression.
  if (involvesMultiplePtrs(ConditionSCEV, nullptr, L))
    return false;

  if (isAffine(ConditionSCEV, L, Context))
    return true;

  // Fall back to over-approximating the surrounding region when allowed.
  if (AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  // For a switch there is no LHS/RHS pair; report the condition twice.
  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB,
                                     ConditionSCEV, ConditionSCEV, SI);
}
577
/// Check whether the branch \p BI in \p BB with condition \p Condition is
/// valid inside a scop; non-affine conditions may be over-approximated
/// unless they control a loop.
bool ScopDetection::isValidBranch(BasicBlock &BB, BranchInst *BI,
                                  Value *Condition, bool IsLoopBranch,
                                  DetectionContext &Context) {
  // Constant integer conditions are always affine.
  if (isa<ConstantInt>(Condition))
    return true;

  // Decompose and/or conditions; both operands must be valid on their own.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    if (Opcode == Instruction::And || Opcode == Instruction::Or) {
      Value *Op0 = BinOp->getOperand(0);
      Value *Op1 = BinOp->getOperand(1);
      return isValidBranch(BB, BI, Op0, IsLoopBranch, Context) &&
             isValidBranch(BB, BI, Op1, IsLoopBranch, Context);
    }
  }

  // A PHI that has a unique constant 0/1 value on all non-error paths
  // behaves like a constant condition.
  if (auto PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast_or_null<ConstantInt>(
        getUniqueNonErrorValue(PHI, &Context.CurRegion, this));
    if (Unique && (Unique->isZero() || Unique->isOne()))
      return true;
  }

  // A value loaded inside the region is accepted as a required invariant
  // load (validated during hoisting) unless it controls a loop.
  if (auto Load = dyn_cast<LoadInst>(Condition))
    if (!IsLoopBranch && Context.CurRegion.contains(Load)) {
      Context.RequiredILS.insert(Load);
      return true;
    }

  // Non constant conditions of branches need to be ICmpInst.
  if (!isa<ICmpInst>(Condition)) {
    if (!IsLoopBranch && AllowNonAffineSubRegions &&
        addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
      return true;
    return invalid<ReportInvalidCond>(Context, /*Assert=*/true, BI, &BB);
  }

  ICmpInst *ICmp = cast<ICmpInst>(Condition);

  // Are both operands of the ICmp affine?
  if (isa<UndefValue>(ICmp->getOperand(0)) ||
      isa<UndefValue>(ICmp->getOperand(1)))
    return invalid<ReportUndefOperand>(Context, /*Assert=*/true, &BB, ICmp);

  Loop *L = LI.getLoopFor(&BB);
  const SCEV *LHS = SE.getSCEVAtScope(ICmp->getOperand(0), L);
  const SCEV *RHS = SE.getSCEVAtScope(ICmp->getOperand(1), L);

  // Try to substitute PHIs whose value is unique on non-error paths.
  LHS = tryForwardThroughPHI(LHS, Context.CurRegion, SE, this);
  RHS = tryForwardThroughPHI(RHS, Context.CurRegion, SE, this);

  // If unsigned operations are not allowed try to approximate the region.
  if (ICmp->isUnsigned() && !PollyAllowUnsignedOperations)
    return !IsLoopBranch && AllowNonAffineSubRegions &&
           addOverApproximatedRegion(RI.getRegionFor(&BB), Context);

  // Check for invalid usage of different pointers in one expression.
  if (ICmp->isEquality() && involvesMultiplePtrs(LHS, nullptr, L) &&
      involvesMultiplePtrs(RHS, nullptr, L))
    return false;

  // Check for invalid usage of different pointers in a relational comparison.
  if (ICmp->isRelational() && involvesMultiplePtrs(LHS, RHS, L))
    return false;

  if (isAffine(LHS, L, Context) && isAffine(RHS, L, Context))
    return true;

  // Non-affine comparison: over-approximate the region when allowed.
  if (!IsLoopBranch && AllowNonAffineSubRegions &&
      addOverApproximatedRegion(RI.getRegionFor(&BB), Context))
    return true;

  // A loop controlled by a non-affine condition cannot be modeled.
  if (IsLoopBranch)
    return false;

  return invalid<ReportNonAffBranch>(Context, /*Assert=*/true, &BB, LHS, RHS,
                                     ICmp);
}
657
/// Check whether the terminator of \p BB describes control flow that can be
/// modeled inside a scop, dispatching to the branch/switch validators.
bool ScopDetection::isValidCFG(BasicBlock &BB, bool IsLoopBranch,
                               bool AllowUnreachable,
                               DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  Instruction *TI = BB.getTerminator();

  // Unreachable terminators may be tolerated by the caller, e.g., when
  // speculating on error blocks.
  if (AllowUnreachable && isa<UnreachableInst>(TI))
    return true;

  // Return instructions are only valid if the region is the top level region.
  if (isa<ReturnInst>(TI) && CurRegion.isTopLevelRegion())
    return true;

  Value *Condition = getConditionFromTerminator(TI);

  // Terminators without a condition (e.g., invoke, indirectbr) are invalid.
  if (!Condition)
    return invalid<ReportInvalidTerminator>(Context, /*Assert=*/true, &BB);

  // UndefValue is not allowed as condition.
  if (isa<UndefValue>(Condition))
    return invalid<ReportUndefCond>(Context, /*Assert=*/true, TI, &BB);

  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return isValidBranch(BB, BI, Condition, IsLoopBranch, Context);

  // getConditionFromTerminator only returns a condition for branches and
  // switches, so this must be a switch.
  SwitchInst *SI = dyn_cast<SwitchInst>(TI);
  assert(SI && "Terminator was neither branch nor switch");

  return isValidSwitch(BB, SI, Condition, IsLoopBranch, Context);
}
689
/// Check whether the call \p CI can be modeled inside a scop: side-effect
/// free calls, supported intrinsics, and (optionally) calls with known
/// modref behavior are allowed.
bool ScopDetection::isValidCallInst(CallInst &CI,
                                    DetectionContext &Context) const {
  // Calls that do not return would introduce abnormal control flow.
  if (CI.doesNotReturn())
    return false;

  // Calls that do not touch memory are always valid.
  if (CI.doesNotAccessMemory())
    return true;

  if (auto *II = dyn_cast<IntrinsicInst>(&CI))
    if (isValidIntrinsicInst(*II, Context))
      return true;

  Function *CalledFunction = CI.getCalledFunction();

  // Indirect calls are not supported.
  if (CalledFunction == nullptr)
    return false;

  if (isDebugCall(&CI)) {
    POLLY_DEBUG(dbgs() << "Allow call to debug function: "
                       << CalledFunction->getName() << '\n');
    return true;
  }

  if (AllowModrefCall) {
    MemoryEffects ME = AA.getMemoryEffects(CalledFunction);
    if (ME.onlyAccessesArgPointees()) {
      for (const auto &Arg : CI.args()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        // Bail if a pointer argument has a base address not known to
        // ScalarEvolution. Note that a zero pointer is acceptable.
        const SCEV *ArgSCEV =
            SE.getSCEVAtScope(Arg, LI.getLoopFor(CI.getParent()));
        if (ArgSCEV->isZero())
          continue;

        auto *BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV));
        if (!BP)
          return false;

        // Implicitly disable delinearization since we have an unknown
        // accesses with an unknown access function.
        Context.HasUnknownAccess = true;
      }

      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    }

    if (ME.onlyReadsMemory()) {
      // Implicitly disable delinearization since we have an unknown
      // accesses with an unknown access function.
      Context.HasUnknownAccess = true;
      // Explicitly use addUnknown so we don't put a loop-variant
      // pointer into the alias set.
      Context.AST.addUnknown(&CI);
      return true;
    }
    return false;
  }

  return false;
}
757
/// Check whether the intrinsic \p II is supported within a scop. Ignored
/// intrinsics and representable memory intrinsics (memset/memcpy/memmove
/// with valid pointers and affine length) are allowed.
bool ScopDetection::isValidIntrinsicInst(IntrinsicInst &II,
                                         DetectionContext &Context) const {
  if (isIgnoredIntrinsic(&II))
    return true;

  // The closest loop surrounding the call instruction.
  Loop *L = LI.getLoopFor(II.getParent());

  // The access function and base pointer for memory intrinsics.
  const SCEV *AF;
  const SCEVUnknown *BP;

  switch (II.getIntrinsicID()) {
  // Memory intrinsics that can be represented are supported.
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    AF = SE.getSCEVAtScope(cast<MemTransferInst>(II).getSource(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the source pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }
    // memmove/memcpy additionally need the destination/length checks below.
    [[fallthrough]];
  case Intrinsic::memset:
    AF = SE.getSCEVAtScope(cast<MemIntrinsic>(II).getDest(), L);
    if (!AF->isZero()) {
      BP = dyn_cast<SCEVUnknown>(SE.getPointerBase(AF));
      // Bail if the destination pointer is not valid.
      if (!isValidAccess(&II, AF, BP, Context))
        return false;
    }

    // Bail if the length is not affine.
    if (!isAffine(SE.getSCEVAtScope(cast<MemIntrinsic>(II).getLength(), L), L,
                  Context))
      return false;

    return true;
  default:
    break;
  }

  // All other intrinsics are unsupported.
  return false;
}
803
804bool ScopDetection::isInvariant(Value &Val, const Region &Reg,
805 DetectionContext &Ctx) const {
806 // A reference to function argument or constant value is invariant.
807 if (isa<Argument>(Val) || isa<Constant>(Val))
808 return true;
809
810 Instruction *I = dyn_cast<Instruction>(Val: &Val);
811 if (!I)
812 return false;
813
814 if (!Reg.contains(Inst: I))
815 return true;
816
817 // Loads within the SCoP may read arbitrary values, need to hoist them. If it
818 // is not hoistable, it will be rejected later, but here we assume it is and
819 // that makes the value invariant.
820 if (auto LI = dyn_cast<LoadInst>(Val: I)) {
821 Ctx.RequiredILS.insert(X: LI);
822 return true;
823 }
824
825 return false;
826}
827
828namespace {
829
830/// Remove smax of smax(0, size) expressions from a SCEV expression and
831/// register the '...' components.
832///
833/// Array access expressions as they are generated by GFortran contain smax(0,
834/// size) expressions that confuse the 'normal' delinearization algorithm.
835/// However, if we extract such expressions before the normal delinearization
836/// takes place they can actually help to identify array size expressions in
837/// Fortran accesses. For the subsequently following delinearization the smax(0,
838/// size) component can be replaced by just 'size'. This is correct as we will
839/// always add and verify the assumption that for all subscript expressions
840/// 'exp' the inequality 0 <= exp < size holds. Hence, we will also verify
841/// that 0 <= size, which means smax(0, size) == size.
842class SCEVRemoveMax final : public SCEVRewriteVisitor<SCEVRemoveMax> {
843public:
844 SCEVRemoveMax(ScalarEvolution &SE, std::vector<const SCEV *> *Terms)
845 : SCEVRewriteVisitor(SE), Terms(Terms) {}
846
847 static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
848 std::vector<const SCEV *> *Terms = nullptr) {
849 SCEVRemoveMax Rewriter(SE, Terms);
850 return Rewriter.visit(S: Scev);
851 }
852
853 const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
854 if ((Expr->getNumOperands() == 2) && Expr->getOperand(i: 0)->isZero()) {
855 auto Res = visit(S: Expr->getOperand(i: 1));
856 if (Terms)
857 (*Terms).push_back(x: Res);
858 return Res;
859 }
860
861 return Expr;
862 }
863
864private:
865 std::vector<const SCEV *> *Terms;
866};
867} // namespace
868
/// Collect candidate array-size terms from all accesses to \p BasePointer,
/// to be used as input for delinearization.
SmallVector<const SCEV *, 4>
ScopDetection::getDelinearizationTerms(DetectionContext &Context,
                                       const SCEVUnknown *BasePointer) const {
  SmallVector<const SCEV *, 4> Terms;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    // Prefer the sizes recovered from GFortran-style smax(0, size) patterns
    // when present (see SCEVRemoveMax above).
    std::vector<const SCEV *> MaxTerms;
    SCEVRemoveMax::rewrite(Pair.second, SE, &MaxTerms);
    if (!MaxTerms.empty()) {
      Terms.insert(Terms.begin(), MaxTerms.begin(), MaxTerms.end());
      continue;
    }
    // In case the outermost expression is a plain add, we check if any of its
    // terms has the form 4 * %inst * %param * %param ..., aka a term that
    // contains a product between a parameter and an instruction that is
    // inside the scop. Such instructions, if allowed at all, are instructions
    // SCEV can not represent, but Polly is still looking through. As a
    // result, these instructions can depend on induction variables and are
    // most likely no array sizes. However, terms that are multiplied with
    // them are likely candidates for array sizes.
    if (auto *AF = dyn_cast<SCEVAddExpr>(Pair.second)) {
      for (auto Op : AF->operands()) {
        if (auto *AF2 = dyn_cast<SCEVAddRecExpr>(Op))
          collectParametricTerms(SE, AF2, Terms);
        if (auto *AF2 = dyn_cast<SCEVMulExpr>(Op)) {
          SmallVector<const SCEV *, 0> Operands;

          // Keep constants and region-invariant unknowns; drop unknowns that
          // are instructions inside the scop (see comment above).
          for (const SCEV *MulOp : AF2->operands()) {
            if (auto *Const = dyn_cast<SCEVConstant>(MulOp))
              Operands.push_back(Const);
            if (auto *Unknown = dyn_cast<SCEVUnknown>(MulOp)) {
              if (auto *Inst = dyn_cast<Instruction>(Unknown->getValue())) {
                if (!Context.CurRegion.contains(Inst))
                  Operands.push_back(MulOp);

              } else {
                Operands.push_back(MulOp);
              }
            }
          }
          if (Operands.size())
            Terms.push_back(SE.getMulExpr(Operands));
        }
      }
    }
    // Fall back to generic parametric-term collection if nothing was found.
    if (Terms.empty())
      collectParametricTerms(SE, Pair.second, Terms);
  }
  return Terms;
}
918
/// Check that the delinearized dimension sizes in @p Sizes are usable within
/// the scop: affine, scop-invariant and free of in-region scalar
/// dependences (hoistable loads excepted). Clears @p Sizes if the derived
/// shape has to be discarded.
bool ScopDetection::hasValidArraySizes(DetectionContext &Context,
                                       SmallVectorImpl<const SCEV *> &Sizes,
                                       const SCEVUnknown *BasePointer,
                                       Loop *Scope) const {
  // If no sizes were found, all sizes are trivially valid. We allow this case
  // to make it possible to pass known-affine accesses to the delinearization to
  // try to recover some interesting multi-dimensional accesses, but to still
  // allow the already known to be affine access in case the delinearization
  // fails. In such situations, the delinearization will just return a Sizes
  // array of size zero.
  if (Sizes.size() == 0)
    return true;

  Value *BaseValue = BasePointer->getValue();
  Region &CurRegion = Context.CurRegion;
  for (const SCEV *DelinearizedSize : Sizes) {
    // Don't pass down the scope to isAfffine; array dimensions must be
    // invariant across the entire scop.
    if (!isAffine(S: DelinearizedSize, Scope: nullptr, Context)) {
      // One non-affine dimension invalidates the whole derived shape;
      // clearing Sizes triggers the fallback handling below.
      Sizes.clear();
      break;
    }
    if (auto *Unknown = dyn_cast<SCEVUnknown>(Val: DelinearizedSize)) {
      auto *V = dyn_cast<Value>(Val: Unknown->getValue());
      if (auto *Load = dyn_cast<LoadInst>(Val: V)) {
        // A load inside the region can still act as a size if it is
        // invariant and can be hoisted in front of the scop.
        if (Context.CurRegion.contains(Inst: Load) &&
            isHoistableLoad(LInst: Load, R&: CurRegion, LI, SE, DT, KnownInvariantLoads: Context.RequiredILS))
          Context.RequiredILS.insert(X: Load);
        continue;
      }
    }
    // Sizes depending on values computed inside the region (other than the
    // hoistable loads handled above) cannot become scop parameters.
    if (hasScalarDepsInsideRegion(Expr: DelinearizedSize, R: &CurRegion, Scope, AllowLoops: false,
                                  ILS: Context.RequiredILS))
      return invalid<ReportNonAffineAccess>(
          Context, /*Assert=*/true, Arguments&: DelinearizedSize,
          Arguments&: Context.Accesses[BasePointer].front().first, Arguments&: BaseValue);
  }

  // No array shape derived.
  if (Sizes.empty()) {
    if (AllowNonAffine)
      return true;

    // Without a shape every access must be affine on its own; report each
    // one that is not.
    for (const auto &Pair : Context.Accesses[BasePointer]) {
      const Instruction *Insn = Pair.first;
      const SCEV *AF = Pair.second;

      if (!isAffine(S: AF, Scope, Context)) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments&: AF, Arguments&: Insn,
                                       Arguments&: BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
    return false;
  }
  return true;
}
977
// We first store the resulting memory accesses in TempMemoryAccesses. Only
// if the access functions for all memory accesses have been successfully
// delinearized we continue. Otherwise, we either report a failure or, if
// non-affine accesses are allowed, we drop the information. In case the
// information is dropped the memory accesses need to be overapproximated
// when translated to a polyhedral representation.
//
// Returns false only when a non-affine access is found, non-affine accesses
// are disallowed, and KeepGoing is not set; otherwise true.
bool ScopDetection::computeAccessFunctions(
    DetectionContext &Context, const SCEVUnknown *BasePointer,
    std::shared_ptr<ArrayShape> Shape) const {
  Value *BaseValue = BasePointer->getValue();
  bool BasePtrHasNonAffine = false;
  MapInsnToMemAcc TempMemoryAccesses;
  for (const auto &Pair : Context.Accesses[BasePointer]) {
    const Instruction *Insn = Pair.first;
    auto *AF = Pair.second;
    // Strip smax(0, size) wrappers before delinearizing (see SCEVRemoveMax).
    AF = SCEVRemoveMax::rewrite(Scev: AF, SE);
    bool IsNonAffine = false;
    TempMemoryAccesses.insert(x: std::make_pair(x&: Insn, y: MemAcc(Insn, Shape)));
    MemAcc *Acc = &TempMemoryAccesses.find(x: Insn)->second;
    auto *Scope = LI.getLoopFor(BB: Insn->getParent());

    if (!AF) {
      // No access function; fall back to the original expression as a
      // single subscript if it is affine.
      if (isAffine(S: Pair.second, Scope, Context))
        Acc->DelinearizedSubscripts.push_back(Elt: Pair.second);
      else
        IsNonAffine = true;
    } else {
      if (Shape->DelinearizedSizes.size() == 0) {
        // No shape derived: treat the whole expression as one subscript.
        Acc->DelinearizedSubscripts.push_back(Elt: AF);
      } else {
        // Split AF into per-dimension subscripts using the derived sizes.
        llvm::computeAccessFunctions(SE, Expr: AF, Subscripts&: Acc->DelinearizedSubscripts,
                                     Sizes&: Shape->DelinearizedSizes);
        if (Acc->DelinearizedSubscripts.size() == 0)
          IsNonAffine = true;
      }
      // Each individual subscript must still be affine.
      for (const SCEV *S : Acc->DelinearizedSubscripts)
        if (!isAffine(S, Scope, Context))
          IsNonAffine = true;
    }

    // (Possibly) report non affine access
    if (IsNonAffine) {
      BasePtrHasNonAffine = true;
      if (!AllowNonAffine) {
        invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments: Pair.second,
                                       Arguments&: Insn, Arguments&: BaseValue);
        if (!KeepGoing)
          return false;
      }
    }
  }

  // Commit the delinearization results only if every access for this base
  // pointer turned out affine.
  if (!BasePtrHasNonAffine)
    Context.InsnToMemAcc.insert(first: TempMemoryAccesses.begin(),
                                last: TempMemoryAccesses.end());

  return true;
}
1036
1037bool ScopDetection::hasBaseAffineAccesses(DetectionContext &Context,
1038 const SCEVUnknown *BasePointer,
1039 Loop *Scope) const {
1040 auto Shape = std::shared_ptr<ArrayShape>(new ArrayShape(BasePointer));
1041
1042 auto Terms = getDelinearizationTerms(Context, BasePointer);
1043
1044 findArrayDimensions(SE, Terms, Sizes&: Shape->DelinearizedSizes,
1045 ElementSize: Context.ElementSize[BasePointer]);
1046
1047 if (!hasValidArraySizes(Context, Sizes&: Shape->DelinearizedSizes, BasePointer,
1048 Scope))
1049 return false;
1050
1051 return computeAccessFunctions(Context, BasePointer, Shape);
1052}
1053
/// Check whether all memory accesses recorded in @p Context are (or can be
/// delinearized into) affine accesses.
bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const {
  // TODO: If we have an unknown access and other non-affine accesses we do
  // not try to delinearize them for now.
  if (Context.HasUnknownAccess && !Context.NonAffineAccesses.empty())
    return AllowNonAffine;

  // Attempt delinearization for every base pointer that had at least one
  // non-affine access function.
  for (auto &Pair : Context.NonAffineAccesses) {
    auto *BasePointer = Pair.first;
    auto *Scope = Pair.second;
    if (!hasBaseAffineAccesses(Context, BasePointer, Scope)) {
      Context.IsInvalid = true;
      if (!KeepGoing)
        return false;
    }
  }
  return true;
}
1071
/// Check whether the access @p Inst with access function @p AF to base
/// pointer @p BP is valid inside a scop, recording element sizes,
/// delinearization candidates, required invariant loads and aliasing
/// information along the way.
bool ScopDetection::isValidAccess(Instruction *Inst, const SCEV *AF,
                                  const SCEVUnknown *BP,
                                  DetectionContext &Context) const {

  if (!BP)
    return invalid<ReportNoBasePtr>(Context, /*Assert=*/true, Arguments&: Inst);

  auto *BV = BP->getValue();
  if (isa<UndefValue>(Val: BV))
    return invalid<ReportUndefBasePtr>(Context, /*Assert=*/true, Arguments&: Inst);

  // FIXME: Think about allowing IntToPtrInst
  if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(Val: BV))
    return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Arguments&: Inst);

  // Check that the base address of the access is invariant in the current
  // region.
  if (!isInvariant(Val&: *BV, Reg: Context.CurRegion, Ctx&: Context))
    return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, Arguments&: BV, Arguments&: Inst);

  // From here on work with the offset relative to the base pointer.
  AF = SE.getMinusSCEV(LHS: AF, RHS: BP);

  const SCEV *Size;
  if (!isa<MemIntrinsic>(Val: Inst)) {
    Size = SE.getElementSize(Inst);
  } else {
    // Memory intrinsics have no natural element type; use a constant 8 of
    // the effective pointer-width type as their element size.
    auto *SizeTy =
        SE.getEffectiveSCEVType(Ty: PointerType::getUnqual(C&: SE.getContext()));
    Size = SE.getConstant(Ty: SizeTy, V: 8);
  }

  if (Context.ElementSize[BP]) {
    if (!AllowDifferentTypes && Context.ElementSize[BP] != Size)
      return invalid<ReportDifferentArrayElementSize>(Context, /*Assert=*/true,
                                                      Arguments&: Inst, Arguments&: BV);

    // Track the smallest element size seen for this base pointer.
    Context.ElementSize[BP] = SE.getSMinExpr(LHS: Size, RHS: Context.ElementSize[BP]);
  } else {
    Context.ElementSize[BP] = Size;
  }

  // An access that varies in an over-approximated (boxed) loop cannot be
  // represented precisely.
  bool IsVariantInNonAffineLoop = false;
  SetVector<const Loop *> Loops;
  findLoops(Expr: AF, Loops);
  for (const Loop *L : Loops)
    if (Context.BoxedLoopsSet.count(key: L))
      IsVariantInNonAffineLoop = true;

  auto *Scope = LI.getLoopFor(BB: Inst->getParent());
  bool IsAffine = !IsVariantInNonAffineLoop && isAffine(S: AF, Scope, Context);
  // Do not try to delinearize memory intrinsics and force them to be affine.
  if (isa<MemIntrinsic>(Val: Inst) && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments&: AF, Arguments&: Inst,
                                          Arguments&: BV);
  } else if (PollyDelinearize && !IsVariantInNonAffineLoop) {
    // Record the access; non-affine ones become delinearization candidates
    // checked later by hasAffineMemoryAccesses.
    Context.Accesses[BP].push_back(x: {Inst, AF});

    if (!IsAffine)
      Context.NonAffineAccesses.insert(
          X: std::make_pair(x&: BP, y: LI.getLoopFor(BB: Inst->getParent())));
  } else if (!AllowNonAffine && !IsAffine) {
    return invalid<ReportNonAffineAccess>(Context, /*Assert=*/true, Arguments&: AF, Arguments&: Inst,
                                          Arguments&: BV);
  }

  if (IgnoreAliasing)
    return true;

  // Check if the base pointer of the memory access does alias with
  // any other pointer. This cannot be handled at the moment.
  AAMDNodes AATags = Inst->getAAMetadata();
  AliasSet &AS = Context.AST.getAliasSetFor(
      MemLoc: MemoryLocation::getBeforeOrAfter(Ptr: BP->getValue(), AATags));

  if (!AS.isMustAlias()) {
    if (PollyUseRuntimeAliasChecks) {
      bool CanBuildRunTimeCheck = true;
      // The run-time alias check places code that involves the base pointer at
      // the beginning of the SCoP. This breaks if the base pointer is defined
      // inside the scop. Hence, we can only create a run-time check if we are
      // sure the base pointer is not an instruction defined inside the scop.
      // However, we can ignore loads that will be hoisted.

      auto ASPointers = AS.getPointers();

      InvariantLoadsSetTy VariantLS, InvariantLS;
      // In order to detect loads which are dependent on other invariant loads
      // as invariant, we use fixed-point iteration method here i.e we iterate
      // over the alias set for arbitrary number of times until it is safe to
      // assume that all the invariant loads have been detected
      while (true) {
        const unsigned int VariantSize = VariantLS.size(),
                           InvariantSize = InvariantLS.size();

        for (const Value *Ptr : ASPointers) {
          Instruction *Inst = dyn_cast<Instruction>(Val: const_cast<Value *>(Ptr));
          if (Inst && Context.CurRegion.contains(Inst)) {
            auto *Load = dyn_cast<LoadInst>(Val: Inst);
            if (Load && InvariantLS.count(key: Load))
              continue;
            if (Load && isHoistableLoad(LInst: Load, R&: Context.CurRegion, LI, SE, DT,
                                        KnownInvariantLoads: InvariantLS)) {
              // Promote a previously-variant load once it proves hoistable.
              if (VariantLS.count(key: Load))
                VariantLS.remove(X: Load);
              Context.RequiredILS.insert(X: Load);
              InvariantLS.insert(X: Load);
            } else {
              // NOTE(review): Load is null when Inst is not a LoadInst, so a
              // nullptr may be inserted into VariantLS here — confirm this is
              // intended.
              CanBuildRunTimeCheck = false;
              VariantLS.insert(X: Load);
            }
          }
        }

        // Stop once a fixed point is reached, i.e. neither set changed size.
        if (InvariantSize == InvariantLS.size() &&
            VariantSize == VariantLS.size())
          break;
      }

      if (CanBuildRunTimeCheck)
        return true;
    }
    return invalid<ReportAlias>(Context, /*Assert=*/true, Arguments&: Inst, Arguments&: AS);
  }

  return true;
}
1198
1199bool ScopDetection::isValidMemoryAccess(MemAccInst Inst,
1200 DetectionContext &Context) const {
1201 Value *Ptr = Inst.getPointerOperand();
1202 Loop *L = LI.getLoopFor(BB: Inst->getParent());
1203 const SCEV *AccessFunction = SE.getSCEVAtScope(V: Ptr, L);
1204 const SCEVUnknown *BasePointer;
1205
1206 BasePointer = dyn_cast<SCEVUnknown>(Val: SE.getPointerBase(V: AccessFunction));
1207
1208 return isValidAccess(Inst, AF: AccessFunction, BP: BasePointer, Context);
1209}
1210
/// Check whether @p Inst can be modeled inside a scop: its operands must not
/// escape error blocks (except through PHIs used only by terminators), and
/// the instruction itself must be a supported call, a side-effect-free
/// value computation, or a simple memory access.
bool ScopDetection::isValidInstruction(Instruction &Inst,
                                       DetectionContext &Context) {
  for (auto &Op : Inst.operands()) {
    auto *OpInst = dyn_cast<Instruction>(Val: &Op);

    if (!OpInst)
      continue;

    // Values defined in error blocks may only flow out via PHIs whose users
    // are all terminators; anything else would require modeling the error
    // block.
    if (isErrorBlock(BB&: *OpInst->getParent(), R: Context.CurRegion)) {
      auto *PHI = dyn_cast<PHINode>(Val: OpInst);
      if (PHI) {
        for (User *U : PHI->users()) {
          auto *UI = dyn_cast<Instruction>(Val: U);
          if (!UI || !UI->isTerminator())
            return false;
        }
      } else {
        return false;
      }
    }
  }

  // Exception-handling instructions are not supported.
  if (isa<LandingPadInst>(Val: &Inst) || isa<ResumeInst>(Val: &Inst))
    return false;

  // We only check the call instruction but not invoke instruction.
  if (CallInst *CI = dyn_cast<CallInst>(Val: &Inst)) {
    if (isValidCallInst(CI&: *CI, Context))
      return true;

    return invalid<ReportFuncCall>(Context, /*Assert=*/true, Arguments: &Inst);
  }

  if (!Inst.mayReadOrWriteMemory()) {
    // Non-memory instructions are fine, except allocas.
    if (!isa<AllocaInst>(Val: Inst))
      return true;

    return invalid<ReportAlloca>(Context, /*Assert=*/true, Arguments: &Inst);
  }

  // Check the access function.
  if (auto MemInst = MemAccInst::dyn_cast(V&: Inst)) {
    Context.hasStores |= isa<StoreInst>(Val: MemInst);
    Context.hasLoads |= isa<LoadInst>(Val: MemInst);
    // Volatile and atomic accesses are not simple and cannot be modeled.
    if (!MemInst.isSimple())
      return invalid<ReportNonSimpleMemoryAccess>(Context, /*Assert=*/true,
                                                  Arguments: &Inst);

    return isValidMemoryAccess(Inst: MemInst, Context);
  }

  // We do not know this instruction, therefore we assume it is invalid.
  return invalid<ReportUnknownInst>(Context, /*Assert=*/true, Arguments: &Inst);
}
1265
1266/// Check whether @p L has exiting blocks.
1267///
1268/// @param L The loop of interest
1269///
1270/// @return True if the loop has exiting blocks, false otherwise.
1271static bool hasExitingBlocks(Loop *L) {
1272 SmallVector<BasicBlock *, 4> ExitingBlocks;
1273 L->getExitingBlocks(ExitingBlocks);
1274 return !ExitingBlocks.empty();
1275}
1276
/// Check whether the trip count of @p L can be handled by ISL, i.e. whether
/// all exiting blocks and latches of the loop form valid control flow.
bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) {
  // FIXME: Yes, this is bad. isValidCFG() may call invalid<Reason>() which
  // causes the SCoP to be rejected regardless on whether non-ISL trip counts
  // could be used. We currently preserve the legacy behaviour of rejecting
  // based on Context.Log.size() added by isValidCFG() or before, regardless on
  // whether the ISL trip count can be used or can be used as a non-affine
  // region. However, we allow rejections by isValidCFG() that do not result in
  // an error log entry.
  bool OldIsInvalid = Context.IsInvalid;

  // Ensure the loop has valid exiting blocks as well as latches, otherwise we
  // need to overapproximate it as a boxed loop.
  SmallVector<BasicBlock *, 4> LoopControlBlocks;
  L->getExitingBlocks(ExitingBlocks&: LoopControlBlocks);
  L->getLoopLatches(LoopLatches&: LoopControlBlocks);
  for (BasicBlock *ControlBB : LoopControlBlocks) {
    if (!isValidCFG(BB&: *ControlBB, IsLoopBranch: true, AllowUnreachable: false, Context)) {
      // See FIXME above: invalidity is derived from the presence of log
      // entries, not from the isValidCFG result alone.
      Context.IsInvalid = OldIsInvalid || Context.Log.size();
      return false;
    }
  }

  // We can use ISL to compute the trip count of L.
  Context.IsInvalid = OldIsInvalid || Context.Log.size();
  return true;
}
1303
/// Check whether loop @p L is valid inside a scop: it must have an exit, a
/// single exit block, and either an ISL-computable trip count or be
/// over-approximated as a non-affine region.
bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) {
  // Loops that contain part but not all of the blocks of a region cannot be
  // handled by the schedule generation. Such loop constructs can happen
  // because a region can contain BBs that have no path to the exit block
  // (Infinite loops, UnreachableInst), but such blocks are never part of a
  // loop.
  //
  // _______________
  // | Loop Header | <-----------.
  // ---------------             |
  //        |                    |
  // _______________       ______________
  // | RegionEntry |-----> | RegionExit |----->
  // ---------------       --------------
  //        |
  // _______________
  // | EndlessLoop | <--.
  // ---------------    |
  //        |           |
  //        \------------/
  //
  // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is
  // neither entirely contained in the region RegionEntry->RegionExit
  // (containing RegionEntry,EndlessLoop) nor is the region entirely contained
  // in the loop.
  // The block EndlessLoop is contained in the region because Region::contains
  // tests whether it is not dominated by RegionExit. This is probably to not
  // having to query the PostdominatorTree. Instead of an endless loop, a dead
  // end can also be formed by an UnreachableInst. This case is already caught
  // by isErrorBlock(). We hence only have to reject endless loops here.
  if (!hasExitingBlocks(L))
    return invalid<ReportLoopHasNoExit>(Context, /*Assert=*/true, Arguments&: L);

  // The algorithm for domain construction assumes that loops has only a single
  // exit block (and hence corresponds to a subregion). Note that we cannot use
  // L->getExitBlock() because it does not check whether all exiting edges point
  // to the same BB.
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  // ExitBlocks is non-empty because hasExitingBlocks(L) held above.
  BasicBlock *TheExitBlock = ExitBlocks[0];
  for (BasicBlock *ExitBB : ExitBlocks) {
    if (TheExitBlock != ExitBB)
      return invalid<ReportLoopHasMultipleExits>(Context, /*Assert=*/true, Arguments&: L);
  }

  if (canUseISLTripCount(L, Context))
    return true;

  // The trip count is not computable with ISL; try to over-approximate the
  // smallest region that covers L as a non-affine (boxed) region.
  if (AllowNonAffineSubLoops && AllowNonAffineSubRegions) {
    Region *R = RI.getRegionFor(BB: L->getHeader());
    while (R != &Context.CurRegion && !R->contains(L))
      R = R->getParent();

    if (addOverApproximatedRegion(AR: R, Context))
      return true;
  }

  const SCEV *LoopCount = SE.getBackedgeTakenCount(L);
  return invalid<ReportLoopBound>(Context, /*Assert=*/true, Arguments&: L, Arguments&: LoopCount);
}
1364
/// Return the number of loops in @p L (incl. @p L) whose constant trip
/// count is not known to be at most @p MinProfitableTrips, together with
/// the maximal loop depth below (and including) @p L.
ScopDetection::LoopStats
ScopDetection::countBeneficialSubLoops(Loop *L, ScalarEvolution &SE,
                                       unsigned MinProfitableTrips) {
  const SCEV *TripCount = SE.getBackedgeTakenCount(L);

  int NumLoops = 1;
  int MaxLoopDepth = 1;
  // Do not count L itself if its trip count is a constant (fitting in 64
  // bits) known to be at most MinProfitableTrips.
  if (MinProfitableTrips > 0)
    if (auto *TripCountC = dyn_cast<SCEVConstant>(Val: TripCount))
      if (TripCountC->getType()->getScalarSizeInBits() <= 64)
        if (TripCountC->getValue()->getZExtValue() <= MinProfitableTrips)
          NumLoops -= 1;

  // Recurse into sub-loops, accumulating counts and tracking depth.
  for (auto &SubLoop : *L) {
    LoopStats Stats = countBeneficialSubLoops(L: SubLoop, SE, MinProfitableTrips);
    NumLoops += Stats.NumLoops;
    MaxLoopDepth = std::max(a: MaxLoopDepth, b: Stats.MaxDepth + 1);
  }

  return {.NumLoops: NumLoops, .MaxDepth: MaxLoopDepth};
}
1388
/// Count the beneficial loops fully contained in region @p R and the maximal
/// loop depth among them, delegating per-loop work to
/// countBeneficialSubLoops.
ScopDetection::LoopStats
ScopDetection::countBeneficialLoops(Region *R, ScalarEvolution &SE,
                                    LoopInfo &LI, unsigned MinProfitableTrips) {
  int LoopNum = 0;
  int MaxLoopDepth = 0;

  auto L = LI.getLoopFor(BB: R->getEntry());

  // If L is fully contained in R, move to first loop surrounding R. Otherwise,
  // L is either nullptr or already surrounding R.
  if (L && R->contains(L)) {
    L = R->outermostLoopInRegion(L);
    L = L->getParentLoop();
  }

  // Candidate loops: children of the loop surrounding R, or top-level loops
  // of the function if there is no surrounding loop.
  auto SubLoops =
      L ? L->getSubLoopsVector() : std::vector<Loop *>(LI.begin(), LI.end());

  for (auto &SubLoop : SubLoops)
    if (R->contains(L: SubLoop)) {
      LoopStats Stats =
          countBeneficialSubLoops(L: SubLoop, SE, MinProfitableTrips);
      LoopNum += Stats.NumLoops;
      MaxLoopDepth = std::max(a: MaxLoopDepth, b: Stats.MaxDepth);
    }

  return {.NumLoops: LoopNum, .MaxDepth: MaxLoopDepth};
}
1417
/// Decide whether @p BB should be treated as an error block, i.e. a block
/// whose execution is assumed to be a rare event (e.g. it ends in
/// unreachable or calls a function that may not return), so it can be
/// excluded from the modeled scop.
static bool isErrorBlockImpl(BasicBlock &BB, const Region &R, LoopInfo &LI,
                             const DominatorTree &DT) {
  if (isa<UnreachableInst>(Val: BB.getTerminator()))
    return true;

  // Loop headers are regular control flow, never error blocks.
  if (LI.isLoopHeader(BB: &BB))
    return false;

  // Don't consider something outside the SCoP as error block. It will precede
  // the code versioning runtime check.
  if (!R.contains(BB: &BB))
    return false;

  // Basic blocks that are always executed are not considered error blocks,
  // as their execution can not be a rare event.
  bool DominatesAllPredecessors = true;
  if (R.isTopLevelRegion()) {
    // For the top-level region, check dominance against every returning
    // block of the function.
    for (BasicBlock &I : *R.getEntry()->getParent()) {
      if (isa<ReturnInst>(Val: I.getTerminator()) && !DT.dominates(A: &BB, B: &I)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  } else {
    // Otherwise, check dominance against the in-region predecessors of the
    // region exit.
    for (auto Pred : predecessors(BB: R.getExit())) {
      if (R.contains(BB: Pred) && !DT.dominates(A: &BB, B: Pred)) {
        DominatesAllPredecessors = false;
        break;
      }
    }
  }

  if (DominatesAllPredecessors)
    return false;

  // A non-dominating block counts as an error block if it contains a call
  // that may access memory or may not return (ignoring debug calls, ignored
  // intrinsics and the modeled memory intrinsics).
  for (Instruction &Inst : BB)
    if (CallInst *CI = dyn_cast<CallInst>(Val: &Inst)) {
      if (isDebugCall(Inst: CI))
        continue;

      if (isIgnoredIntrinsic(V: CI))
        continue;

      // memset, memcpy and memmove are modeled intrinsics.
      if (isa<MemSetInst>(Val: CI) || isa<MemTransferInst>(Val: CI))
        continue;

      if (!CI->doesNotAccessMemory())
        return true;
      if (CI->doesNotReturn())
        return true;
    }

  return false;
}
1473
1474bool ScopDetection::isErrorBlock(llvm::BasicBlock &BB, const llvm::Region &R) {
1475 if (!PollyAllowErrorBlocks)
1476 return false;
1477
1478 auto It = ErrorBlockCache.insert(KV: {std::make_pair(x: &BB, y: &R), false});
1479 if (!It.second)
1480 return It.first->getSecond();
1481
1482 bool Result = isErrorBlockImpl(BB, R, LI, DT);
1483 It.first->second = Result;
1484 return Result;
1485}
1486
/// Try to grow the valid region @p R into larger non-canonical regions.
/// Returns the largest valid expanded region found (ownership passes to the
/// caller via release()), or nullptr if no expansion validated.
Region *ScopDetection::expandRegion(Region &R) {
  // Initial no valid region was found (greater than R)
  std::unique_ptr<Region> LastValidRegion;
  auto ExpandedRegion = std::unique_ptr<Region>(R.getExpandedRegion());

  POLLY_DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n");

  while (ExpandedRegion) {
    // Create a fresh detection context for the candidate region.
    BBPair P = getBBPairForRegion(R: ExpandedRegion.get());
    std::unique_ptr<DetectionContext> &Entry = DetectionContextMap[P];
    Entry = std::make_unique<DetectionContext>(args&: *ExpandedRegion, args&: AA,
                                               /*Verifying=*/args: false);
    DetectionContext &Context = *Entry;

    POLLY_DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr()
                       << "\n");
    // Only expand when we did not collect errors.

    if (!Context.Log.hasErrors()) {
      // If the exit is valid check all blocks
      //  - if true, a valid region was found => store it + keep expanding
      //  - if false, .tbd. => stop (should this really end the loop?)
      if (!allBlocksValid(Context) || Context.Log.hasErrors()) {
        removeCachedResults(R: *ExpandedRegion);
        DetectionContextMap.erase(Val: P);
        break;
      }

      // Store this region, because it is the greatest valid (encountered so
      // far).
      if (LastValidRegion) {
        removeCachedResults(R: *LastValidRegion);
        // NOTE(review): this erases P, the context of the region that just
        // validated, rather than the pair of the superseded LastValidRegion
        // — confirm this is intended.
        DetectionContextMap.erase(Val: P);
      }
      LastValidRegion = std::move(ExpandedRegion);

      // Create and test the next greater region (if any)
      ExpandedRegion =
          std::unique_ptr<Region>(LastValidRegion->getExpandedRegion());

    } else {
      // Create and test the next greater region (if any)
      removeCachedResults(R: *ExpandedRegion);
      DetectionContextMap.erase(Val: P);
      ExpandedRegion =
          std::unique_ptr<Region>(ExpandedRegion->getExpandedRegion());
    }
  }

  POLLY_DEBUG({
    if (LastValidRegion)
      dbgs() << "\tto " << LastValidRegion->getNameStr() << "\n";
    else
      dbgs() << "\tExpanding " << R.getNameStr() << " failed\n";
  });

  return LastValidRegion.release();
}
1545
1546static bool regionWithoutLoops(Region &R, LoopInfo &LI) {
1547 for (const BasicBlock *BB : R.blocks())
1548 if (R.contains(L: LI.getLoopFor(BB)))
1549 return false;
1550
1551 return true;
1552}
1553
1554void ScopDetection::removeCachedResultsRecursively(const Region &R) {
1555 for (auto &SubRegion : R) {
1556 if (ValidRegions.count(key: SubRegion.get())) {
1557 removeCachedResults(R: *SubRegion);
1558 } else
1559 removeCachedResultsRecursively(R: *SubRegion);
1560 }
1561}
1562
/// Drop @p R (only R itself, not its children) from the set of valid
/// regions.
void ScopDetection::removeCachedResults(const Region &R) {
  ValidRegions.remove(X: &R);
}
1566
/// Recursively detect scops: check @p R itself, descend into its canonical
/// subregions on failure, then try to enlarge valid subregions into
/// non-canonical expanded regions.
void ScopDetection::findScops(Region &R) {
  std::unique_ptr<DetectionContext> &Entry =
      DetectionContextMap[getBBPairForRegion(R: &R)];
  Entry = std::make_unique<DetectionContext>(args&: R, args&: AA, /*Verifying=*/args: false);
  DetectionContext &Context = *Entry;

  bool DidBailout = true;
  if (!PollyProcessUnprofitable && regionWithoutLoops(R, LI))
    invalid<ReportUnprofitable>(Context, /*Assert=*/true, Arguments: &R);
  else
    DidBailout = !isValidRegion(Context);

  // Consistency check between the short-circuit result and the recorded
  // invalidity (only used by the asserts below).
  (void)DidBailout;
  if (KeepGoing) {
    assert((!DidBailout || Context.IsInvalid) &&
           "With -polly-detect-keep-going, it is sufficient that if "
           "isValidRegion short-circuited, that SCoP is invalid");
  } else {
    assert(DidBailout == Context.IsInvalid &&
           "isValidRegion must short-circuit iff the ScoP is invalid");
  }

  if (Context.IsInvalid) {
    removeCachedResults(R);
  } else {
    // R as a whole is a scop; no need to look at subregions.
    ValidRegions.insert(X: &R);
    return;
  }

  for (auto &SubRegion : R)
    findScops(R&: *SubRegion);

  // Try to expand regions.
  //
  // As the region tree normally only contains canonical regions, non canonical
  // regions that form a Scop are not found. Therefore, those non canonical
  // regions are checked by expanding the canonical ones.

  std::vector<Region *> ToExpand;

  for (auto &SubRegion : R)
    ToExpand.push_back(x: SubRegion.get());

  for (Region *CurrentRegion : ToExpand) {
    // Skip invalid regions. Regions may become invalid, if they are element of
    // an already expanded region.
    if (!ValidRegions.count(key: CurrentRegion))
      continue;

    // Skip regions that had errors.
    bool HadErrors = lookupRejectionLog(R: CurrentRegion)->hasErrors();
    if (HadErrors)
      continue;

    Region *ExpandedR = expandRegion(R&: *CurrentRegion);

    if (!ExpandedR)
      continue;

    // Replace the original subregion by its expansion in the region tree and
    // in the cached results.
    R.addSubRegion(SubRegion: ExpandedR, moveChildren: true);
    ValidRegions.insert(X: ExpandedR);
    removeCachedResults(R: *CurrentRegion);
    removeCachedResultsRecursively(R: *ExpandedR);
  }
}
1632
/// Check that every loop, every block's control flow and every instruction
/// inside the region of @p Context is valid, and that all memory accesses
/// are (or can be delinearized into) affine accesses.
bool ScopDetection::allBlocksValid(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  // First pass: validate all loops whose header lies in the region.
  for (const BasicBlock *BB : CurRegion.blocks()) {
    Loop *L = LI.getLoopFor(BB);
    if (L && L->getHeader() == BB) {
      if (CurRegion.contains(L)) {
        if (!isValidLoop(L, Context)) {
          Context.IsInvalid = true;
          if (!KeepGoing)
            return false;
        }
      } else {
        // The loop is not fully contained: reject if any of its latches lies
        // inside the region (partially contained loops cannot be scheduled).
        SmallVector<BasicBlock *, 1> Latches;
        L->getLoopLatches(LoopLatches&: Latches);
        for (BasicBlock *Latch : Latches)
          if (CurRegion.contains(BB: Latch))
            return invalid<ReportLoopOnlySomeLatches>(Context, /*Assert=*/true,
                                                      Arguments&: L);
      }
    }
  }

  // Second pass: validate control flow and instructions per block.
  for (BasicBlock *BB : CurRegion.blocks()) {
    bool IsErrorBlock = isErrorBlock(BB&: *BB, R: CurRegion);

    // Also check exception blocks (and possibly register them as non-affine
    // regions). Even though exception blocks are not modeled, we use them
    // to forward-propagate domain constraints during ScopInfo construction.
    if (!isValidCFG(BB&: *BB, IsLoopBranch: false, AllowUnreachable: IsErrorBlock, Context) && !KeepGoing)
      return false;

    if (IsErrorBlock)
      continue;

    // Skip the terminator (--BB->end()); it was handled by isValidCFG above.
    for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I)
      if (!isValidInstruction(Inst&: *I, Context)) {
        Context.IsInvalid = true;
        if (!KeepGoing)
          return false;
      }
  }

  if (!hasAffineMemoryAccesses(Context))
    return false;

  return true;
}
1681
1682bool ScopDetection::hasSufficientCompute(DetectionContext &Context,
1683 int NumLoops) const {
1684 int InstCount = 0;
1685
1686 if (NumLoops == 0)
1687 return false;
1688
1689 for (auto *BB : Context.CurRegion.blocks())
1690 if (Context.CurRegion.contains(L: LI.getLoopFor(BB)))
1691 InstCount += BB->size();
1692
1693 InstCount = InstCount / NumLoops;
1694
1695 return InstCount >= ProfitabilityMinPerLoopInstructions;
1696}
1697
/// Check whether the region contains a loop that might profit from loop
/// distribution, i.e. one with more than one block performing stores.
bool ScopDetection::hasPossiblyDistributableLoop(
    DetectionContext &Context) const {
  for (auto *BB : Context.CurRegion.blocks()) {
    auto *L = LI.getLoopFor(BB);
    if (!L)
      continue;
    if (!Context.CurRegion.contains(L))
      continue;
    if (Context.BoxedLoopsSet.count(key: L))
      continue;
    // Count the number of blocks of L that contain at least one store.
    unsigned StmtsWithStoresInLoops = 0;
    for (auto *LBB : L->blocks()) {
      bool MemStore = false;
      for (auto &I : *LBB)
        MemStore |= isa<StoreInst>(Val: &I);
      StmtsWithStoresInLoops += MemStore;
    }
    // Note: due to this unconditional return, only the first eligible loop
    // encountered in block order is inspected.
    return (StmtsWithStoresInLoops > 1);
  }
  return false;
}
1719
/// Heuristically decide whether optimizing the region is likely to pay off.
/// Always true with -polly-process-unprofitable.
bool ScopDetection::isProfitableRegion(DetectionContext &Context) const {
  Region &CurRegion = Context.CurRegion;

  if (PollyProcessUnprofitable)
    return true;

  // We can probably not do a lot on scops that only write or only read
  // data.
  if (!Context.hasStores || !Context.hasLoads)
    return invalid<ReportUnprofitable>(Context, /*Assert=*/true, Arguments: &CurRegion);

  int NumLoops =
      countBeneficialLoops(R: &CurRegion, SE, LI, MinProfitableTrips: MIN_LOOP_TRIP_COUNT).NumLoops;
  // Boxed (over-approximated) loops are not usable for transformations.
  int NumAffineLoops = NumLoops - Context.BoxedLoopsSet.size();

  // Scops with at least two loops may allow either loop fusion or tiling and
  // are consequently interesting to look at.
  if (NumAffineLoops >= 2)
    return true;

  // A loop with multiple non-trivial blocks might be amendable to distribution.
  if (NumAffineLoops == 1 && hasPossiblyDistributableLoop(Context))
    return true;

  // Scops that contain a loop with a non-trivial amount of computation per
  // loop-iteration are interesting as we may be able to parallelize such
  // loops. Individual loops that have only a small amount of computation
  // per-iteration are performance-wise very fragile as any change to the
  // loop induction variables may affect performance. To not cause spurious
  // performance regressions, we do not consider such loops.
  if (NumAffineLoops == 1 && hasSufficientCompute(Context, NumLoops))
    return true;

  return invalid<ReportUnprofitable>(Context, /*Assert=*/true, Arguments: &CurRegion);
}
1755
/// Check all structural requirements for the region of @p Context to be a
/// scop: not the top-level region or the function entry, a reachable exit,
/// no indirect-branch predecessors, all blocks valid, and reducible control
/// flow.
bool ScopDetection::isValidRegion(DetectionContext &Context) {
  Region &CurRegion = Context.CurRegion;

  POLLY_DEBUG(dbgs() << "Checking region: " << CurRegion.getNameStr()
                     << "\n\t");

  if (!PollyAllowFullFunction && CurRegion.isTopLevelRegion()) {
    POLLY_DEBUG(dbgs() << "Top level region is invalid\n");
    Context.IsInvalid = true;
    return false;
  }

  DebugLoc DbgLoc;
  if (CurRegion.getExit() &&
      isa<UnreachableInst>(Val: CurRegion.getExit()->getTerminator())) {
    POLLY_DEBUG(dbgs() << "Unreachable in exit\n");
    return invalid<ReportUnreachableInExit>(Context, /*Assert=*/true,
                                            Arguments: CurRegion.getExit(), Arguments&: DbgLoc);
  }

  // Honor the -polly-only-region filter.
  if (!OnlyRegion.empty() &&
      !CurRegion.getEntry()->getName().count(Str: OnlyRegion)) {
    POLLY_DEBUG({
      dbgs() << "Region entry does not match -polly-only-region";
      dbgs() << "\n";
    });
    Context.IsInvalid = true;
    return false;
  }

  // Predecessors ending in indirect or callbr branches cannot be retargeted
  // to a runtime-check version of the scop.
  for (BasicBlock *Pred : predecessors(BB: CurRegion.getEntry())) {
    Instruction *PredTerm = Pred->getTerminator();
    if (isa<IndirectBrInst>(Val: PredTerm) || isa<CallBrInst>(Val: PredTerm))
      return invalid<ReportIndirectPredecessor>(
          Context, /*Assert=*/true, Arguments&: PredTerm, Arguments: PredTerm->getDebugLoc());
  }

  // SCoP cannot contain the entry block of the function, because we need
  // to insert alloca instruction there when translate scalar to array.
  if (!PollyAllowFullFunction &&
      CurRegion.getEntry() ==
          &(CurRegion.getEntry()->getParent()->getEntryBlock()))
    return invalid<ReportEntry>(Context, /*Assert=*/true, Arguments: CurRegion.getEntry());

  if (!allBlocksValid(Context)) {
    // TODO: Every failure condition within allBlocksValid should call
    // invalid<Reason>(). Otherwise we reject SCoPs without giving feedback to
    // the user.
    Context.IsInvalid = true;
    return false;
  }

  if (!isReducibleRegion(R&: CurRegion, DbgLoc))
    return invalid<ReportIrreducibleRegion>(Context, /*Assert=*/true,
                                            Arguments: &CurRegion, Arguments&: DbgLoc);

  POLLY_DEBUG(dbgs() << "OK\n");
  return true;
}
1815
1816void ScopDetection::markFunctionAsInvalid(Function *F) {
1817 F->addFnAttr(Kind: PollySkipFnAttr);
1818}
1819
1820bool ScopDetection::isValidFunction(Function &F) {
1821 return !F.hasFnAttribute(Kind: PollySkipFnAttr);
1822}
1823
1824void ScopDetection::printLocations(Function &F) {
1825 for (const Region *R : *this) {
1826 unsigned LineEntry, LineExit;
1827 std::string FileName;
1828
1829 getDebugLocation(R, LineBegin&: LineEntry, LineEnd&: LineExit, FileName);
1830 DiagnosticScopFound Diagnostic(F, FileName, LineEntry, LineExit);
1831 F.getContext().diagnose(DI: Diagnostic);
1832 }
1833}
1834
1835void ScopDetection::emitMissedRemarks(const Function &F) {
1836 for (auto &DIt : DetectionContextMap) {
1837 DetectionContext &DC = *DIt.getSecond();
1838 if (DC.Log.hasErrors())
1839 emitRejectionRemarks(P: DIt.getFirst(), Log: DC.Log, ORE);
1840 }
1841}
1842
// Check whether the control flow in Region R is reducible, i.e. could have
// been produced using only structured 'for'/'if' constructs.
//
// The check is an iterative depth-first walk over the region's CFG using an
// explicit stack of (block, next-successor-index) pairs. An edge whose target
// is currently on the DFS stack (GREY) is a retreating edge; for reducible
// control flow its target must dominate its source (a natural loop back
// edge). If it does not, the region is irreducible: DbgLoc is set to the
// offending terminator's debug location and false is returned.
bool ScopDetection::isReducibleRegion(Region &R, DebugLoc &DbgLoc) const {
  /// Enum for coloring BBs in Region.
  ///
  /// WHITE - Unvisited BB in DFS walk.
  /// GREY - BBs which are currently on the DFS stack for processing.
  /// BLACK - Visited and completely processed BB.
  enum Color { WHITE, GREY, BLACK };

  BasicBlock *REntry = R.getEntry();
  BasicBlock *RExit = R.getExit();
  // Map to match the color of a BasicBlock during the DFS walk.
  DenseMap<const BasicBlock *, Color> BBColorMap;
  // Stack keeping track of current BB and index of next child to be processed.
  std::stack<std::pair<BasicBlock *, unsigned>> DFSStack;

  unsigned AdjacentBlockIndex = 0;
  BasicBlock *CurrBB, *SuccBB;
  CurrBB = REntry;

  // Initialize the map for all BB with WHITE color.
  for (auto *BB : R.blocks())
    BBColorMap[BB] = WHITE;

  // Process the entry block of the Region.
  BBColorMap[CurrBB] = GREY;
  DFSStack.push(std::make_pair(CurrBB, 0));

  while (!DFSStack.empty()) {
    // Get next BB on stack to be processed.
    CurrBB = DFSStack.top().first;
    AdjacentBlockIndex = DFSStack.top().second;
    DFSStack.pop();

    // Loop to iterate over the successors of current BB. Iteration resumes
    // at the successor index saved when CurrBB was last pushed.
    const Instruction *TInst = CurrBB->getTerminator();
    unsigned NSucc = TInst->getNumSuccessors();
    for (unsigned I = AdjacentBlockIndex; I < NSucc;
         ++I, ++AdjacentBlockIndex) {
      SuccBB = TInst->getSuccessor(I);

      // Checks for region exit block and self-loops in BB.
      if (SuccBB == RExit || SuccBB == CurrBB)
        continue;

      // WHITE indicates an unvisited BB in DFS walk.
      if (BBColorMap[SuccBB] == WHITE) {
        // Push the current BB and the index of the next child to be visited,
        // so the walk resumes here after SuccBB's subtree is finished.
        DFSStack.push(std::make_pair(CurrBB, I + 1));
        // Push the next BB to be processed.
        DFSStack.push(std::make_pair(SuccBB, 0));
        // First time the BB is being processed.
        BBColorMap[SuccBB] = GREY;
        break;
      } else if (BBColorMap[SuccBB] == GREY) {
        // GREY indicates a loop in the control flow.
        // If the destination dominates the source, it is a natural loop
        // else, an irreducible control flow in the region is detected.
        if (!DT.dominates(SuccBB, CurrBB)) {
          // Get debug info of instruction which causes irregular control flow.
          DbgLoc = TInst->getDebugLoc();
          return false;
        }
      }
    }

    // If all children of current BB have been processed,
    // then mark that BB as fully processed. (If the inner loop exited via
    // 'break', CurrBB was re-pushed and stays GREY.)
    if (AdjacentBlockIndex == NSucc)
      BBColorMap[CurrBB] = BLACK;
  }

  return true;
}
1916
1917static void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
1918 bool OnlyProfitable) {
1919 if (!OnlyProfitable) {
1920 NumLoopsInScop += Stats.NumLoops;
1921 MaxNumLoopsInScop =
1922 std::max(a: MaxNumLoopsInScop.getValue(), b: (uint64_t)Stats.NumLoops);
1923 if (Stats.MaxDepth == 0)
1924 NumScopsDepthZero++;
1925 else if (Stats.MaxDepth == 1)
1926 NumScopsDepthOne++;
1927 else if (Stats.MaxDepth == 2)
1928 NumScopsDepthTwo++;
1929 else if (Stats.MaxDepth == 3)
1930 NumScopsDepthThree++;
1931 else if (Stats.MaxDepth == 4)
1932 NumScopsDepthFour++;
1933 else if (Stats.MaxDepth == 5)
1934 NumScopsDepthFive++;
1935 else
1936 NumScopsDepthLarger++;
1937 } else {
1938 NumLoopsInProfScop += Stats.NumLoops;
1939 MaxNumLoopsInProfScop =
1940 std::max(a: MaxNumLoopsInProfScop.getValue(), b: (uint64_t)Stats.NumLoops);
1941 if (Stats.MaxDepth == 0)
1942 NumProfScopsDepthZero++;
1943 else if (Stats.MaxDepth == 1)
1944 NumProfScopsDepthOne++;
1945 else if (Stats.MaxDepth == 2)
1946 NumProfScopsDepthTwo++;
1947 else if (Stats.MaxDepth == 3)
1948 NumProfScopsDepthThree++;
1949 else if (Stats.MaxDepth == 4)
1950 NumProfScopsDepthFour++;
1951 else if (Stats.MaxDepth == 5)
1952 NumProfScopsDepthFive++;
1953 else
1954 NumProfScopsDepthLarger++;
1955 }
1956}
1957
1958ScopDetection::DetectionContext *
1959ScopDetection::getDetectionContext(const Region *R) const {
1960 auto DCMIt = DetectionContextMap.find(Val: getBBPairForRegion(R));
1961 if (DCMIt == DetectionContextMap.end())
1962 return nullptr;
1963 return DCMIt->second.get();
1964}
1965
1966const RejectLog *ScopDetection::lookupRejectionLog(const Region *R) const {
1967 const DetectionContext *DC = getDetectionContext(R);
1968 return DC ? &DC->Log : nullptr;
1969}
1970
1971void ScopDetection::verifyRegion(const Region &R) {
1972 assert(isMaxRegionInScop(R) && "Expect R is a valid region.");
1973
1974 DetectionContext Context(const_cast<Region &>(R), AA, true /*verifying*/);
1975 isValidRegion(Context);
1976}
1977
1978void ScopDetection::verifyAnalysis() {
1979 if (!VerifyScops)
1980 return;
1981
1982 for (const Region *R : ValidRegions)
1983 verifyRegion(R: *R);
1984}
1985
1986bool ScopDetectionWrapperPass::runOnFunction(Function &F) {
1987 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1988 auto &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1989 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1990 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
1991 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1992 auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
1993
1994 Result = std::make_unique<ScopDetection>(args&: DT, args&: SE, args&: LI, args&: RI, args&: AA, args&: ORE);
1995 Result->detect(F);
1996 return false;
1997}
1998
1999void ScopDetectionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
2000 AU.addRequired<LoopInfoWrapperPass>();
2001 AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
2002 AU.addRequired<DominatorTreeWrapperPass>();
2003 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
2004 // We also need AA and RegionInfo when we are verifying analysis.
2005 AU.addRequiredTransitive<AAResultsWrapperPass>();
2006 AU.addRequiredTransitive<RegionInfoPass>();
2007 AU.setPreservesAll();
2008}
2009
2010void ScopDetectionWrapperPass::print(raw_ostream &OS, const Module *) const {
2011 for (const Region *R : Result->ValidRegions)
2012 OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2013
2014 OS << "\n";
2015}
2016
2017ScopDetectionWrapperPass::ScopDetectionWrapperPass() : FunctionPass(ID) {
2018 // Disable runtime alias checks if we ignore aliasing all together.
2019 if (IgnoreAliasing)
2020 PollyUseRuntimeAliasChecks = false;
2021}
2022
2023ScopAnalysis::ScopAnalysis() {
2024 // Disable runtime alias checks if we ignore aliasing all together.
2025 if (IgnoreAliasing)
2026 PollyUseRuntimeAliasChecks = false;
2027}
2028
2029void ScopDetectionWrapperPass::releaseMemory() { Result.reset(); }
2030
// Unique pass identifier used by the legacy pass manager.
char ScopDetectionWrapperPass::ID;

// Unique analysis key used by the new pass manager.
AnalysisKey ScopAnalysis::Key;
2034
2035ScopDetection ScopAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
2036 auto &LI = FAM.getResult<LoopAnalysis>(IR&: F);
2037 auto &RI = FAM.getResult<RegionInfoAnalysis>(IR&: F);
2038 auto &AA = FAM.getResult<AAManager>(IR&: F);
2039 auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(IR&: F);
2040 auto &DT = FAM.getResult<DominatorTreeAnalysis>(IR&: F);
2041 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(IR&: F);
2042
2043 ScopDetection Result(DT, SE, LI, RI, AA, ORE);
2044 Result.detect(F);
2045 return Result;
2046}
2047
2048PreservedAnalyses ScopAnalysisPrinterPass::run(Function &F,
2049 FunctionAnalysisManager &FAM) {
2050 OS << "Detected Scops in Function " << F.getName() << "\n";
2051 auto &SD = FAM.getResult<ScopAnalysis>(IR&: F);
2052 for (const Region *R : SD.ValidRegions)
2053 OS << "Valid Region for Scop: " << R->getNameStr() << '\n';
2054
2055 OS << "\n";
2056 return PreservedAnalyses::all();
2057}
2058
2059Pass *polly::createScopDetectionWrapperPassPass() {
2060 return new ScopDetectionWrapperPass();
2061}
2062
// Register the legacy detection pass and its analysis dependencies with the
// legacy pass manager under the command-line name 'polly-detect'.
INITIALIZE_PASS_BEGIN(ScopDetectionWrapperPass, "polly-detect",
                      "Polly - Detect static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
INITIALIZE_PASS_END(ScopDetectionWrapperPass, "polly-detect",
                    "Polly - Detect static control parts (SCoPs)", false, false)
2074
2075//===----------------------------------------------------------------------===//
2076
2077namespace {
2078/// Print result from ScopDetectionWrapperPass.
2079class ScopDetectionPrinterLegacyPass final : public FunctionPass {
2080public:
2081 static char ID;
2082
2083 ScopDetectionPrinterLegacyPass() : ScopDetectionPrinterLegacyPass(outs()) {}
2084
2085 explicit ScopDetectionPrinterLegacyPass(llvm::raw_ostream &OS)
2086 : FunctionPass(ID), OS(OS) {}
2087
2088 bool runOnFunction(Function &F) override {
2089 ScopDetectionWrapperPass &P = getAnalysis<ScopDetectionWrapperPass>();
2090
2091 OS << "Printing analysis '" << P.getPassName() << "' for function '"
2092 << F.getName() << "':\n";
2093 P.print(OS);
2094
2095 return false;
2096 }
2097
2098 void getAnalysisUsage(AnalysisUsage &AU) const override {
2099 FunctionPass::getAnalysisUsage(AU);
2100 AU.addRequired<ScopDetectionWrapperPass>();
2101 AU.setPreservesAll();
2102 }
2103
2104private:
2105 llvm::raw_ostream &OS;
2106};
2107
2108char ScopDetectionPrinterLegacyPass::ID = 0;
2109} // namespace
2110
2111Pass *polly::createScopDetectionPrinterLegacyPass(raw_ostream &OS) {
2112 return new ScopDetectionPrinterLegacyPass(OS);
2113}
2114
// Register the legacy printer pass (depends on the detection pass) under the
// command-line name 'polly-print-detect'.
INITIALIZE_PASS_BEGIN(ScopDetectionPrinterLegacyPass, "polly-print-detect",
                      "Polly - Print static control parts (SCoPs)", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_END(ScopDetectionPrinterLegacyPass, "polly-print-detect",
                    "Polly - Print static control parts (SCoPs)", false, false)
2121

Provided by KDAB

Privacy Policy
Improve your Profiling and Debugging skills
Find out more

source code of polly/lib/Analysis/ScopDetection.cpp