1 | //===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines some loop transformation utilities. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H |
14 | #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H |
15 | |
16 | #include "llvm/Analysis/IVDescriptors.h" |
17 | #include "llvm/Analysis/LoopAccessAnalysis.h" |
18 | #include "llvm/Transforms/Utils/ValueMapper.h" |
19 | |
20 | namespace llvm { |
21 | |
22 | template <typename T> class DomTreeNodeBase; |
23 | using DomTreeNode = DomTreeNodeBase<BasicBlock>; |
24 | class AssumptionCache; |
25 | class StringRef; |
26 | class AnalysisUsage; |
27 | class TargetTransformInfo; |
28 | class AAResults; |
29 | class BasicBlock; |
30 | class ICFLoopSafetyInfo; |
31 | class IRBuilderBase; |
32 | class Loop; |
33 | class LoopInfo; |
34 | class MemoryAccess; |
35 | class MemorySSA; |
36 | class MemorySSAUpdater; |
37 | class ; |
38 | class PredIteratorCache; |
39 | class ScalarEvolution; |
40 | class SCEV; |
41 | class SCEVExpander; |
42 | class TargetLibraryInfo; |
43 | class LPPassManager; |
44 | class Instruction; |
45 | struct RuntimeCheckingPtrGroup; |
46 | typedef std::pair<const RuntimeCheckingPtrGroup *, |
47 | const RuntimeCheckingPtrGroup *> |
48 | RuntimePointerCheck; |
49 | |
50 | template <typename T, unsigned N> class SmallSetVector; |
51 | template <typename T, unsigned N> class SmallPriorityWorklist; |
52 | |
53 | BasicBlock *(Loop *L, DominatorTree *DT, LoopInfo *LI, |
54 | MemorySSAUpdater *MSSAU, bool PreserveLCSSA); |
55 | |
/// Ensure that all exit blocks of the loop are dedicated exits.
///
/// For any loop exit block with non-loop predecessors, we split the loop
/// predecessors to use a dedicated loop exit block. We update the dominator
/// tree and loop info if provided, and will preserve LCSSA if requested.
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                             MemorySSAUpdater *MSSAU, bool PreserveLCSSA);

/// Ensures LCSSA form for every instruction from the Worklist in the scope of
/// innermost containing loop.
///
/// For each given instruction which has uses outside of the loop, an LCSSA PHI
/// node is inserted and the uses outside the loop are rewritten to use this
/// node.
///
/// LoopInfo and DominatorTree are required and, since the routine makes no
/// changes to CFG, preserved.
///
/// Returns true if any modifications are made.
///
/// This function may introduce unused PHI nodes. If \p PHIsToRemove is not
/// nullptr, those are added to it (before removing, the caller has to check if
/// they still do not have any uses). Otherwise the PHIs are directly removed.
///
/// If \p InsertedPHIs is not nullptr, inserted phis will be added to this
/// vector.
bool formLCSSAForInstructions(
    SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT,
    const LoopInfo &LI, ScalarEvolution *SE,
    SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr,
    SmallVectorImpl<PHINode *> *InsertedPHIs = nullptr);

/// Put loop into LCSSA form.
///
/// Looks at all instructions in the loop which have uses outside of the
/// current loop. For each, an LCSSA PHI node is inserted and the uses outside
/// the loop are rewritten to use this node. Sub-loops must be in LCSSA form
/// already.
///
/// LoopInfo and DominatorTree are required and preserved.
///
/// If ScalarEvolution is passed in, it will be preserved.
///
/// Returns true if any modifications are made to the loop.
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
               ScalarEvolution *SE);

/// Put a loop nest into LCSSA form.
///
/// This recursively forms LCSSA for a loop nest.
///
/// LoopInfo and DominatorTree are required and preserved.
///
/// If ScalarEvolution is passed in, it will be preserved.
///
/// Returns true if any modifications are made to the loop.
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
                          ScalarEvolution *SE);
114 | |
/// Flags controlling how much is checked when sinking or hoisting
/// instructions. The number of memory accesses in the loop (and whether there
/// are too many) is determined in the constructors when using MemorySSA.
class SinkAndHoistLICMFlags {
public:
  // Explicitly set limits.
  SinkAndHoistLICMFlags(unsigned LicmMssaOptCap,
                        unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
                        Loop &L, MemorySSA &MSSA);
  // Use default limits.
  SinkAndHoistLICMFlags(bool IsSink, Loop &L, MemorySSA &MSSA);

  void setIsSink(bool B) { IsSink = B; }
  bool getIsSink() { return IsSink; }
  // True when the loop has more memory accesses than the cap allows
  // (computed by the constructors, per the class comment above).
  bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; }
  // True once the budget of clobbering MemorySSA queries (LicmMssaOptCap)
  // has been exhausted; incremented via incrementClobberingCalls().
  bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; }
  void incrementClobberingCalls() { ++LicmMssaOptCounter; }

protected:
  bool NoOfMemAccTooLarge = false;       // set by the MemorySSA constructors
  unsigned LicmMssaOptCounter = 0;       // clobbering queries made so far
  unsigned LicmMssaOptCap;               // budget for clobbering queries
  unsigned LicmMssaNoAccForPromotionCap; // access cap for promotion — see LICM
  bool IsSink;                           // sinking (true) vs hoisting mode
};
140 | |
/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in
/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
/// uses before definitions, allowing us to sink a loop body in one pass without
/// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
/// TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as
/// arguments. Diagnostics are emitted via \p ORE. It returns changed status.
/// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when
/// this function is called by \p sinkRegionForLoopNest.
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                TargetLibraryInfo *, TargetTransformInfo *, Loop *CurLoop,
                MemorySSAUpdater &, ICFLoopSafetyInfo *,
                SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *,
                Loop *OutermostLoop = nullptr);

/// Call sinkRegion on loops contained within the specified loop
/// in order from innermost to outermost.
bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
                           DominatorTree *, TargetLibraryInfo *,
                           TargetTransformInfo *, Loop *, MemorySSAUpdater &,
                           ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
                           OptimizationRemarkEmitter *);

/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
/// first order w.r.t the DominatorTree. This allows us to visit definitions
/// before uses, allowing us to hoist a loop body in one pass without iteration.
/// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
/// TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as arguments.
/// Diagnostics are emitted via \p ORE. It returns changed status.
/// \p AllowSpeculation is whether values should be hoisted even if they are not
/// guaranteed to execute in the loop, but are safe to speculatively execute.
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                 AssumptionCache *, TargetLibraryInfo *, Loop *,
                 MemorySSAUpdater &, ScalarEvolution *, ICFLoopSafetyInfo *,
                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool,
                 bool AllowSpeculation);

/// Return true if the induction variable \p IV in a Loop whose latch is
/// \p LatchBlock would become dead if the exit test \p Cond were removed.
/// Conservatively returns false if analysis is insufficient.
bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond);
185 | |
186 | /// This function deletes dead loops. The caller of this function needs to |
187 | /// guarantee that the loop is infact dead. |
188 | /// The function requires a bunch or prerequisites to be present: |
189 | /// - The loop needs to be in LCSSA form |
190 | /// - The loop needs to have a Preheader |
191 | /// - A unique dedicated exit block must exist |
192 | /// |
193 | /// This also updates the relevant analysis information in \p DT, \p SE, \p LI |
194 | /// and \p MSSA if pointers to those are provided. |
195 | /// It also updates the loop PM if an updater struct is provided. |
196 | |
197 | void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, |
198 | LoopInfo *LI, MemorySSA *MSSA = nullptr); |
199 | |
/// Remove the backedge of the specified loop. Handles loop nests and general
/// loop structures subject to the precondition that the loop has no parent
/// loop and has a single latch block. Preserves all listed analyses.
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
                       LoopInfo &LI, MemorySSA *MSSA);

/// Try to promote memory values to scalars by sinking stores out of
/// the loop and moving loads to before the loop. We do this by looping over
/// the stores in the loop, looking for stores to Must pointers which are
/// loop invariant. It takes a set of must-alias values, Loop exit blocks
/// vector, loop exit blocks insertion point vector, PredIteratorCache,
/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
/// of the loop and loop safety information as arguments.
/// NOTE(review): the SmallVectorImpl<MemoryAccess *> parameter is not covered
/// by the description above — presumably MemorySSA insertion points; confirm
/// against the implementation.
/// Diagnostics are emitted via \p ORE. It returns changed status.
/// \p AllowSpeculation is whether values should be hoisted even if they are not
/// guaranteed to execute in the loop, but are safe to speculatively execute.
bool promoteLoopAccessesToScalars(
    const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
    SmallVectorImpl<BasicBlock::iterator> &, SmallVectorImpl<MemoryAccess *> &,
    PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC,
    const TargetLibraryInfo *, TargetTransformInfo *, Loop *,
    MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *,
    bool AllowSpeculation, bool HasReadsOutsideSet);

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
SmallVector<DomTreeNode *, 16> collectChildrenInLoop(DomTreeNode *N,
                                                     const Loop *CurLoop);

/// Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);
231 | |
/// Find a combination of metadata ("llvm.loop.vectorize.width" and
/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct an
/// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found
/// then std::nullopt is returned.
std::optional<ElementCount>
getOptionalElementCountLoopAttribute(const Loop *TheLoop);
238 | |
239 | /// Create a new loop identifier for a loop created from a loop transformation. |
240 | /// |
241 | /// @param OrigLoopID The loop ID of the loop before the transformation. |
242 | /// @param FollowupAttrs List of attribute names that contain attributes to be |
243 | /// added to the new loop ID. |
244 | /// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited |
245 | /// from the original loop. The following values |
246 | /// are considered: |
247 | /// nullptr : Inherit all attributes from @p OrigLoopID. |
248 | /// "" : Do not inherit any attribute from @p OrigLoopID; only use |
249 | /// those specified by a followup attribute. |
250 | /// "<prefix>": Inherit all attributes except those which start with |
251 | /// <prefix>; commonly used to remove metadata for the |
252 | /// applied transformation. |
253 | /// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return |
254 | /// std::nullopt. |
255 | /// |
256 | /// @return The loop ID for the after-transformation loop. The following values |
257 | /// can be returned: |
258 | /// std::nullopt : No followup attribute was found; it is up to the |
259 | /// transformation to choose attributes that make sense. |
260 | /// @p OrigLoopID: The original identifier can be reused. |
261 | /// nullptr : The new loop has no attributes. |
262 | /// MDNode* : A new unique loop identifier. |
263 | std::optional<MDNode *> |
264 | makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs, |
265 | const char *InheritOptionsAttrsPrefix = "" , |
266 | bool AlwaysNew = false); |
267 | |
/// Look for the loop attribute that disables all transformation heuristics.
bool hasDisableAllTransformsHint(const Loop *L);

/// Look for the loop attribute that disables the LICM transformation
/// heuristics.
bool hasDisableLICMTransformsHint(const Loop *L);

/// The mode sets how eager a transformation should be applied.
enum TransformationMode {
  /// The pass can use heuristics to determine whether a transformation should
  /// be applied.
  TM_Unspecified,

  /// The transformation should be applied without considering a cost model.
  TM_Enable,

  /// The transformation should not be applied.
  TM_Disable,

  /// Force is a flag and should not be used alone.
  TM_Force = 0x04,

  /// The transformation was directed by the user, e.g. by a #pragma in
  /// the source code. If the transformation could not be applied, a
  /// warning should be emitted.
  TM_ForcedByUser = TM_Enable | TM_Force,

  /// The transformation must not be applied. For instance, `#pragma clang loop
  /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
  /// general loop metadata, it must not be dropped. Most passes should not
  /// behave differently under TM_Disable and TM_SuppressedByUser.
  TM_SuppressedByUser = TM_Disable | TM_Force
};
300 | |
/// @{
/// Get the mode for LLVM's supported loop transformations.
TransformationMode hasUnrollTransformation(const Loop *L);
TransformationMode hasUnrollAndJamTransformation(const Loop *L);
TransformationMode hasVectorizeTransformation(const Loop *L);
TransformationMode hasDistributeTransformation(const Loop *L);
TransformationMode hasLICMVersioningTransformation(const Loop *L);
/// @}

/// Set input string into loop metadata by keeping other values intact.
/// If the string is already in loop metadata, update the value if it is
/// different.
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
                             unsigned V = 0);

/// Returns a loop's estimated trip count based on branch weight metadata.
/// In addition if \p EstimatedLoopInvocationWeight is not null it is
/// initialized with weight of loop's latch leading to the exit.
/// Returns 0 when the count is estimated to be 0, or std::nullopt when a
/// meaningful estimate can not be made.
std::optional<unsigned>
getLoopEstimatedTripCount(Loop *L,
                          unsigned *EstimatedLoopInvocationWeight = nullptr);

/// Set a loop's branch weight metadata to reflect that loop has \p
/// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits
/// through latch. Returns true if metadata is successfully updated, false
/// otherwise. Note that loop must have a latch block which controls loop exit
/// in order to succeed.
bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
                               unsigned EstimatedLoopInvocationWeight);

/// Check inner loop (L) backedge count is known to be invariant on all
/// iterations of its outer loop. If the loop has no parent, this is trivially
/// true.
bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE);

/// Helper to consistently add the set of standard passes to a loop pass's \c
/// AnalysisUsage.
///
/// All loop passes should call this as part of implementing their \c
/// getAnalysisUsage.
void getLoopAnalysisUsage(AnalysisUsage &AU);
344 | |
/// Returns true if it is legal to hoist or sink this instruction disregarding
/// the possible introduction of faults. Reasoning about potential faulting
/// instructions is the responsibility of the caller since it is challenging to
/// do efficiently from within this routine.
/// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the
/// target executes at most once per execution of the loop body. This is used
/// to assess the legality of duplicating atomic loads. Generally, this is
/// true when moving out of loop and not true when moving into loops.
/// If \p ORE is set use it to emit optimization remarks.
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                        Loop *CurLoop, MemorySSAUpdater &MSSAU,
                        bool TargetExecutesOncePerLoop,
                        SinkAndHoistLICMFlags &LICMFlags,
                        OptimizationRemarkEmitter *ORE = nullptr);

/// Returns the min/max intrinsic used when expanding a min/max reduction.
Intrinsic::ID getMinMaxReductionIntrinsicOp(RecurKind RK);

/// Returns the comparison predicate used when expanding a min/max reduction.
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);

/// See RecurrenceDescriptor::isAnyOfPattern for a description of the pattern we
/// are trying to match. In this pattern, we are only ever selecting between two
/// values: 1) an initial start value \p StartVal of the reduction PHI, and 2) a
/// loop invariant value. If any lane value in \p Left, \p Right is not equal
/// to \p StartVal, select the loop invariant value. This is done by selecting
/// \p Right iff \p Left is equal to \p StartVal.
Value *createAnyOfOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK,
                     Value *Left, Value *Right);
374 | |
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
/// The Builder's fast-math-flags must be set to propagate the expected values.
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
                      Value *Right);

/// Generates an ordered vector reduction using extracts to reduce the value.
Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
                           unsigned Op, RecurKind MinMaxKind = RecurKind::None);

/// Generates a vector reduction using shufflevectors to reduce the value.
/// Fast-math-flags are propagated using the IRBuilder's setting.
Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
                           RecurKind MinMaxKind = RecurKind::None);

/// Create a target reduction of the given vector. The reduction operation
/// is described by \p RdxKind, which also supplies the extra information
/// min/max reductions require. (The previous comment referenced a nonexistent
/// \p Opcode parameter.)
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
/// Fast-math-flags are propagated using the IRBuilder's setting.
Value *createSimpleTargetReduction(IRBuilderBase &B, Value *Src,
                                   RecurKind RdxKind);

/// Create a target reduction of the given vector \p Src for a reduction of the
/// kind RecurKind::IAnyOf or RecurKind::FAnyOf. The reduction operation is
/// described by \p Desc.
Value *createAnyOfTargetReduction(IRBuilderBase &B, Value *Src,
                                  const RecurrenceDescriptor &Desc,
                                  PHINode *OrigPhi);

/// Create a generic target reduction using a recurrence descriptor \p Desc
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
/// Fast-math-flags are propagated using the RecurrenceDescriptor.
Value *createTargetReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc,
                             Value *Src, PHINode *OrigPhi = nullptr);

/// Create an ordered reduction intrinsic using the given recurrence
/// descriptor \p Desc.
Value *createOrderedReduction(IRBuilderBase &B,
                              const RecurrenceDescriptor &Desc, Value *Src,
                              Value *Start);

/// Get the intersection (logical and) of all of the potential IR flags
/// of each scalar operation (VL) that will be converted into a vector (I).
/// If OpValue is non-null, we only consider operations similar to OpValue
/// when intersecting.
/// Flag set: NSW, NUW (if IncludeWrapFlags is true), exact, and all of
/// fast-math.
void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr,
                      bool IncludeWrapFlags = true);
426 | |
/// Returns true if we can prove that \p S is defined and always negative in
/// loop \p L.
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);

/// Returns true if we can prove that \p S is defined and always non-negative in
/// loop \p L.
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
                              ScalarEvolution &SE);
/// Returns true if we can prove that \p S is defined and always positive in
/// loop \p L.
bool isKnownPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);

/// Returns true if we can prove that \p S is defined and always non-positive in
/// loop \p L.
bool isKnownNonPositiveInLoop(const SCEV *S, const Loop *L,
                              ScalarEvolution &SE);

/// Returns true if \p S is defined and never is equal to signed/unsigned max.
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                       bool Signed);

/// Returns true if \p S is defined and never is equal to signed/unsigned min.
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                       bool Signed);

/// Policy controlling which loop exit values rewriteLoopExitValues (below) is
/// allowed to replace.
enum ReplaceExitVal {
  NeverRepl,
  OnlyCheapRepl,
  NoHardUse,
  UnusedIndVarInLoop,
  AlwaysRepl
};
459 | |
/// If the final value of any expressions that are recurrent in the loop can
/// be computed, substitute the exit values from the loop into any instructions
/// outside of the loop that use the final values of the current expressions.
/// Return the number of loop exit values that have been replaced, and the
/// corresponding phi node will be added to DeadInsts.
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
                          ScalarEvolution *SE, const TargetTransformInfo *TTI,
                          SCEVExpander &Rewriter, DominatorTree *DT,
                          ReplaceExitVal ReplaceExitValue,
                          SmallVector<WeakTrackingVH, 16> &DeadInsts);

/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
/// \p OrigLoop and the following distribution of \p OrigLoop iterations among
/// \p UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that
/// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect
/// the remaining TC%UF iterations.
///
/// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p
/// RemainderLoop in which case weights for \p OrigLoop are updated accordingly.
/// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are
/// equal. \p UF must be greater than zero.
/// If \p OrigLoop has no profile info associated nothing happens.
///
/// This utility may be useful for such optimizations as unroller and
/// vectorizer as it's typical transformation for them.
void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
                                  Loop *RemainderLoop, uint64_t UF);

/// Utility that implements appending of loops onto a worklist given a range.
/// We want to process loops in postorder, but the worklist is a LIFO data
/// structure, so we append to it in *reverse* postorder.
/// For trees, a preorder traversal is a viable reverse postorder, so we
/// actually append using a preorder walk algorithm.
template <typename RangeT>
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist<Loop *, 4> &);
/// Utility that implements appending of loops onto a worklist given a range.
/// It has the same behavior as appendLoopsToWorklist, but assumes the range of
/// loops has already been reversed, so it processes loops in the given order.
template <typename RangeT>
void appendReversedLoopsToWorklist(RangeT &&,
                                   SmallPriorityWorklist<Loop *, 4> &);

/// Utility that implements appending of loops onto a worklist given LoopInfo.
/// Calls the templated utility taking a Range of loops, handing it the Loops
/// in LoopInfo, iterated in reverse. This is because the loops are stored in
/// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling,
/// loop deletion, and LICM, we largely want to work forward across the CFG so
/// that we visit defs before uses and can propagate simplifications from one
/// loop nest into the next. Calls appendReversedLoopsToWorklist with the
/// already reversed loops in LI.
/// FIXME: Consider changing the order in LoopInfo.
void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);
512 | |
/// Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
                LoopInfo *LI, LPPassManager *LPM);

/// Add code that checks at runtime if the accessed arrays in \p PointerChecks
/// overlap. Returns the final comparator value or nullptr if no check is
/// needed.
Value *
addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
                 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
                 SCEVExpander &Expander, bool HoistRuntimeChecks = false);

/// Emit runtime overlap checks based on pointer differences for the groups in
/// \p Checks. NOTE(review): semantics inferred from the signature and the
/// sibling addRuntimeChecks above — \p GetVF presumably yields the
/// vectorization factor and \p IC the interleave count; confirm against the
/// implementation.
Value *addDiffRuntimeChecks(
    Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
    function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC);

/// Struct to hold information about a partially invariant condition.
struct IVConditionInfo {
  /// Instructions that need to be duplicated and checked for the unswitching
  /// condition.
  SmallVector<Instruction *> InstToDuplicate;

  /// Constant to indicate for which value the condition is invariant.
  Constant *KnownValue = nullptr;

  /// True if the partially invariant path is no-op (=does not have any
  /// side-effects and no loop value is used outside the loop).
  bool PathIsNoop = true;

  /// If the partially invariant path reaches a single exit block, ExitForPath
  /// is set to that block. Otherwise it is nullptr.
  BasicBlock *ExitForPath = nullptr;
};
546 | |
/// Check if the loop header has a conditional branch that is not
/// loop-invariant, because it involves load instructions. If all paths from
/// either the true or false successor to the header or loop exits do not
/// modify the memory feeding the condition, perform 'partial unswitching'. That
/// is, duplicate the instructions feeding the condition in the pre-header. Then
/// unswitch on the duplicated condition. The condition is now known in the
/// unswitched version for the 'invariant' path through the original loop.
///
/// If the branch condition of the header is partially invariant, return a pair
/// containing the instructions to duplicate and a boolean Constant to update
/// the condition in the loops created for the true or false successors.
std::optional<IVConditionInfo> hasPartialIVCondition(const Loop &L,
                                                     unsigned MSSAThreshold,
                                                     const MemorySSA &MSSA,
                                                     AAResults &AA);
562 | |
563 | } // end namespace llvm |
564 | |
565 | #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H |
566 | |